Import stdarch history as a Josh subtree

This commit is contained in:
Jakub Beránek 2025-06-23 17:22:53 +02:00
commit e433101882
No known key found for this signature in database
GPG key ID: 909CD0D26483516B
306 changed files with 729730 additions and 0 deletions

View file

@ -0,0 +1,16 @@
# Cirrus CI task: builds the workspace on a FreeBSD instance using the nightly toolchain.
task:
name: x86_64-unknown-freebsd
freebsd_instance:
image_family: freebsd-13-4
env:
# FIXME(freebsd): FreeBSD has a segfault when `RUST_BACKTRACE` is set
# https://github.com/rust-lang/rust/issues/132185
RUST_BACKTRACE: "0"
# Download the rustup installer and run it non-interactively (`-y`) with nightly as the default toolchain.
setup_script:
- curl https://sh.rustup.rs -sSf --output rustup.sh
- sh rustup.sh --default-toolchain nightly -y
- . $HOME/.cargo/env
- rustup default nightly
test_script:
# The cargo env file is sourced again here before `cargo` is invoked.
- . $HOME/.cargo/env
# `--workspace` is the current spelling of the deprecated `--all` alias; it builds every workspace member.
- cargo build --workspace

View file

@ -0,0 +1,4 @@
# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make `git blame` ignore the following commits.
# format with style edition 2024
fc87bd98d689590a0b6f5ee4110c5b9f962faa66

View file

@ -0,0 +1,288 @@
# GitHub Actions workflow named "CI".
# It is triggered by pull requests and by `merge_group` events (GitHub merge queue).
name: CI
on:
pull_request:
merge_group:
# The jobs that follow belong to this workflow.
jobs:
# Style gate: switches the runner to the nightly toolchain and runs `ci/style.sh`.
# The `docs`, `verify`, `test` and `build-std-detect` jobs all list this job in `needs`.
style:
name: Check Style
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup update nightly --no-self-update && rustup default nightly
- run: ci/style.sh
# Documentation job: runs `ci/dox.sh` on the nightly toolchain once `style` has finished.
docs:
name: Build Documentation
needs: [style]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup update nightly --no-self-update && rustup default nightly
- run: ci/dox.sh
# `CI=1` is exported to the `ci/dox.sh` step.
env:
CI: 1
# Intrinsic verification job: runs `cargo test` against the `stdarch-verify` crate manifest
# on the nightly toolchain once `style` has finished.
verify:
name: Automatic intrinsic verification
needs: [style]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup update nightly --no-self-update && rustup default nightly
- run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
# Main test job. The matrix is the product of `profile` (dev, release) and `target`,
# where each target is a `tuple` (Rust target name) plus the `os` runner label used by `runs-on`.
# The `include` list below attaches extra flags (`test_everything`, `build_std`,
# `disable_assert_instr`, `norun`) to specific targets; the steps read them as `matrix.<flag>`.
test:
needs: [style]
name: Test
runs-on: ${{ matrix.target.os }}
strategy:
matrix:
profile:
- dev
- release
target:
# Dockers that are run through docker on linux
- tuple: i686-unknown-linux-gnu
os: ubuntu-latest
- tuple: x86_64-unknown-linux-gnu
os: ubuntu-latest
- tuple: arm-unknown-linux-gnueabihf
os: ubuntu-latest
- tuple: armv7-unknown-linux-gnueabihf
os: ubuntu-latest
- tuple: aarch64-unknown-linux-gnu
os: ubuntu-latest
- tuple: aarch64_be-unknown-linux-gnu
os: ubuntu-latest
- tuple: riscv32gc-unknown-linux-gnu
os: ubuntu-latest
- tuple: riscv64gc-unknown-linux-gnu
os: ubuntu-latest
- tuple: powerpc-unknown-linux-gnu
os: ubuntu-latest
- tuple: powerpc64-unknown-linux-gnu
os: ubuntu-latest
- tuple: powerpc64le-unknown-linux-gnu
os: ubuntu-latest
# MIPS targets disabled since they are dropped to tier 3.
# See https://github.com/rust-lang/compiler-team/issues/648
#- tuple: mips-unknown-linux-gnu
# os: ubuntu-latest
#- tuple: mips64-unknown-linux-gnuabi64
# os: ubuntu-latest
#- tuple: mips64el-unknown-linux-gnuabi64
# os: ubuntu-latest
#- tuple: mipsel-unknown-linux-musl
# os: ubuntu-latest
- tuple: s390x-unknown-linux-gnu
os: ubuntu-latest
- tuple: i586-unknown-linux-gnu
os: ubuntu-latest
- tuple: nvptx64-nvidia-cuda
os: ubuntu-latest
- tuple: thumbv6m-none-eabi
os: ubuntu-latest
- tuple: thumbv7m-none-eabi
os: ubuntu-latest
- tuple: thumbv7em-none-eabi
os: ubuntu-latest
- tuple: thumbv7em-none-eabihf
os: ubuntu-latest
- tuple: loongarch64-unknown-linux-gnu
os: ubuntu-latest
- tuple: wasm32-wasip1
os: ubuntu-latest
# macOS targets
- tuple: x86_64-apple-darwin
os: macos-15-large
- tuple: x86_64-apple-ios-macabi
os: macos-15-large
- tuple: aarch64-apple-darwin
os: macos-15
- tuple: aarch64-apple-ios-macabi
os: macos-15
# FIXME: gh-actions build environment doesn't have linker support
# - tuple: i686-apple-darwin
# os: macos-13
# Windows targets
- tuple: x86_64-pc-windows-msvc
os: windows-2025
- tuple: i686-pc-windows-msvc
os: windows-2025
- tuple: aarch64-pc-windows-msvc
os: windows-11-arm
- tuple: x86_64-pc-windows-gnu
os: windows-2025
# - tuple: i686-pc-windows-gnu
# os: windows-latest
# Add additional variables to the matrix variations generated above using `include`:
include:
# `TEST_EVERYTHING` setups - there should be at least 1 for each architecture
- target:
tuple: aarch64-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: aarch64_be-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
build_std: true
- target:
tuple: armv7-unknown-linux-gnueabihf
os: ubuntu-latest
test_everything: true
- target:
tuple: loongarch64-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: powerpc-unknown-linux-gnu
os: ubuntu-latest
disable_assert_instr: true
test_everything: true
- target:
tuple: powerpc64-unknown-linux-gnu
os: ubuntu-latest
disable_assert_instr: true
test_everything: true
- target:
tuple: powerpc64le-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: riscv32gc-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
build_std: true
- target:
tuple: riscv64gc-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: s390x-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
- target:
tuple: x86_64-unknown-linux-gnu
os: ubuntu-latest
test_everything: true
# MIPS targets disabled since they are dropped to tier 3.
# See https://github.com/rust-lang/compiler-team/issues/648
#- target:
# tuple: mips-unknown-linux-gnu
# os: ubuntu-latest
# norun: true
#- target:
# tuple: mips64-unknown-linux-gnuabi64
# os: ubuntu-latest
# norun: true
#- target:
# tuple: mips64el-unknown-linux-gnuabi64
# os: ubuntu-latest
# norun: true
#- target:
# tuple: mipsel-unknown-linux-musl
# os: ubuntu-latest
# norun: true
- target:
tuple: aarch64-apple-darwin
os: macos-15
norun: true # https://github.com/rust-lang/stdarch/issues/1206
- target:
tuple: aarch64-apple-ios-macabi
os: macos-15
norun: true # https://github.com/rust-lang/stdarch/issues/1206
# Steps of the `test` job: install nightly, prepare the target, export the matrix flags as
# environment variables, then hand over to `ci/run.sh` or `ci/run-docker.sh`.
steps:
- uses: actions/checkout@v4
# Every runner except `windows-11-arm` only needs the nightly toolchain updated and selected.
- name: Install Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
shell: bash
if: matrix.target.os != 'windows-11-arm'
- name: Install Rust for `windows-11-arm` runners
# The arm runners don't have Rust pre-installed (https://github.com/actions/partner-runner-images/issues/77)
# curl is restricted to HTTPS with TLS >= 1.2, and `-f` makes it fail on HTTP errors
# so that an error page is never piped into `sh`.
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
shell: bash
if: matrix.target.os == 'windows-11-arm'
# Without `build_std`, add the target through rustup.
- run: rustup target add ${{ matrix.target.tuple }}
shell: bash
if: matrix.build_std == ''
# With `build_std`, install `rust-src` and export `CARGO_UNSTABLE_BUILD_STD=std` for later steps.
- run: |
rustup component add rust-src
echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
shell: bash
if: matrix.build_std != ''
# Configure some env vars based on matrix configuration
- run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV
shell: bash
# `NORUN=1` is exported for `norun` entries, `thumb*` targets and `nvptx64-nvidia-cuda`.
- run: echo "NORUN=1" >> $GITHUB_ENV
shell: bash
if: matrix.norun != '' || startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
- run: echo "STDARCH_TEST_EVERYTHING=1" >> $GITHUB_ENV
shell: bash
if: matrix.test_everything != ''
- run: echo "STDARCH_DISABLE_ASSERT_INSTR=1" >> $GITHUB_ENV
shell: bash
if: matrix.disable_assert_instr != ''
# `NOSTD=1` is exported for `thumb*` targets and `nvptx64-nvidia-cuda`.
- run: echo "NOSTD=1" >> $GITHUB_ENV
shell: bash
if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
# Windows & OSX go straight to `run.sh` ...
# (the condition also routes `thumb*` targets on `ubuntu-latest` to `run.sh`)
- run: ./ci/run.sh
shell: bash
if: matrix.target.os != 'ubuntu-latest' || startsWith(matrix.target.tuple, 'thumb')
env:
TARGET: ${{ matrix.target.tuple }}
# ... while Linux goes to `run-docker.sh`
- run: ./ci/run-docker.sh ${{ matrix.target.tuple }}
shell: bash
if: matrix.target.os == 'ubuntu-latest' && !startsWith(matrix.target.tuple, 'thumb')
env:
TARGET: ${{ matrix.target.tuple }}
# `std_detect` build job: runs `./ci/build-std-detect.sh` on the nightly toolchain once `style` has finished.
build-std-detect:
needs: [style]
name: Build std_detect
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
# `--no-self-update` matches every other job in this workflow: only the toolchain is updated, not rustup itself.
run: rustup update nightly --no-self-update && rustup default nightly
- run: ./ci/build-std-detect.sh
# Aggregate gate job: it depends on every other job and succeeds only when all of them
# report `result == "success"`.
conclusion:
needs:
- docs
- verify
- test
- build-std-detect
runs-on: ubuntu-latest
# We need to ensure this job does *not* get skipped if its dependencies fail,
# because a skipped job is considered a success by GitHub. So we have to
# overwrite `if:`. We use `!cancelled()` to ensure the job does still not get run
# when the workflow is canceled manually.
#
# ALL THE PREVIOUS JOBS NEED TO BE ADDED TO THE `needs` SECTION OF THIS JOB!
if: ${{ !cancelled() }} # make sure this is never "skipped"
steps:
# The `needs` context is serialized to JSON and checked with `jq`;
# `--exit-status` makes the step fail when the `all(...)` filter yields false.
- name: Conclusion
run: |
# Print the dependent jobs to see them in the CI log
jq -C <<< '${{ toJson(needs) }}'
# Check if all jobs that we depend on (in the needs array) were successful.
jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'

9
library/stdarch/.gitignore vendored Normal file
View file

@ -0,0 +1,9 @@
# Hidden `*.swp` files (editor swap files).
.*.swp
# NOTE(review): presumably the Cargo build directory and a ctags index - confirm.
target
tags
# NOTE(review): these paths sit inside the stdarch-gen-* crates and look like generator output - confirm.
crates/stdarch-gen-arm/aarch64.rs
crates/stdarch-gen-arm/arm.rs
crates/stdarch-gen-loongarch/lasx.c
crates/stdarch-gen-loongarch/lsx.c
# NOTE(review): presumably scratch directories for generated C and Rust test programs - confirm.
c_programs/*
rust_programs/*

0
library/stdarch/.gitmodules vendored Normal file
View file

View file

@ -0,0 +1,93 @@
# Contributing to stdarch
The `stdarch` crate is more than willing to accept contributions! First you'll
probably want to check out the repository and make sure that tests pass for you:
```
$ git clone https://github.com/rust-lang/stdarch
$ cd stdarch
$ TARGET="<your-target-arch>" ci/run.sh
```
Where `<your-target-arch>` is the target triple as used by `rustup`, e.g. `x86_64-unknown-linux-gnu` (without any preceding `nightly-` or similar).
Also remember that this repository requires the nightly channel of Rust!
The above tests do in fact require nightly Rust to be the default toolchain on your system; to set that, use `rustup default nightly` (and `rustup default stable` to revert).
If any of the above steps don't work, [please let us know][new]!
Next up you can [find an issue][issues] to help out on. We've selected a few
with the [`help wanted`][help] tag which could
particularly use some help. You may be most interested in [#40][vendor],
implementing all vendor intrinsics on x86. That issue's got some good pointers
about where to get started!
If you've got general questions feel free to [join us on gitter][gitter] and ask
around! Feel free to ping either @BurntSushi or @alexcrichton with questions.
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd
# How to write examples for stdarch intrinsics
There are a few features that must be enabled for the given intrinsic to work
properly and the example must only be run by `cargo test --doc` when the feature
is supported by the CPU. As a result, the default `fn main` that is generated by
`rustdoc` will not work (in most cases). Consider using the following as a guide
to ensure your example works as expected.
```rust
/// # // We need cfg_target_feature to ensure the example is only
/// # // run by `cargo test --doc` when the CPU supports the feature
/// # #![feature(cfg_target_feature)]
/// # // We need target_feature for the intrinsic to work
/// # #![feature(target_feature)]
/// #
/// # // rustdoc by default uses `extern crate stdarch`, but we need the
/// # // `#[macro_use]`
/// # #[macro_use] extern crate stdarch;
/// #
/// # // The real main function
/// # fn main() {
/// # // Only run this if `<target feature>` is supported
/// # if cfg_feature_enabled!("<target feature>") {
/// # // Create a `worker` function that will only be run if the target feature
/// # // is supported and ensure that `target_feature` is enabled for your worker
/// # // function
/// # #[target_feature(enable = "<target feature>")]
/// # unsafe fn worker() {
///
/// // Write your example here. Feature specific intrinsics will work here! Go wild!
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
```
If some of the above syntax does not look familiar, the [Documentation as tests] section
of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free
to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping
to improve the documentation of `stdarch`!
# Alternative Testing Instructions
It is generally recommended that you use `ci/run-docker.sh` to run the tests.
However this might not work for you, e.g. if you are on Windows.
In that case you can fall back to running `cargo +nightly test` and `cargo +nightly test --release -p core_arch` for testing the code generation.
Note that these require the nightly toolchain to be installed and for `rustc` to know about your target triple and its CPU.
In particular you need to set the `TARGET` environment variable as you would for `ci/run.sh`.
In addition you need to set `RUSTFLAGS` (note the `-C` codegen option) to indicate target features, e.g. `RUSTFLAGS="-C target-feature=+avx2"`.
You can also set `-C target-cpu=native` if you're "just" developing against your current CPU.
Be warned that when you use these alternative instructions, [things may go less smoothly than they would with `ci/run-docker.sh`][ci-run-good], e.g. instruction generation tests may fail because the disassembler named them differently, e.g. it may generate `vaesenc` instead of `aesenc` instructions despite them behaving the same.
Also, these instructions execute fewer tests than would normally be run, so don't be surprised if some errors show up for tests not covered here when you eventually open a pull request.
[new]: https://github.com/rust-lang/stdarch/issues/new
[issues]: https://github.com/rust-lang/stdarch/issues
[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period
[vendor]: https://github.com/rust-lang/stdarch/issues/40
[Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests
[Rust Book]: https://doc.rust-lang.org/book/first-edition
[ci-run-good]: https://github.com/rust-lang/stdarch/issues/931#issuecomment-711412126

965
library/stdarch/Cargo.lock Normal file
View file

@ -0,0 +1,965 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "assert-instr-macro"
version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
]
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "cc"
version = "1.2.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "clap"
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim 0.11.1",
]
[[package]]
name = "clap_derive"
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.102",
]
[[package]]
name = "clap_lex"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "core_arch"
version = "0.1.5"
dependencies = [
"std_detect",
"stdarch-test",
"syscalls",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "csv"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
dependencies = [
"memchr",
]
[[package]]
name = "darling"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.10.0",
"syn 1.0.109",
]
[[package]]
name = "darling_macro"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835"
dependencies = [
"darling_core",
"quote",
"syn 1.0.109",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
dependencies = [
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "humantime"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
dependencies = [
"equivalent",
"hashbrown 0.15.4",
]
[[package]]
name = "intrinsic-test"
version = "0.1.0"
dependencies = [
"clap",
"csv",
"diff",
"itertools",
"lazy_static",
"log",
"pretty_env_logger",
"rayon",
"regex",
"serde",
"serde_json",
]
[[package]]
name = "is-terminal"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
"windows-sys",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "once_cell_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "pretty_env_logger"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c"
dependencies = [
"env_logger 0.10.2",
"log",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca7dd09b5f4a9029c35e323b086d0a68acdc673317b9c4d002c6f1d4a7278c6"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"env_logger 0.8.4",
"log",
"rand",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-demangle"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
[[package]]
name = "rustc-std-workspace-alloc"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d441c3b2ebf55cebf796bfdc265d67fa09db17b7bb6bd4be75c509e1e8fec3"
[[package]]
name = "rustc-std-workspace-core"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa9c45b374136f52f2d6311062c7146bff20fec063c3f5d46a410bd937746955"
[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "semver"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
]
[[package]]
name = "serde_json"
version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "serde_with"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff"
dependencies = [
"serde",
"serde_with_macros",
]
[[package]]
name = "serde_with_macros"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "serde_yaml"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b"
dependencies = [
"indexmap 1.9.3",
"ryu",
"serde",
"yaml-rust",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simd-test-macro"
version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
]
[[package]]
name = "std_detect"
version = "0.1.5"
dependencies = [
"cfg-if",
"libc",
"rustc-std-workspace-alloc",
"rustc-std-workspace-core",
]
[[package]]
name = "stdarch-gen-arm"
version = "0.1.0"
dependencies = [
"itertools",
"lazy_static",
"proc-macro2",
"quote",
"regex",
"serde",
"serde_with",
"serde_yaml",
"walkdir",
]
[[package]]
name = "stdarch-gen-loongarch"
version = "0.1.0"
dependencies = [
"rand",
]
[[package]]
name = "stdarch-test"
version = "0.1.0"
dependencies = [
"assert-instr-macro",
"cc",
"cfg-if",
"lazy_static",
"rustc-demangle",
"simd-test-macro",
"wasmprinter",
]
[[package]]
name = "stdarch-verify"
version = "0.1.0"
dependencies = [
"proc-macro2",
"quick-xml",
"quote",
"serde",
"serde_json",
"syn 2.0.102",
]
[[package]]
name = "stdarch_examples"
version = "0.0.0"
dependencies = [
"core_arch",
"quickcheck",
"rand",
"std_detect",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syscalls"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d0e35dc7d73976a53c7e6d7d177ef804a0c0ee774ec77bcc520c2216fd7cbe"
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasmparser"
version = "0.113.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
dependencies = [
"indexmap 2.9.0",
"semver",
]
[[package]]
name = "wasmprinter"
version = "0.2.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
dependencies = [
"anyhow",
"wasmparser",
]
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]
[[package]]
name = "zerocopy"
version = "0.8.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
]

View file

@ -0,0 +1,19 @@
# Workspace manifest: groups every crate under `crates/` plus the `examples`
# package into a single Cargo workspace.
[workspace]
# Use version 1 of Cargo's feature resolver for the whole workspace.
resolver = "1"
members = [
"crates/*",
"examples",
]
# Matched by the `crates/*` glob above, but deliberately kept out of the
# workspace.
exclude = [
"crates/wasm-assert-instr-tests"
]
# Release builds: fully optimized, but still carrying full debug info and
# using incremental compilation.
[profile.release]
debug = true
opt-level = 3
incremental = true
# Benchmark builds: same optimization level, with debug info level 1 instead
# of full debug info.
[profile.bench]
debug = 1
opt-level = 3
incremental = true

View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,25 @@
Copyright (c) 2017 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

18
library/stdarch/README.md Normal file
View file

@ -0,0 +1,18 @@
stdarch - Rust's standard library SIMD components
=======
[![Actions Status](https://github.com/rust-lang/stdarch/workflows/CI/badge.svg)](https://github.com/rust-lang/stdarch/actions)
# Crates
This repository contains two main crates:
* [`core_arch`](crates/core_arch/README.md) implements `core::arch` - Rust's
core library architecture-specific intrinsics, and
* [`std_detect`](crates/std_detect/README.md) implements `std::detect` - Rust's
standard library run-time CPU feature detection.
The `std::simd` component now lives in the
[`packed_simd_2`](https://github.com/rust-lang/packed_simd) crate.

View file

@ -0,0 +1,46 @@
#!/usr/bin/env bash

# Compile-only check of std_detect for the targets listed below.
#
# In std_detect, non-x86 targets have OS-specific implementations, but only
# Linux can be tested in CI. The targets below are supported by std_detect
# but cannot be tested in CI, so they are at least built here.

set -ex
cd "$(dirname "$0")"/..

triples=(
    # Linux
    aarch64-unknown-linux-musl
    armv5te-unknown-linux-musleabi
    aarch64-unknown-linux-ohos
    armv7-unknown-linux-ohos
    # Android
    aarch64-linux-android
    arm-linux-androideabi
    # FreeBSD
    aarch64-unknown-freebsd
    armv6-unknown-freebsd
    powerpc-unknown-freebsd
    powerpc64-unknown-freebsd
    # OpenBSD
    aarch64-unknown-openbsd
    # Windows
    aarch64-pc-windows-msvc
)

# rust-src is needed for -Z build-std.
rustup component add rust-src
cd crates/std_detect

for triple in "${triples[@]}"; do
    # Try to install the target via rustup, discarding all of its output.
    if ! rustup target add "${triple}" >/dev/null 2>&1; then
        # Installation failed: tier 3 targets require -Z build-std.
        cargo build -Z build-std="core,alloc" --target "${triple}"
    else
        cargo build --target "${triple}"
    fi
done

View file

@ -0,0 +1,19 @@
# Host and aarch64 cross toolchains plus QEMU user-mode emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      clang-19 \
      file \
      g++ \
      g++-aarch64-linux-gnu \
      gcc \
      gcc-aarch64-linux-gnu \
      libc6-dev \
      libc6-dev-arm64-cross \
      lld \
      make \
      qemu-user

# Link with the cross GCC and run the produced binaries under qemu-aarch64.
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu"
ENV OBJDUMP=aarch64-linux-gnu-objdump
# NOTE(review): presumably makes the test harness skip tests that need the
# `tme` feature - confirm against the consumer of this variable.
ENV STDARCH_TEST_SKIP_FEATURE=tme

View file

@ -0,0 +1,30 @@
# Host toolchain from apt plus a downloaded Arm GNU toolchain for big-endian
# aarch64 (aarch64_be); binaries run under qemu-aarch64_be.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      clang-19 \
      curl \
      file \
      g++ \
      gcc \
      libc6-dev \
      libc6-dev-arm64-cross \
      lld \
      make \
      qemu-user \
      xz-utils

ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"

# Download the aarch64_be gcc toolchain, unpack it and move it under
# /toolchains.
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz" \
 && tar -xvf "${TOOLCHAIN}.tar.xz" \
 && mkdir /toolchains \
 && mv "./${TOOLCHAIN}" /toolchains

# These are separate ENV instructions on purpose: each one refers to a
# variable defined by an earlier instruction.
ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"

ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
# NOTE(review): presumably makes the test harness skip tests that need the
# `tme` feature - confirm against the consumer of this variable.
ENV STDARCH_TEST_SKIP_FEATURE=tme

View file

@ -0,0 +1,13 @@
# Host GCC plus the arm-linux-gnueabihf cross toolchain and QEMU user-mode
# emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc \
      gcc-arm-linux-gnueabihf \
      libc6-dev \
      libc6-dev-armhf-cross \
      make \
      qemu-user

# Link with the cross GCC and run the produced binaries under qemu-arm.
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf"
ENV OBJDUMP=arm-linux-gnueabihf-objdump

View file

@ -0,0 +1,17 @@
# Host and arm-linux-gnueabihf cross toolchains plus QEMU user-mode emulation
# for the armv7 target.
#
# The base image is ubuntu:25.04, the same release the other CI images use.
FROM ubuntu:25.04

RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  g++ \
  ca-certificates \
  libc6-dev \
  gcc-arm-linux-gnueabihf \
  g++-arm-linux-gnueabihf \
  libc6-dev-armhf-cross \
  qemu-user \
  make \
  file \
  clang-19 \
  lld

# Link with the cross GCC and run the produced binaries under qemu-arm.
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
    OBJDUMP=arm-linux-gnueabihf-objdump

View file

@ -0,0 +1,7 @@
# Native build image: multilib GCC and basic build utilities, with no
# emulator or cross linker configured.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc-multilib \
      libc6-dev \
      make

View file

@ -0,0 +1,7 @@
# Native build image: multilib GCC and basic build utilities, with no
# emulator or cross linker configured.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc-multilib \
      libc6-dev \
      make

View file

@ -0,0 +1,12 @@
# Host GCC plus the GCC 14 LoongArch64 cross toolchain and statically linked
# QEMU user-mode emulators.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gcc \
      gcc-14-loongarch64-linux-gnu \
      libc6-dev \
      libc6-dev-loong64-cross \
      qemu-user-static

# Link with the versioned cross GCC and run binaries under
# qemu-loongarch64-static.
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-loongarch64-static -cpu max -L /usr/loongarch64-linux-gnu"
ENV OBJDUMP=loongarch64-linux-gnu-objdump
# NOTE(review): presumably makes the test harness skip tests that need the
# `frecipe` feature - confirm against the consumer of this variable.
ENV STDARCH_TEST_SKIP_FEATURE=frecipe

View file

@ -0,0 +1,13 @@
# Host GCC plus the mips-linux-gnu cross toolchain and QEMU emulation.
FROM ubuntu:25.04

# Each package is listed exactly once (`qemu-user` used to appear twice).
RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc libc6-dev qemu-user ca-certificates \
  gcc-mips-linux-gnu libc6-dev-mips-cross \
  qemu-system-mips \
  make \
  file

# Link with the cross GCC and run the produced binaries under qemu-mips.
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
    CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
    OBJDUMP=mips-linux-gnu-objdump

View file

@ -0,0 +1,10 @@
# Host GCC plus the mips64-linux-gnuabi64 cross toolchain and QEMU emulation.
FROM ubuntu:25.04

# Each package is listed exactly once (`qemu-user` used to appear twice).
RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc libc6-dev qemu-user ca-certificates \
  gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
  qemu-system-mips64

# Link with the cross GCC and run the produced binaries under qemu-mips64.
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
    CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
    OBJDUMP=mips64-linux-gnuabi64-objdump

View file

@ -0,0 +1,10 @@
# Host GCC plus the mips64el-linux-gnuabi64 cross toolchain and QEMU
# emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gcc \
      gcc-mips64el-linux-gnuabi64 \
      libc6-dev \
      libc6-dev-mips64el-cross \
      qemu-system-mips64el \
      qemu-user

# Link with the cross GCC and run the produced binaries under qemu-mips64el.
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64"
ENV OBJDUMP=mips64el-linux-gnuabi64-objdump

View file

@ -0,0 +1,25 @@
# Host GCC, QEMU and an OpenWrt musl toolchain for little-endian MIPS,
# unpacked into /toolchain.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bzip2 \
      ca-certificates \
      curl \
      file \
      gcc \
      libc6-dev \
      make \
      qemu-system-mips \
      qemu-user

# Note that this originally came from:
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
RUN mkdir /toolchain \
 && curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
      tar xjf - -C /toolchain --strip-components=2

# Put the unpacked toolchain on PATH, use its GCC as C compiler and linker,
# and run the produced binaries under qemu-mipsel.
ENV PATH=$PATH:/rust/bin:/toolchain/bin
ENV CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc
ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc
ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"

View file

@ -0,0 +1,5 @@
# Minimal image: host GCC, libc headers and CA certificates only.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gcc \
      libc6-dev

View file

@ -0,0 +1,12 @@
# Host GCC plus the powerpc-linux-gnu cross toolchain and QEMU emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc \
      gcc-powerpc-linux-gnu \
      libc6-dev \
      libc6-dev-powerpc-cross \
      make \
      qemu-system-ppc \
      qemu-user

# Link and compile C with the cross GCC; run binaries under qemu-ppc with the
# mpc8610 CPU model.
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu mpc8610 -L /usr/powerpc-linux-gnu"
ENV CC=powerpc-linux-gnu-gcc
ENV OBJDUMP=powerpc-linux-gnu-objdump
# NOTE(review): presumably makes the test harness skip tests that need the
# `vsx` feature - confirm against the consumer of this variable.
ENV STDARCH_TEST_SKIP_FEATURE=vsx

View file

@ -0,0 +1,14 @@
# Host GCC plus the powerpc64-linux-gnu cross toolchain and QEMU user-mode
# emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc \
      gcc-powerpc64-linux-gnu \
      libc6-dev \
      libc6-dev-ppc64-cross \
      make \
      qemu-user

# Link and compile C with the cross GCC; run binaries under qemu-ppc64 with
# the power11 CPU model.
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -cpu power11 -L /usr/powerpc64-linux-gnu"
ENV CC=powerpc64-linux-gnu-gcc
ENV OBJDUMP=powerpc64-linux-gnu-objdump
# NOTE(review): presumably makes the test harness skip tests that need the
# `vsx` feature - confirm against the consumer of this variable.
ENV STDARCH_TEST_SKIP_FEATURE=vsx
# These 2 tests have erratic behaviour with qemu, see https://gitlab.com/qemu-project/qemu/-/issues/1623#note_2449012173
ENV STDARCH_TEST_SKIP_FUNCTION=vec_lde_u16,vec_lde_u32

View file

@ -0,0 +1,12 @@
# Host GCC plus the powerpc64le-linux-gnu cross toolchain and QEMU user-mode
# emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc \
      gcc-powerpc64le-linux-gnu \
      libc6-dev \
      libc6-dev-ppc64el-cross \
      make \
      qemu-user

ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc
# Work around qemu triggering a sigill on vec_subs if the cpu target is not
# defined: the runner always passes an explicit `-cpu`.
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -cpu power11 -L /usr/powerpc64le-linux-gnu"
ENV CC=powerpc64le-linux-gnu-gcc
ENV OBJDUMP=powerpc64le-linux-gnu-objdump

View file

@ -0,0 +1,15 @@
# Host GCC, LLVM tools and QEMU from apt, plus a prebuilt riscv32 glibc
# toolchain downloaded from the riscv-gnu-toolchain releases.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      file \
      gcc \
      libc6-dev \
      llvm \
      make \
      qemu-user \
      wget \
      xz-utils

# Release tag of the riscv-gnu-toolchain build to download.
ENV VERSION=2025.01.20

RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
      -O riscv-toolchain.tar.xz
# NOTE(review): the archive presumably unpacks to /riscv, which the paths
# below rely on - confirm.
RUN tar -xJf riscv-toolchain.tar.xz

# Link with the downloaded GCC, run binaries under qemu-riscv32 and
# disassemble with llvm-objdump.
ENV CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_LINKER=/riscv/bin/riscv32-unknown-linux-gnu-gcc
ENV CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv32 -cpu max -L /riscv/sysroot"
ENV OBJDUMP=llvm-objdump

View file

@ -0,0 +1,10 @@
# Host GCC plus the riscv64-linux-gnu cross toolchain, LLVM tools and QEMU
# user-mode emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gcc \
      gcc-riscv64-linux-gnu \
      libc6-dev \
      libc6-dev-riscv64-cross \
      llvm \
      qemu-user

# Link with the cross GCC, run binaries under qemu-riscv64 and disassemble
# with llvm-objdump.
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -cpu max -L /usr/riscv64-linux-gnu"
ENV OBJDUMP=llvm-objdump

View file

@ -0,0 +1,14 @@
# Host GCC and clang plus the s390x-linux-gnu cross toolchain and QEMU
# user-mode emulation.
FROM ubuntu:25.04

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      clang \
      curl \
      file \
      gcc \
      gcc-s390x-linux-gnu \
      libc6-dev \
      libc6-dev-s390x-cross \
      make \
      qemu-user

# Link with the cross GCC and run the produced binaries under qemu-s390x.
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -cpu max -L /usr/s390x-linux-gnu"
ENV OBJDUMP=s390x-linux-gnu-objdump

View file

@ -0,0 +1,13 @@
# clang from apt plus a pinned wasmtime release used as the test runner for
# wasm32-wasip1.
FROM ubuntu:25.04

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      clang \
      curl \
      xz-utils

# Download the wasmtime release and unpack it straight from the stream; the
# resulting directory is added to PATH below.
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz \
  | tar xJf -
ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux

# Run test binaries under wasmtime, passing the release deps directory via
# `--dir`.
ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."

View file

@ -0,0 +1,18 @@
# x86_64 image that runs test binaries under the Intel Software Development
# Emulator (SDE), using a custom CPUID definition file.
FROM ubuntu:25.04

RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  libc6-dev \
  file \
  make \
  ca-certificates \
  wget \
  xz-utils

# Fetch the emulator over HTTPS: the archive contains the binary that executes
# every test, so it must not be downloaded over an unauthenticated channel.
# `ca-certificates` (installed above) provides the trust store wget needs.
RUN wget https://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
RUN mkdir intel-sde
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde

# Runner command prefix; the trailing `--` separates SDE's own options from
# the test binary that follows.
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def -rtm-mode full -tsx --"

# These tests fail with SDE as it doesn't support saving register data
ENV STDARCH_TEST_SKIP_FUNCTION="xsave,xsaveopt,xsave64,xsaveopt64"

View file

@ -0,0 +1,71 @@
# Copyright (C) 2024-2024 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and your
# use of them is governed by the express license under which they were provided to
# you ("License"). Unless the License provides otherwise, you may not use, modify,
# copy, publish, distribute, disclose or transmit this software or the related
# documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express or
# implied warranties, other than those that are expressly stated in the License.
#
# The CPUID information in this file is for software enabling purposes only and
# it is not a full and accurate representation of the CPU under development which
# it represents.
# The CPUID information in this file is not a guarantee of the availability of
# features or characteristics in the final released CPU.
#
# CPUID_VERSION = 1.0
# Input => Output
# EAX ECX => EAX EBX ECX EDX
00000000 ******** => 00000024 68747541 444d4163 69746e65
00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
00000003 ******** => 00000000 00000000 00000000 00000000
00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
00000004 00000004 => 00000000 00000000 00000000 00000000
00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
00000008 ******** => 00000000 00000000 00000000 00000000
00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
0000000a ******** => 07300403 00000000 00000000 00000603
0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
0000000b 00000001 => 00000004 00000002 00000201 0000001e
0000000c ******** => 00000000 00000000 00000000 00000000
0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
0000000d 00000001 => 0000001f 00000240 00000100 00000000
0000000d 00000002 => 00000100 00000240 00000000 00000000
0000000d 00000005 => 00000040 00000440 00000000 00000000 #zmasks
0000000d 00000006 => 00000200 00000480 00000000 00000000 #zmmh
0000000d 00000007 => 00000400 00000680 00000000 00000000 #zmm
0000000d 00000011 => 00000040 00000ac0 00000002 00000000 #tileconfig
0000000d 00000012 => 00002000 00000b00 00000006 00000000 #tiles
0000000d 00000013 => 00000080 000003c0 00000000 00000000 #APX
00000014 00000000 => 00000000 00000010 00000000 00000000 #ptwrite
00000019 ******** => 00000000 00000005 00000000 00000000 #Key Locker
0000001d 00000000 => 00000001 00000000 00000000 00000000 #AMX Tile
0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
0000001e 00000001 => 000001ff 00000000 00000000 00000000
0000001f 00000000 => 00000001 00000002 00000100 0000001e
0000001f 00000001 => 00000007 00000070 00000201 0000001e
0000001f 00000002 => 00000000 00000000 00000002 0000001e
00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
80000000 ******** => 80000008 00000000 00000000 00000000
80000001 ******** => 00000000 00000000 00200961 2c100000
80000002 ******** => 00000000 00000000 00000000 00000000
80000003 ******** => 00000000 00000000 00000000 00000000
80000004 ******** => 00000000 00000000 00000000 00000000
80000005 ******** => 00000000 00000000 00000000 00000000
80000006 ******** => 00000000 00000000 01006040 00000000
80000007 ******** => 00000000 00000000 00000000 00000100
80000008 ******** => 00003028 00000200 00000200 00000000
# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.

41
library/stdarch/ci/dox.sh Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Builds and documents `core_arch` and `std_detect` for a fixed list of target
# triples, or only for the triple passed as the first argument.
set -ex
export RUSTDOCFLAGS="-D warnings"
# Build and then document the `core_arch` and `std_detect` crates for the
# target triple given as the first argument.
dox() {
    local subcommand manifest

    # When CI is set to a non-empty value, try to install the target first;
    # `|| true` keeps a failure here from aborting the script under `set -e`.
    if [ -n "${CI}" ]; then
        rustup target add "${1}" || true
    fi

    cargo clean --target "${1}"

    # Both crates are built before either one is documented.
    for subcommand in build doc; do
        for manifest in crates/core_arch/Cargo.toml crates/std_detect/Cargo.toml; do
            cargo "${subcommand}" --verbose --target "${1}" --manifest-path "${manifest}"
        done
    done
}
# With a target argument, document only that target; otherwise walk the
# default list below.
if [ -n "$1" ]; then
    dox "${1}"
else
    # MIPS targets disabled since they are dropped to tier 3.
    # See https://github.com/rust-lang/compiler-team/issues/648
    #   mips-unknown-linux-gnu
    #   mips64-unknown-linux-gnuabi64
    for triple in \
        i686-unknown-linux-gnu \
        x86_64-unknown-linux-gnu \
        armv7-unknown-linux-gnueabihf \
        aarch64-unknown-linux-gnu \
        powerpc-unknown-linux-gnu \
        powerpc64le-unknown-linux-gnu \
        loongarch64-unknown-linux-gnu \
        wasm32-unknown-unknown \
        nvptx64-nvidia-cuda
    do
        dox "${triple}"
    done
fi

View file

@ -0,0 +1,60 @@
#!/usr/bin/env sh
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.
set -ex
# Refuse to run without any argument: print the usage line on stderr and fail.
if [ "$#" -eq 0 ]; then
    echo "Usage: $0 <TARGET>" >&2
    exit 1
fi
# Build the docker image for the target named by the first argument and run
# `ci/run.sh` for that target inside it.
#
# $1 - target name; it must match a directory `ci/docker/$1/` containing a
#      Dockerfile.
run() {
# Set the linker that is used for the host (e.g. when compiling a build.rs)
# This overrides any configuration in e.g. `.cargo/config.toml`, which will
# probably not work within the docker container.
# The variable name is derived from the host tuple: lower-case letters are
# upper-cased and `-` becomes `_`, giving `CARGO_TARGET_<HOST>_LINKER`.
HOST_LINKER="CARGO_TARGET_$(rustc --print host-tuple | tr '[:lower:]-' '[:upper:]_')_LINKER"
# Prevent `Read-only file system (os error 30)`.
# (The checkout is mounted read-only in the container below, so the lockfile
# is generated here, before the container starts.)
cargo generate-lockfile
echo "Building docker container for TARGET=${1}"
docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
# These directories are bind-mounted into the container below without `:ro`,
# so they are the only parts of the checkout the container can write to.
mkdir -p target c_programs rust_programs
echo "Running docker"
# The container runs as the invoking user and gets:
#   - `--env NAME=value` entries set explicitly (CARGO_HOME, CARGO_TARGET_DIR,
#     TARGET and the host-linker variable computed above, set to `cc`);
#   - `--env NAME` entries with no value, which pass the caller's value of
#     that variable through to the container;
#   - the caller's `~/.cargo` as /cargo, the rustc sysroot read-only as /rust,
#     and the current directory read-only as /checkout.
# Inside the container HOME is /tmp and /rust/bin is appended to PATH before
# `ci/run.sh` is exec'd with the target as its argument.
# shellcheck disable=SC2016
docker run \
--rm \
--user "$(id -u)":"$(id -g)" \
--env CARGO_HOME=/cargo \
--env CARGO_TARGET_DIR=/checkout/target \
--env TARGET="${1}" \
--env "${HOST_LINKER}"="cc" \
--env STDARCH_TEST_EVERYTHING \
--env STDARCH_DISABLE_ASSERT_INSTR \
--env NOSTD \
--env NORUN \
--env RUSTFLAGS \
--env CARGO_UNSTABLE_BUILD_STD \
--env RUST_STD_DETECT_UNSTABLE \
--volume "${HOME}/.cargo":/cargo \
--volume "$(rustc --print sysroot)":/rust:ro \
--volume "$(pwd)":/checkout:ro \
--volume "$(pwd)"/target:/checkout/target \
--volume "$(pwd)"/c_programs:/checkout/c_programs \
--volume "$(pwd)"/rust_programs:/checkout/rust_programs \
--init \
--workdir /checkout \
--privileged \
stdarch \
sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh ${1}"
}
# Run a single target when one is named; with an empty first argument, run
# every target that has a directory under ci/docker/.
if [ -z "$1" ]; then
    # `run` expects a bare target name and builds the Dockerfile path
    # `ci/docker/<name>/Dockerfile` itself, so pass only the directory name.
    # Passing the whole `ci/docker/<name>` path would double the prefix.
    # The trailing slash in the glob restricts the loop to directories.
    for d in ci/docker/*/; do
        d="${d%/}"
        run "${d##*/}"
    done
else
    run "${1}"
fi

203
library/stdarch/ci/run.sh Executable file
View file

@ -0,0 +1,203 @@
#!/usr/bin/env sh
# Abort on the first failing command (-e) and trace every command (-x).
set -ex
# Fail immediately, with this message, when TARGET is unset.
: "${TARGET?The TARGET environment variable must be set.}"
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability to be worth it.
# (The three exports below are currently commented out.)
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1
#export RUST_TEST_THREADS=1
# Flags applied to every build in this script, appended to whatever the caller
# already put in RUSTFLAGS.
export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
# Snapshot of RUSTFLAGS taken before the target-specific additions made
# further down; it is used for the `cargo run` of the intrinsic-test tool.
export HOST_RUSTFLAGS="${RUSTFLAGS}"
# Default to the release profile when PROFILE is unset or empty.
export PROFILE="${PROFILE:="--profile=release"}"
# Append target-specific flags to RUSTFLAGS. Only the first matching arm runs;
# targets that match no arm keep the flags set above.
case ${TARGET} in
# On Windows the linker performs identical COMDAT folding (ICF) by default
# in release mode which removes identical COMDAT sections. This interferes
# with our instruction assertions just like LLVM's MergeFunctions pass so
# we disable it.
*-pc-windows-msvc)
export RUSTFLAGS="${RUSTFLAGS} -Clink-args=/OPT:NOICF"
;;
# On 32-bit use a static relocation model which avoids some extra
# instructions when dealing with static data, notably allowing some
# instruction assertion checks to pass below the 20 instruction limit. If
# this is the default, dynamic, then too many instructions are generated
# when we assert the instruction for a function and it causes tests to fail.
i686-* | i586-*)
export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static"
;;
# Some x86_64 targets enable by default more features beyond SSE2,
# which cause some instruction assertion checks to fail.
x86_64-*)
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3"
;;
#Unoptimized build uses fast-isel which breaks with msa
mips-* | mipsel-*)
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
;;
# Enable NEON on the armv7/thumbv7 targets whose triple ends in `eabihf`.
armv7-*eabihf | thumbv7-*eabihf)
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
;;
# Enable the `zk`, `zks`, `zbb` and `zbc` target features on RISC-V.
# NOTE(review): the comment previously attached to this arm talked about
# detecting RISC-V compilers for the deprecated `gcc` crate, which this arm
# does not do; it only adds target features.
riscv*)
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
;;
esac
# Log the effective configuration so it appears in the job output. Variables
# that are unset print as an empty value.
echo "RUSTFLAGS=${RUSTFLAGS}"
echo "OBJDUMP=${OBJDUMP}"
echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}"
echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}"
echo "STDARCH_TEST_SKIP_FEATURE=${STDARCH_TEST_SKIP_FEATURE}"
echo "STDARCH_TEST_SKIP_FUNCTION=${STDARCH_TEST_SKIP_FUNCTION}"
echo "PROFILE=${PROFILE}"
# Run `cargo test` (or `cargo build` when NORUN=1) for ${TARGET}.
#
# $1 - extra cargo arguments, placed before the `--` separator
# $2 - extra arguments placed after the `--` separator
cargo_test() {
    subcmd="test"
    if [ "$NORUN" = "1" ]; then
        export subcmd="build"
    fi

    # The command is assembled as one string and deliberately left unquoted
    # when executed, so that the arguments packed into $1 and $2 are split
    # into separate words.
    cmd="cargo ${subcmd} --target=$TARGET $1 -- $2"

    # wasm targets can't catch panics, so if a test fails make sure the test
    # harness isn't trying to capture output; otherwise we won't get any
    # useful output.
    if [ "${TARGET#wasm32}" != "${TARGET}" ]; then
        cmd="$cmd --nocapture"
    fi

    $cmd
}
# Manifest selectors for the crates this script works with.
INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"

# `core_arch` is tested unconditionally.
cargo_test "${CORE_ARCH} ${PROFILE}"

# Everything else is skipped when NOSTD=1.
if [ "$NOSTD" != "1" ]; then
    cargo_test "${STD_DETECT} ${PROFILE}"

    # `std_detect` without default features: first with no features at all,
    # then with each detection feature on its own, then with both together.
    for features in \
        "" \
        "--features=std_detect_file_io" \
        "--features=std_detect_dlsym_getauxval" \
        "--features=std_detect_dlsym_getauxval,std_detect_file_io"
    do
        cargo_test "${STD_DETECT} --no-default-features${features:+ ${features}}"
    done

    cargo_test "${STDARCH_EXAMPLES} ${PROFILE}"
fi
# Test targets compiled with extra features.
#
# Only the first matching arm runs, so the more specific patterns
# (`x86_64-unknown-linux-gnu`, `powerpc64*`) are listed before the broader
# ones (`x86_64* | i686*`, `powerpc*`).
# Within an arm, RUSTFLAGS keeps growing: a later `cargo_test` call also has
# the features added for the earlier calls.
# These `cargo_test` calls pass no `--manifest-path`, unlike the runs above.
case ${TARGET} in
x86_64-unknown-linux-gnu)
# Setting this variable makes `#[assert_instr]` skip its instruction checks.
export STDARCH_DISABLE_ASSERT_INSTR=1
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
cargo_test "${PROFILE}"
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx512f"
cargo_test "${PROFILE}"
;;
x86_64* | i686*)
export STDARCH_DISABLE_ASSERT_INSTR=1
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
cargo_test "${PROFILE}"
;;
# FIXME: don't build anymore
#mips-*gnu* | mipsel-*gnu*)
# export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
# cargo_test "${PROFILE}"
# ;;
mips64*)
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
cargo_test "${PROFILE}"
;;
s390x*)
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1"
cargo_test "${PROFILE}"
;;
powerpc64*)
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
cargo_test "${PROFILE}"
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vsx"
cargo_test "${PROFILE}"
;;
powerpc*)
# qemu has a bug in PPC32 which leads to a crash when compiled with `vsx`
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
cargo_test "${PROFILE}"
;;
# Setup aarch64 & armv7 specific variables, the runner, along with some
# tests to skip
# These arms run no tests themselves; the TEST_* variables they set are
# consumed by the intrinsic-test invocation in the next `case` statement.
aarch64-unknown-linux-gnu*)
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++-19"
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
;;
aarch64_be-unknown-linux-gnu*)
TEST_CPPFLAGS="-fuse-ld=lld"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++-19"
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
;;
armv7-unknown-linux-gnueabihf*)
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
TEST_CXX_COMPILER="clang++-19"
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
;;
*)
;;
esac
# Arm specific
#
# Run the `intrinsic-test` binary against `intrinsics_data/arm_intrinsics.json`
# using the TEST_* variables chosen for this target in the previous `case`.
# For this one command RUSTFLAGS is replaced by HOST_RUSTFLAGS (the snapshot
# taken before any target-specific flags were appended), CPPFLAGS is set from
# TEST_CPPFLAGS and RUST_LOG is set to `warn`.
case "${TARGET}" in
aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
--runner "${TEST_RUNNER}" \
--cppcompiler "${TEST_CXX_COMPILER}" \
--skip "${TEST_SKIP_INTRINSICS}" \
--target "${TARGET}"
;;
# Same invocation as above plus an explicit linker and C++ toolchain
# directory. Neither variable is set by this script.
# NOTE(review): presumably both come from the docker image - confirm.
aarch64_be-unknown-linux-gnu*)
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
--runner "${TEST_RUNNER}" \
--cppcompiler "${TEST_CXX_COMPILER}" \
--skip "${TEST_SKIP_INTRINSICS}" \
--target "${TARGET}" \
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
;;
*)
;;
esac
# Test examples
#
# Skipped when either NORUN=1 or NOSTD=1. The subshell keeps the `cd` from
# affecting anything that might follow.
if [ "$NORUN" != "1" ]; then
    if [ "$NOSTD" != 1 ]; then
        (
            cd examples
            cargo test --target "$TARGET" "${PROFILE}"
            # Feed the line `test` on stdin to the `cargo run ... hex` command.
            printf 'test\n' | cargo run --target "$TARGET" "${PROFILE}" hex
        )
    fi
fi

22
library/stdarch/ci/style.sh Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env sh
set -ex
# Formatting check. It runs only when the rustfmt component could be added;
# once it has, any failure below aborts the script (`set -e`).
if rustup component add rustfmt-preview; then
    command -v rustfmt
    rustfmt -V
    cargo fmt --all -- --check
fi

# if rustup component add clippy-preview ; then
# cargo clippy -V
# cargo clippy --all -- -D clippy::pedantic
# fi

# shellcheck is mandatory: fail right away when it cannot be executed.
if ! shellcheck --version; then
    echo "shellcheck not found"
    exit 1
fi
shellcheck -e SC2103 ci/*.sh

View file

@ -0,0 +1,17 @@
[package]
name = "assert-instr-macro"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
edition = "2024"
[lib]
proc-macro = true
test = false
[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
syn = { version = "2.0", features = ["full"] }
[lints.rust]
unexpected_cfgs = {level = "warn", check-cfg = ['cfg(optimized)'] }

View file

@ -0,0 +1,12 @@
use std::env;
/// Build script: emits the `optimized` cfg when the crate is built with the
/// `release` profile or with an optimization level of at least 2.
fn main() {
    // A missing or non-numeric `OPT_LEVEL` counts as level 0.
    let opt_level: i32 = match env::var("OPT_LEVEL") {
        Ok(raw) => raw.parse().unwrap_or(0),
        Err(_) => 0,
    };

    // A missing `PROFILE` is simply "not release".
    let is_release = env::var("PROFILE")
        .map(|profile| profile == "release")
        .unwrap_or(false);

    if is_release || opt_level >= 2 {
        println!("cargo:rustc-cfg=optimized");
    }
}

View file

@ -0,0 +1,224 @@
//! Implementation of the `#[assert_instr]` macro
//!
//! This macro is used when testing the `stdarch` crate and is used to generate
//! test cases to assert that functions do indeed contain the instructions that
//! we're expecting them to contain.
//!
//! The procedural macro here is relatively simple, it simply appends a
//! `#[test]` function to the original token stream which asserts that the
//! function itself contains the relevant instruction.
#![deny(rust_2018_idioms)]
#[macro_use]
extern crate quote;
use proc_macro2::TokenStream;
use quote::ToTokens;
/// Attribute macro that appends a `#[test]` function to the annotated function.
///
/// The generated test defines a non-inlined, unmangled shim that calls the
/// annotated function, and then hands the shim's address, its name and the
/// expected instruction to `::stdarch_test::assert`.
///
/// `attr` is parsed as an [`Invoc`]: the expected instruction, optionally
/// followed by `name = expr` pairs. A pair whose name matches a function
/// argument fixes that argument to `expr` inside the shim; a pair whose name
/// matches a const generic supplies its value.
///
/// When the crate is not built with the `optimized` cfg, or when the
/// `STDARCH_DISABLE_ASSERT_INSTR` environment variable is set, the item is
/// returned unchanged and no test is generated.
///
/// Parse failures of `attr` or `item` are reported as compile errors.
///
/// # Panics
///
/// Panics if the item is not a function, if an argument is not a plain
/// `ident: Type` argument, if a generic parameter is not a const generic, or
/// if a const generic has no value in the attribute arguments.
#[proc_macro_attribute]
pub fn assert_instr(
attr: proc_macro::TokenStream,
item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let invoc = match syn::parse::<Invoc>(attr) {
Ok(s) => s,
Err(e) => return e.to_compile_error().into(),
};
let item = match syn::parse::<syn::Item>(item) {
Ok(s) => s,
Err(e) => return e.to_compile_error().into(),
};
let func = match item {
syn::Item::Fn(ref f) => f,
_ => panic!("must be attached to a function"),
};
let instr = &invoc.instr;
let name = &func.sig.ident;
// The shim calls the function under test, so a `#[deprecated]` function
// needs the lint silenced on the shim.
let maybe_allow_deprecated = if func
.attrs
.iter()
.any(|attr| attr.path().is_ident("deprecated"))
{
quote! { #[allow(deprecated)] }
} else {
quote! {}
};
// Disable assert_instr for x86 targets compiled with avx enabled, which
// causes LLVM to generate different intrinsics than the ones we are
// testing for.
// Only the presence of the variable is checked, not its value.
let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok();
// If instruction tests are disabled avoid emitting this shim at all, just
// return the original item without our attribute.
if !cfg!(optimized) || disable_assert_instr {
return (quote! { #item }).into();
}
// Turn the instruction text into something usable inside an identifier:
// `.`, `/` and `:` become `_`, and whitespace is removed.
let instr_str = instr
.replace(['.', '/', ':'], "_")
.replace(char::is_whitespace, "");
let assert_name = syn::Ident::new(&format!("assert_{name}_{instr_str}"), name.span());
// This name has to be unique enough for us to find it in the disassembly later on:
let shim_name = syn::Ident::new(
&format!("stdarch_test_shim_{name}_{instr_str}"),
name.span(),
);
// `inputs`: parameters the shim itself takes.
// `input_vals`: expressions passed to the function under test, in order.
// `const_vals`: values for the function's const generics, in order.
let mut inputs = Vec::new();
let mut input_vals = Vec::new();
let mut const_vals = Vec::new();
let ret = &func.sig.output;
for arg in func.sig.inputs.iter() {
let capture = match *arg {
syn::FnArg::Typed(ref c) => c,
ref v => panic!(
"arguments must not have patterns: `{:?}`",
v.clone().into_token_stream()
),
};
let ident = match *capture.pat {
syn::Pat::Ident(ref i) => &i.ident,
_ => panic!("must have bare arguments"),
};
// An argument given a value in the attribute is fixed to that value and
// dropped from the shim's parameter list; any other argument is forwarded.
if let Some((_, tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
input_vals.push(quote! { #tokens });
} else {
inputs.push(capture);
input_vals.push(quote! { #ident });
}
}
for arg in func.sig.generics.params.iter() {
let c = match *arg {
syn::GenericParam::Const(ref c) => c,
ref v => panic!(
"only const generics are allowed: `{:?}`",
v.clone().into_token_stream()
),
};
if let Some((_, tokens)) = invoc.args.iter().find(|a| c.ident == a.0) {
const_vals.push(quote! { #tokens });
} else {
panic!("const generics must have a value for tests");
}
}
// Copy onto the shim every attribute of the original function whose first
// path segment starts with `target`.
let attrs = func
.attrs
.iter()
.filter(|attr| {
attr.path()
.segments
.first()
.expect("attr.path.segments.first() failed")
.ident
.to_string()
.starts_with("target")
})
.collect::<Vec<_>>();
let attrs = Append(&attrs);
// Use an ABI on Windows that passes SIMD values in registers, like what
// happens on Unix (I think?) by default.
// `cfg!(windows)` is evaluated for this proc-macro crate itself; the
// architecture is then read from the `TARGET` environment variable.
let abi = if cfg!(windows) {
let target = std::env::var("TARGET").unwrap();
if target.contains("x86_64") {
syn::LitStr::new("sysv64", proc_macro2::Span::call_site())
} else if target.contains("86") {
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
} else {
syn::LitStr::new("C", proc_macro2::Span::call_site())
}
} else {
syn::LitStr::new("C", proc_macro2::Span::call_site())
};
// The shim: `#[no_mangle]` and `#[inline(never)]` keep it as a separate,
// findable symbol wrapping a single call to the function under test.
let to_test = quote! {
#attrs
#maybe_allow_deprecated
#[unsafe(no_mangle)]
#[inline(never)]
pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
#name::<#(#const_vals),*>(#(#input_vals),*)
}
};
// The test function defines the shim locally and passes its address, its
// name and the expected instruction to `stdarch_test::assert`.
let tokens: TokenStream = quote! {
#[test]
#[allow(non_snake_case)]
fn #assert_name() {
#to_test
::stdarch_test::assert(#shim_name as usize, stringify!(#shim_name), #instr);
}
};
// Emit the original item followed by the generated test.
let tokens: TokenStream = quote! {
#item
#tokens
};
tokens.into()
}
/// Parsed arguments of an `#[assert_instr(...)]` invocation.
struct Invoc {
/// Text of the expected instruction, assembled from the tokens that precede
/// the first comma.
instr: String,
/// `name = expr` pairs that follow the instruction, in source order.
args: Vec<(syn::Ident, syn::Expr)>,
}
impl syn::parse::Parse for Invoc {
/// Parses `instruction [, name = expr]* [,]`.
///
/// The instruction is built by concatenating, up to the first comma or the
/// end of input: identifiers (keywords included, via `parse_any`), `.`
/// tokens, and the contents of string literals. Any other token is an error,
/// as is an empty instruction.
///
/// The remainder is a comma-separated list of `name = expr` pairs; a
/// trailing comma is accepted, while any other leftover tokens are an error.
fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
use syn::{Token, ext::IdentExt};
let mut instr = String::new();
while !input.is_empty() {
// A comma ends the instruction part; the comma itself is consumed.
if input.parse::<Token![,]>().is_ok() {
break;
}
if let Ok(ident) = syn::Ident::parse_any(input) {
instr.push_str(&ident.to_string());
continue;
}
if input.parse::<Token![.]>().is_ok() {
instr.push('.');
continue;
}
if let Ok(s) = input.parse::<syn::LitStr>() {
instr.push_str(&s.value());
continue;
}
// NOTE(review): this prints the unparsed tokens to stdout before the
// error is returned; it looks like a debugging aid - confirm it is wanted.
println!("{:?}", input.cursor().token_stream());
return Err(input.error("expected an instruction"));
}
// Also reached when the attribute has no arguments at all.
if instr.is_empty() {
return Err(input.error("expected an instruction before comma"));
}
let mut args = Vec::new();
while !input.is_empty() {
let name = input.parse::<syn::Ident>()?;
input.parse::<Token![=]>()?;
let expr = input.parse::<syn::Expr>()?;
args.push((name, expr));
// With no comma after a pair, the input must end here.
if input.parse::<Token![,]>().is_err() {
if !input.is_empty() {
return Err(input.error("extra tokens at end"));
}
break;
}
}
Ok(Self { instr, args })
}
}
/// Wrapper whose token output is every element of the wrapped collection,
/// emitted one after another with nothing in between.
struct Append<T>(T);

impl<T> quote::ToTokens for Append<T>
where
    T: Clone + IntoIterator,
    T::Item: quote::ToTokens,
{
    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
        // Iterating consumes the collection and only `&self` is available,
        // so a clone of it is iterated instead.
        self.0
            .clone()
            .into_iter()
            .for_each(|element| element.to_tokens(tokens));
    }
}

View file

@ -0,0 +1,33 @@
[package]
name = "core_arch"
version = "0.1.5"
authors = [
"Alex Crichton <alex@alexcrichton.com>",
"Andrew Gallant <jamslam@gmail.com>",
"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
]
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
homepage = "https://github.com/rust-lang/stdarch"
repository = "https://github.com/rust-lang/stdarch"
readme = "README.md"
keywords = ["core", "simd", "arch", "intrinsics"]
categories = ["hardware-support", "no-std"]
license = "MIT OR Apache-2.0"
edition = "2024"
[badges]
is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" }
is-it-maintained-open-issues = { repository = "rust-lang/stdarch" }
maintenance = { status = "experimental" }
[dev-dependencies]
stdarch-test = { version = "0.*", path = "../stdarch-test" }
std_detect = { version = "0.*", path = "../std_detect" }
[target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dev-dependencies]
syscalls = { version = "0.6.18", default-features = false }
[lints.clippy]
too_long_first_doc_paragraph = "allow"
missing_transmute_annotations = "allow"
useless_transmute = "allow"

View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,25 @@
Copyright (c) 2017 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,116 @@
## The following neon instructions are currently not implemented in stdarch
### Not implemented on arm:
`vcadd_rot270_f32`
`vcadd_rot90_f32`
`vcaddq_rot270_f32`
`vcaddq_rot90_f32`
`vdot_s32`
`vdot_u32`
`vdotq_s32`
`vdotq_u32`
`vdot_lane_s32`
`vdot_lane_u32`
`vdotq_lane_s32`
`vdotq_lane_u32`
`vcmla_f32`
`vcmla_lane_f32`
`vcmla_laneq_f32`
`vcmla_rot180_f32`
`vcmla_rot180_lane_f32`
`vcmla_rot180_laneq_f32`
`vcmla_rot270_f32`
`vcmla_rot270_lane_f32`
`vcmla_rot270_laneq_f32`
`vcmla_rot90_f32`
`vcmla_rot90_lane_f32`
`vcmla_rot90_laneq_f32`
`vcmlaq_f32`
`vcmlaq_lane_f32`
`vcmlaq_laneq_f32`
`vcmlaq_rot180_f32`
`vcmlaq_rot180_lane_f32`
`vcmlaq_rot180_laneq_f32`
`vcmlaq_rot270_f32`
`vcmlaq_rot270_lane_f32`
`vcmlaq_rot270_laneq_f32`
`vcmlaq_rot90_f32`
`vcmlaq_rot90_lane_f32`
`vcmlaq_rot90_laneq_f32`
### Not implemented in LLVM:
`vrnd32x_f64`
`vrnd32xq_f64`
`vrnd32z_f64`
`vrnd32zq_f64`
`vrnd64x_f64`
`vrnd64xq_f64`
`vrnd64z_f64`
`vrnd64zq_f64`
### LLVM Select errors may occur:
`vsudot_lane_s32`
`vsudot_laneq_s32`
`vsudotq_lane_s32`
`vsudotq_laneq_s32`
`vusdot_lane_s32`
`vusdot_laneq_s32`
`vusdot_s32`
`vusdotq_lane_s32`
`vusdotq_laneq_s32`
`vusdotq_s32`

View file

@ -0,0 +1,58 @@
`core::arch` - Rust's core library architecture-specific intrinsics
=======
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
# Usage
`core::arch` is available as part of `libcore` and it is re-exported by
`libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate.
Using `core::arch` via this crate requires nightly Rust, and it can (and does)
break often. The only cases in which you should consider using it via this crate
are:
* if you need to re-compile `core::arch` yourself, e.g., with particular
target-features enabled that are not enabled for `libcore`/`libstd`. Note: if
you need to re-compile it for a non-standard target, please prefer using
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
this crate.
* using some features that might not be available even behind unstable Rust
features. We try to keep these to a minimum. If you need to use some of these
features, please open an issue so that we can expose them in nightly Rust and
you can use them from there.
# Documentation
* [Documentation - i686][i686]
* [Documentation - x86\_64][x86_64]
* [Documentation - arm][arm]
* [Documentation - aarch64][aarch64]
* [Documentation - powerpc][powerpc]
* [Documentation - powerpc64][powerpc64]
* [How to get started][contrib]
* [How to help implement intrinsics][help-implement]
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/
[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/
[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/
[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/
# License
`core_arch` is primarily distributed under the terms of both the MIT license and
the Apache License (Version 2.0), with portions covered by various BSD-like
licenses.
See LICENSE-APACHE, and LICENSE-MIT for details.
# Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in `core_arch` by you, as defined in the Apache-2.0 license,
shall be dual licensed as above, without any additional terms or conditions.

View file

@ -0,0 +1,258 @@
<details><summary>["AMX-BF16"]</summary><p>
* [ ] [`__tile_dpbf16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbf16ps)
</p></details>
<details><summary>["AMX-COMPLEX"]</summary><p>
* [ ] [`__tile_cmmimfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmimfp16ps)
* [ ] [`__tile_cmmrlfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmrlfp16ps)
</p></details>
<details><summary>["AMX-FP16"]</summary><p>
* [ ] [`__tile_dpfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpfp16ps)
</p></details>
<details><summary>["AMX-INT8"]</summary><p>
* [ ] [`__tile_dpbssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbssd)
* [ ] [`__tile_dpbsud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbsud)
* [ ] [`__tile_dpbusd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbusd)
* [ ] [`__tile_dpbuud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbuud)
</p></details>
<details><summary>["AMX-TILE"]</summary><p>
* [ ] [`__tile_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_loadd)
* [ ] [`__tile_stored`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stored)
* [ ] [`__tile_stream_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stream_loadd)
* [ ] [`__tile_zero`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_zero)
</p></details>
<details><summary>["AVX512_FP16"]</summary><p>
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
</p></details>
<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
* [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
</p></details>
<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
* [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
* [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
* [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
* [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
</p></details>
<details><summary>["CET_SS"]</summary><p>
* [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
* [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
* [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
* [ ] [`_inc_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_inc_ssp)
* [ ] [`_incsspd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspd)
* [ ] [`_incsspq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspq)
* [ ] [`_rdsspd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspd_i32)
* [ ] [`_rdsspq_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspq_i64)
* [ ] [`_rstorssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rstorssp)
* [ ] [`_saveprevssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_saveprevssp)
* [ ] [`_setssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_setssbsy)
* [ ] [`_wrssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssd)
* [ ] [`_wrssq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssq)
* [ ] [`_wrussd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussd)
* [ ] [`_wrussq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussq)
</p></details>
<details><summary>["CLDEMOTE"]</summary><p>
* [ ] [`_mm_cldemote`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cldemote)
</p></details>
<details><summary>["CLFLUSHOPT"]</summary><p>
* [ ] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)
</p></details>
<details><summary>["CLWB"]</summary><p>
* [ ] [`_mm_clwb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clwb)
</p></details>
<details><summary>["CMPCCXADD"]</summary><p>
* [ ] [`_cmpccxadd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi32)
* [ ] [`_cmpccxadd_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi64)
</p></details>
<details><summary>["ENQCMD"]</summary><p>
* [ ] [`_enqcmd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmd)
* [ ] [`_enqcmds`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmds)
</p></details>
<details><summary>["FSGSBASE"]</summary><p>
* [ ] [`_readfsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u32)
* [ ] [`_readfsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u64)
* [ ] [`_readgsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u32)
* [ ] [`_readgsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u64)
* [ ] [`_writefsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u32)
* [ ] [`_writefsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u64)
* [ ] [`_writegsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u32)
* [ ] [`_writegsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u64)
</p></details>
<details><summary>["HRESET"]</summary><p>
* [ ] [`_hreset`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_hreset)
</p></details>
<details><summary>["INVPCID"]</summary><p>
* [ ] [`_invpcid`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_invpcid)
</p></details>
<details><summary>["MONITOR"]</summary><p>
* [ ] [`_mm_monitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_monitor)
* [ ] [`_mm_mwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mwait)
</p></details>
<details><summary>["MOVBE"]</summary><p>
* [ ] [`_loadbe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i16)
* [ ] [`_loadbe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i32)
* [ ] [`_loadbe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i64)
* [ ] [`_storebe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i16)
* [ ] [`_storebe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i32)
* [ ] [`_storebe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i64)
</p></details>
<details><summary>["MOVDIR64B"]</summary><p>
* [ ] [`_movdir64b`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_movdir64b)
</p></details>
<details><summary>["MOVDIRI"]</summary><p>
* [ ] [`_directstoreu_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u32)
* [ ] [`_directstoreu_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u64)
</p></details>
<details><summary>["PCONFIG"]</summary><p>
* [ ] [`_pconfig_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pconfig_u32)
</p></details>
<details><summary>["POPCNT"]</summary><p>
* [ ] [`_mm_popcnt_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32)
* [ ] [`_mm_popcnt_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64)
</p></details>
<details><summary>["PREFETCHI"]</summary><p>
* [ ] [`_m_prefetchit0`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit0)
* [ ] [`_m_prefetchit1`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit1)
</p></details>
<details><summary>["RAO_INT"]</summary><p>
* [ ] [`_aadd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i32)
* [ ] [`_aadd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i64)
* [ ] [`_aand_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i32)
* [ ] [`_aand_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i64)
* [ ] [`_aor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i32)
* [ ] [`_aor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i64)
* [ ] [`_axor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i32)
* [ ] [`_axor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i64)
</p></details>
<details><summary>["RDPID"]</summary><p>
* [ ] [`_rdpid_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdpid_u32)
</p></details>
<details><summary>["SERIALIZE"]</summary><p>
* [ ] [`_serialize`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_serialize)
</p></details>
<details><summary>["SSE"]</summary><p>
* [ ] [`_mm_free`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free)
* [ ] [`_mm_malloc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_malloc)
</p></details>
<details><summary>["TSXLDTRK"]</summary><p>
* [ ] [`_xresldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xresldtrk)
* [ ] [`_xsusldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsusldtrk)
</p></details>
<details><summary>["UINTR"]</summary><p>
* [ ] [`_clui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clui)
* [ ] [`_senduipi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_senduipi)
* [ ] [`_stui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_stui)
* [ ] [`_testui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_testui)
</p></details>
<details><summary>["USER_MSR"]</summary><p>
* [ ] [`_urdmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_urdmsr)
* [ ] [`_uwrmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_uwrmsr)
</p></details>
<details><summary>["WAITPKG"]</summary><p>
* [ ] [`_tpause`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tpause)
* [ ] [`_umonitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umonitor)
* [ ] [`_umwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umwait)
</p></details>
<details><summary>["WBNOINVD"]</summary><p>
* [ ] [`_wbnoinvd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wbnoinvd)
</p></details>

View file

@ -0,0 +1,3 @@
ignore = [
"src/simd.rs",
]

View file

@ -0,0 +1,39 @@
//! AArch64 intrinsics.
//!
//! The reference for NEON is [Arm's NEON Intrinsics Reference][arm_ref]. The
//! [Arm's NEON Intrinsics Online Database][arm_dat] is also useful.
//!
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
#![cfg_attr(
all(target_arch = "aarch64", target_abi = "softfloat"),
// Just allow the warning: anyone soundly using the intrinsics has to enable
// the target feature, and that will generate a warning for them.
allow(aarch64_softfloat_neon)
)]
// Memory tagging (MTE) intrinsics; re-exported under an unstable feature gate.
mod mte;
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub use self::mte::*;
// NEON intrinsics; re-exported as stable since 1.59.0.
mod neon;
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use self::neon::*;
// Transactional Memory Extension (TME) intrinsics; unstable.
mod tme;
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub use self::tme::*;
// Prefetch hint intrinsic and its constants; unstable.
mod prefetch;
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub use self::prefetch::*;
// Re-export everything from the sibling `arm_shared` module.
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use super::arm_shared::*;
// Test-only import of the `assert_instr` attribute.
#[cfg(test)]
use stdarch_test::assert_instr;
// Test-only helper module, visible to the rest of the crate.
#[cfg(test)]
pub(crate) mod test_support;

View file

@ -0,0 +1,171 @@
//! AArch64 Memory tagging intrinsics
//!
//! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#markdown-toc-mte-intrinsics)
// Raw LLVM intrinsics backing the public `__arm_mte_*` wrappers in this file.
// Each `link_name` is only attached when the target architecture is `aarch64`
// or `arm64ec`. All pointers are passed as untyped `*const ()`.
unsafe extern "unadjusted" {
// `llvm.aarch64.irg`: called by `__arm_mte_create_random_tag`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.irg"
)]
fn irg_(ptr: *const (), exclude: i64) -> *const ();
// `llvm.aarch64.gmi`: called by `__arm_mte_exclude_tag`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.gmi"
)]
fn gmi_(ptr: *const (), exclude: i64) -> i64;
// `llvm.aarch64.ldg`: called by `__arm_mte_get_tag`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.ldg"
)]
fn ldg_(ptr: *const (), tag_ptr: *const ()) -> *const ();
// `llvm.aarch64.stg`: called by `__arm_mte_set_tag`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.stg"
)]
fn stg_(tagged_ptr: *const (), addr_to_tag: *const ());
// `llvm.aarch64.addg`: called by `__arm_mte_increment_tag`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.addg"
)]
fn addg_(ptr: *const (), value: i64) -> *const ();
// `llvm.aarch64.subp`: called by `__arm_mte_ptrdiff`.
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.subp"
)]
fn subp_(ptr_a: *const (), ptr_b: *const ()) -> i64;
}
/// Returns `src` carrying a randomly generated logical address tag.
///
/// * `src`: a pointer containing an address.
/// * `mask`: a mask in which each of the lower 16 bits names a logical tag
///   that must be excluded from consideration. Zero excludes no tags.
///
/// The result holds a copy of the `src` address with a randomly generated
/// logical tag that is never one of the tags excluded by `mask`.
///
/// # Safety
///
/// The pointer produced by this intrinsic is invalid until the memory has been
/// appropriately tagged with `__arm_mte_set_tag`. If using that intrinsic on
/// the produced pointer is itself invalid, then the pointer is permanently
/// invalid and it is Undefined Behavior to dereference it.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_create_random_tag<T>(src: *const T, mask: u64) -> *const T {
    // The raw intrinsic works on untyped pointers and a signed mask.
    let untyped: *const () = src as *const ();
    let excluded_tags: i64 = mask as i64;
    let tagged: *const () = irg_(untyped, excluded_tags);
    tagged as *const T
}
/// Return a pointer with the logical address tag offset by a value.
///
/// `src`: A pointer containing an address and a logical tag.
/// `OFFSET`: A compile-time constant value in the range [0, 15].
///
/// Adds offset to the logical address tag in `src`, wrapping if the result is
/// outside of the valid 16 tags.
///
/// SAFETY: See `__arm_mte_create_random_tag`.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_increment_tag<const OFFSET: i64, T>(src: *const T) -> *const T {
addg_(src as *const (), OFFSET) as *const T
}
/// Extends a set of excluded logical tags with the tag carried by `src`.
///
/// * `src`: a pointer containing an address and a logical tag.
/// * `excluded`: a mask whose lower 16 bits each mark a logical tag that is
///   currently excluded.
///
/// Returns `excluded` with the logical tag stored in `src` added to the set.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_exclude_tag<T>(src: *const T, excluded: u64) -> u64 {
    // The raw intrinsic takes and returns the mask as a signed 64-bit value.
    let untyped: *const () = src as *const ();
    let updated: i64 = gmi_(untyped, excluded as i64);
    updated as u64
}
/// Stores an allocation tag for a 16-byte granule of memory.
///
/// * `tag_address`: a pointer containing an address and a logical tag; it
///   must be 16-byte aligned.
///
/// # Safety
///
/// `tag_address` must be 16-byte aligned. The tag is applied to the entire
/// 16-byte memory granule.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_set_tag<T>(tag_address: *const T) {
    // The same pointer is passed for both operands of the raw intrinsic: the
    // tagged pointer and the address to tag.
    let granule: *const () = tag_address as *const ();
    stg_(granule, granule);
}
/// Reads an allocation tag from memory and returns a new pointer carrying the
/// corresponding logical tag.
///
/// * `address`: a pointer containing the address whose allocation tag memory
///   is read. It does not need to be 16-byte aligned.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_get_tag<T>(address: *const T) -> *const T {
    // The same pointer is passed for both operands of the raw intrinsic.
    let untyped: *const () = address as *const ();
    let tagged: *const () = ldg_(untyped, untyped);
    tagged as *const T
}
/// Computes the difference between the address parts of two pointers.
///
/// The tags of `a` and `b` are ignored and the result is sign-extended.
#[inline]
#[target_feature(enable = "mte")]
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
pub unsafe fn __arm_mte_ptrdiff<T, U>(a: *const T, b: *const U) -> i64 {
    // Erase both pointee types; the raw intrinsic works on untyped pointers.
    let lhs: *const () = a as *const ();
    let rhs: *const () = b as *const ();
    subp_(lhs, rhs)
}
// Instruction-level checks for the MTE intrinsics.
//
// The public intrinsics are generic, so each one is wrapped in a concrete
// (non-generic) function that carries the `assert_instr` attribute naming the
// instruction the wrapper is expected to contain. The attribute is skipped on
// MSVC targets (see the FIXME on the first wrapper). The wrappers are never
// called directly, hence `#[allow(dead_code)]`.
#[cfg(test)]
mod test {
use super::*;
use stdarch_test::assert_instr;
// Expects `irg` for `__arm_mte_create_random_tag`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(irg))] // FIXME: MSVC `dumpbin` doesn't support MTE
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_create_random_tag(src: *const (), mask: u64) -> *const () {
__arm_mte_create_random_tag(src, mask)
}
// Expects `addg` for `__arm_mte_increment_tag`, instantiated with `OFFSET = 1`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(addg))]
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_increment_tag(src: *const ()) -> *const () {
__arm_mte_increment_tag::<1, _>(src)
}
// Expects `gmi` for `__arm_mte_exclude_tag`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(gmi))]
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_exclude_tag(src: *const (), excluded: u64) -> u64 {
__arm_mte_exclude_tag(src, excluded)
}
// Expects `stg` for `__arm_mte_set_tag`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stg))]
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_set_tag(src: *const ()) {
__arm_mte_set_tag(src)
}
// Expects `ldg` for `__arm_mte_get_tag`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldg))]
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_get_tag(src: *const ()) -> *const () {
__arm_mte_get_tag(src)
}
// Expects `subp` for `__arm_mte_ptrdiff`.
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(subp))]
#[allow(dead_code)]
#[target_feature(enable = "mte")]
unsafe fn test_arm_mte_ptrdiff(a: *const (), b: *const ()) -> i64 {
__arm_mte_ptrdiff(a, b)
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,80 @@
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw `llvm.prefetch` intrinsic used by `_prefetch` below.
unsafe extern "unadjusted" {
// `p` is the address to prefetch; `_prefetch` passes its `RW` and `LOCALITY`
// const parameters as `rw` and `loc`, and 1 (data cache) as `ty`.
#[link_name = "llvm.prefetch"]
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
}
/// `RW` value for [`_prefetch`](fn._prefetch.html): the prefetch is preparing
/// for a read.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_READ: i32 = 0;
/// `RW` value for [`_prefetch`](fn._prefetch.html): the prefetch is preparing
/// for a write.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_WRITE: i32 = 1;
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): streaming or
/// non-temporal prefetch, for data that is used only once.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_LOCALITY0: i32 = 0;
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 3
/// cache.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_LOCALITY1: i32 = 1;
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 2
/// cache.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_LOCALITY2: i32 = 2;
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 1
/// cache.
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub const _PREFETCH_LOCALITY3: i32 = 3;
/// Prefetches the cache line containing address `p`, as directed by `RW` and
/// `LOCALITY`.
///
/// `RW` selects the kind of access being prepared for and must be one of:
///
/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
///   for a read.
///
/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
///   for a write.
///
/// `LOCALITY` selects the target cache level and must be one of:
///
/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
///   non-temporal prefetch, for data that is used only once.
///
/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
///
/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
///
/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
///
/// Prefetch memory instructions tell the memory system that accesses to the
/// given address are likely to happen in the near future. The memory system
/// may react by doing work that is expected to speed those accesses up when
/// they occur, such as preloading the address into one or more caches.
/// Since these signals are only hints, a CPU is allowed to treat any or all
/// prefetch instructions as a NOP.
///
///
/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
#[inline(always)]
#[cfg_attr(test, assert_instr("prfm pldl1strm", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY0))]
#[cfg_attr(test, assert_instr("prfm pldl3keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY1))]
#[cfg_attr(test, assert_instr("prfm pldl2keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY2))]
#[cfg_attr(test, assert_instr("prfm pldl1keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY3))]
#[cfg_attr(test, assert_instr("prfm pstl1strm", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY0))]
#[cfg_attr(test, assert_instr("prfm pstl3keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY1))]
#[cfg_attr(test, assert_instr("prfm pstl2keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY2))]
#[cfg_attr(test, assert_instr("prfm pstl1keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY3))]
#[rustc_legacy_const_generics(1, 2)]
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
// FIXME: Replace this with the standard ACLE __pld/__pldx/__pli/__plix intrinsics
pub unsafe fn _prefetch<const RW: i32, const LOCALITY: i32>(p: *const i8) {
    // `cache type` operand of the `llvm.prefetch` intrinsic: 1 selects the
    // data cache.
    const DATA_CACHE: i32 = 1;
    // `RW` must fit in one unsigned bit and `LOCALITY` in two.
    static_assert_uimm_bits!(RW, 1);
    static_assert_uimm_bits!(LOCALITY, 2);
    prefetch(p, RW, LOCALITY, DATA_CACHE);
}

View file

@ -0,0 +1,184 @@
use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*};
use std::{mem::transmute, vec::Vec};
// Expands to a fresh `Vec<u64>` of test inputs: all-zero, all-one, and several
// values with the same byte repeated in every lane.
macro_rules! V_u64 {
() => {
vec![
0x0000000000000000u64,
0x0101010101010101u64,
0x0202020202020202u64,
0x0F0F0F0F0F0F0F0Fu64,
0x8080808080808080u64,
0xF0F0F0F0F0F0F0F0u64,
0xFFFFFFFFFFFFFFFFu64,
]
};
}
// Expands to a fresh `Vec<f64>` of test inputs: a few ordinary values plus the
// finite extremes, both infinities and NaN.
macro_rules! V_f64 {
() => {
vec![
0.0f64,
1.0f64,
-1.0f64,
1.2f64,
2.4f64,
f64::MAX,
f64::MIN,
f64::INFINITY,
f64::NEG_INFINITY,
f64::NAN,
]
};
}
// Expands to a closure that reinterprets a value of type `$t` as a `u64` via
// `transmute`. The closure body is not wrapped in `unsafe`, so the macro must
// be expanded inside an `unsafe` context (as `gen_test_fn!` does).
macro_rules! to64 {
($t : ident) => {
|v: $t| -> u64 { transmute(v) }
};
}
// Expands to a closure that reinterprets a value of type `$t` as a `u128` via
// `transmute`. The closure body is not wrapped in `unsafe`, so the macro must
// be expanded inside an `unsafe` context (as `gen_test_fn!` does).
macro_rules! to128 {
($t : ident) => {
|v: $t| -> u128 { transmute(v) }
};
}
/// Checks a two-operand vector function against a scalar reference
/// implementation for every ordered pair of values in `vals`.
///
/// * `vals`: the scalar inputs to combine.
/// * `fill1`: builds a vector operand (`V`) from one scalar input.
/// * `fill2`: builds the expected vector result (`W`) from the scalar
///   reference result (`U`).
/// * `cast`: converts a vector result into a comparable value (`X`).
/// * `test_fun`: the function under test.
/// * `verify_fun`: the scalar reference implementation.
///
/// Panics (via `assert_eq!`) on the first pair for which the converted result
/// of `test_fun` differs from the converted expected result.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + core::fmt::Debug,
    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
    V: Copy + core::fmt::Debug,
    W: Copy + core::fmt::Debug,
    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
    // Walk the full cartesian product. Zipping `vals` with itself would only
    // ever produce `(v, v)` pairs, so binary operations and comparisons would
    // never be exercised with two different operands.
    for i in vals.iter() {
        for j in vals.iter() {
            let a: V = fill1(*i);
            let b: V = fill1(*j);
            let actual_pre: W = test_fun(a, b);
            let expected_pre: W = fill2(verify_fun(*i, *j));
            // Compare through `cast` so that results are checked on their
            // converted representation rather than on `W` directly.
            let actual: X = cast(actual_pre);
            let expected: X = cast(expected_pre);
            assert_eq!(
                actual, expected,
                "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
                *i, *j, &a, &b, actual_pre, &a, &b, expected_pre
            );
        }
    }
}
// Generates a `pub(crate)` test driver named `$n` that forwards to `test`.
//
// `$t`, `$u`, `$v`, `$w`, `$x` are the `T`, `U`, `V`, `W`, `X` type arguments of
// `test`; `$vals`, `$fill1`, `$fill2` and `$cast` are passed through as its
// first four arguments. The generated function takes only the function under
// test and the reference function.
macro_rules! gen_test_fn {
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
// The `unsafe` block also covers the argument expressions, which is what
// allows `$cast` to be a `to64!`/`to128!` closure that calls `transmute`.
unsafe {
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
};
}
};
}
// Generates a `pub(crate)` function `$id` that builds a `$out_t` whose
// `$num_els` lanes all hold `val`.
//
// The value is replicated into an array `[$in_t; $num_els]`, which is
// transmuted to the integer type `$cmp_t` and then to `$out_t`; all three
// types must therefore have the same size. `$el_width` is only referenced by
// the commented-out debug print below.
macro_rules! gen_fill_fn {
($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
pub(crate) fn $id(val: $in_t) -> $out_t {
let initial: [$in_t; $num_els] = [val; $num_els];
let result: $cmp_t = unsafe { transmute(initial) };
let result_out: $out_t = unsafe { transmute(result) };
// println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
result_out
}
};
}
// Fill helpers for 64-bit lanes. The un-suffixed names build one-lane vectors
// (going through `u64`); the `q` names build two-lane vectors (going through
// `u128`).
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
// The polynomial vectors are filled from plain `u64` values.
gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
// Test drivers generated from `gen_test_fn!`. Argument order is: name, scalar
// input type, scalar reference result type, vector operand type, vector result
// type, comparison type, input values, operand fill, result fill, cast.
// NOTE(review): `ari`/`cmp` presumably stand for arithmetic and comparison
// operations — confirm against the callers.

// `float64x1_t` operands and result; reference returns `f64`; compared as `u64`.
gen_test_fn!(
test_ari_f64,
f64,
f64,
float64x1_t,
float64x1_t,
u64,
V_f64!(),
fill_f64,
fill_f64,
to64!(float64x1_t)
);
// `float64x1_t` operands, `uint64x1_t` result; reference returns `u64`.
gen_test_fn!(
test_cmp_f64,
f64,
u64,
float64x1_t,
uint64x1_t,
u64,
V_f64!(),
fill_f64,
fill_u64,
to64!(uint64x1_t)
);
// `float64x2_t` operands and result; reference returns `f64`; compared as `u128`.
gen_test_fn!(
testq_ari_f64,
f64,
f64,
float64x2_t,
float64x2_t,
u128,
V_f64!(),
fillq_f64,
fillq_f64,
to128!(float64x2_t)
);
// `float64x2_t` operands, `uint64x2_t` result; reference returns `u64`.
gen_test_fn!(
testq_cmp_f64,
f64,
u64,
float64x2_t,
uint64x2_t,
u128,
V_f64!(),
fillq_f64,
fillq_u64,
to128!(uint64x2_t)
);
// `poly64x1_t` operands, `uint64x1_t` result; inputs come from `V_u64!`.
gen_test_fn!(
test_cmp_p64,
u64,
u64,
poly64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_p64,
fill_u64,
to64!(uint64x1_t)
);
// `poly64x2_t` operands, `uint64x2_t` result; inputs come from `V_u64!`.
gen_test_fn!(
testq_cmp_p64,
u64,
u64,
poly64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_p64,
fillq_u64,
to128!(uint64x2_t)
);

View file

@ -0,0 +1,201 @@
//! ARM's Transactional Memory Extensions (TME).
//!
//! This CPU feature is available on AArch64 (A architecture profile).
//! This feature is in the non-NEON feature set. TME-specific vendor documentation can
//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
//!
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
//!
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
//!
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw LLVM intrinsics backing the public `__t*` wrappers in this file.
unsafe extern "unadjusted" {
// Called by `__tstart`; returns the status value that `__tstart` passes through.
#[link_name = "llvm.aarch64.tstart"]
fn aarch64_tstart() -> u64;
// Called by `__tcommit`.
#[link_name = "llvm.aarch64.tcommit"]
fn aarch64_tcommit();
// Called by `__tcancel` with its `IMM16` const parameter.
#[link_name = "llvm.aarch64.tcancel"]
fn aarch64_tcancel(imm0: u64);
// Called by `__ttest`; returns the value that `__ttest` passes through.
#[link_name = "llvm.aarch64.ttest"]
fn aarch64_ttest() -> u64;
}
/// Transaction successfully started.
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
/// Extraction mask for the failure reason (the low 15 bits).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
/// Transaction retry is possible (bit 15).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_RTRY: u64 = 1 << 15;
/// Transaction executed a TCANCEL instruction (bit 16).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_CNCL: u64 = 1 << 16;
/// Transaction aborted because a conflict occurred (bit 17).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_MEM: u64 = 1 << 17;
/// Fallback error type for any other reason (bit 18).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_IMP: u64 = 1 << 18;
/// Transaction aborted because a non-permissible operation was attempted (bit 19).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_ERR: u64 = 1 << 19;
/// Transaction aborted because the read or write set limit was exceeded (bit 20).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_SIZE: u64 = 1 << 20;
/// Transaction aborted because the transactional nesting level was exceeded (bit 21).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_NEST: u64 = 1 << 21;
/// Transaction aborted due to a debug trap (bit 22).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_DBG: u64 = 1 << 22;
/// Transaction failed because of an interrupt (bit 23).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_INT: u64 = 1 << 23;
/// Indicates that a TRIVIAL version of TM is available (bit 24).
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
// understand these instructions.
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
/// If the transaction fails, all state modifications are discarded and a cause of the failure
/// is encoded in the return value.
///
/// A successful start therefore compares equal to `_TMSTART_SUCCESS`. This is a
/// direct call to the `llvm.aarch64.tstart` intrinsic.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tstart() -> u64 {
aarch64_tstart()
}
/// Commits the current transaction. For a nested transaction, the only effect is that the
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
/// performed transactionally are committed to the architectural state.
///
/// This is a direct call to the `llvm.aarch64.tcommit` intrinsic.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tcommit() {
aarch64_tcommit()
}
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
///
/// `IMM16` is a compile-time constant passed to the `llvm.aarch64.tcancel`
/// intrinsic; it must not exceed 65535, which is checked at compile time.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(
all(test, not(target_env = "msvc")),
assert_instr(tcancel, IMM16 = 0x0)
)]
#[rustc_legacy_const_generics(0)]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tcancel<const IMM16: u64>() {
// Reject values that do not fit in 16 bits before they reach the intrinsic.
static_assert!(IMM16 <= 65535);
aarch64_tcancel(IMM16);
}
/// Tests if executing inside a transaction. If no transaction is currently executing,
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
///
/// This is a direct call to the `llvm.aarch64.ttest` intrinsic.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __ttest() -> u64 {
aarch64_ttest()
}
// Runtime tests for the TME intrinsics. Each test is gated on the `tme`
// feature through `simd_test` and makes up to ten attempts to start a
// transaction.
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::aarch64::*;
// Cancellation code handed to `__tcancel`: 0x123 masked with
// `_TMFAILURE_REASON`.
const CANCEL_CODE: u64 = (0 | (0x123 & _TMFAILURE_REASON) as u64) as u64;
// Stops at the first successful `__tstart`; until then `x` must still be 0.
// NOTE(review): the started transaction is not committed or cancelled here —
// confirm this is intended.
#[simd_test(enable = "tme")]
unsafe fn test_tstart() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
break;
}
assert_eq!(x, 0);
}
}
// Increments `x` inside each successfully started transaction and commits it.
// The assertion after the `if` runs on every iteration, so it also requires
// every `__tstart` to have succeeded.
#[simd_test(enable = "tme")]
unsafe fn test_tcommit() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcommit();
}
assert_eq!(x, i + 1);
}
}
// Increments `x` inside a transaction and then cancels it; the final
// assertion expects the increment to have been discarded.
#[simd_test(enable = "tme")]
unsafe fn test_tcancel() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcancel::<CANCEL_CODE>();
break;
}
}
assert_eq!(x, 0);
}
// Starts transactions without committing them and cancels once `__ttest`
// reports a depth of 2. There are no assertions; the test only checks that
// the sequence runs.
#[simd_test(enable = "tme")]
unsafe fn test_ttest() {
for _ in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
if tme::__ttest() == 2 {
tme::__tcancel::<CANCEL_CODE>();
break;
}
}
}
}
}

View file

@ -0,0 +1,390 @@
//! # References:
//!
//! - Section 8.3 "16-bit multiplications"
//!
//! Intrinsics that could live here:
//!
//! - \[x\] __smulbb
//! - \[x\] __smulbt
//! - \[x\] __smultb
//! - \[x\] __smultt
//! - \[x\] __smulwb
//! - \[x\] __smulwt
//! - \[x\] __qadd
//! - \[x\] __qsub
//! - \[x\] __qdbl
//! - \[x\] __smlabb
//! - \[x\] __smlabt
//! - \[x\] __smlatb
//! - \[x\] __smlatt
//! - \[x\] __smlawb
//! - \[x\] __smlawt
#[cfg(test)]
use stdarch_test::assert_instr;
// Bindings to the LLVM `llvm.arm.*` DSP intrinsics used by the public
// wrappers in this file. The two-operand forms take and return `i32`; the
// multiply-accumulate forms take a third `i32` accumulator.
unsafe extern "unadjusted" {
// 16-bit x 16-bit multiplies.
#[link_name = "llvm.arm.smulbb"]
fn arm_smulbb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smulbt"]
fn arm_smulbt(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smultb"]
fn arm_smultb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smultt"]
fn arm_smultt(a: i32, b: i32) -> i32;
// 32-bit x 16-bit multiplies.
#[link_name = "llvm.arm.smulwb"]
fn arm_smulwb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smulwt"]
fn arm_smulwt(a: i32, b: i32) -> i32;
// Saturating add / subtract.
#[link_name = "llvm.arm.qadd"]
fn arm_qadd(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub"]
fn arm_qsub(a: i32, b: i32) -> i32;
// Multiply-accumulate variants.
#[link_name = "llvm.arm.smlabb"]
fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlabt"]
fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlatb"]
fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlatt"]
fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlawb"]
fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlawt"]
fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
}
/// Insert a SMULBB instruction
///
/// Returns the equivalent of a\[0\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smulbb(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.smulbb` binding.
arm_smulbb(a, b)
}
/// Insert a SMULTB instruction
///
/// Returns the equivalent of a\[1\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smultb(a: i32, b: i32) -> i32 {
// Top halfword of `a` times bottom halfword of `b`, as exercised by the
// `smultb` test (20 * 30 for a = (10, 20), b = (30, 40)).
arm_smultb(a, b)
}
/// Insert a SMULBT instruction
///
/// Returns the equivalent of a\[0\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smulbt(a: i32, b: i32) -> i32 {
// Bottom halfword of `a` times top halfword of `b`, as exercised by the
// `smulbt` test (10 * 40 for a = (10, 20), b = (30, 40)).
arm_smulbt(a, b)
}
/// Insert a SMULTT instruction
///
/// Returns the equivalent of a\[1\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smultt(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.smultt` binding.
arm_smultt(a, b)
}
/// Insert a SMULWB instruction
///
/// Multiplies the 32-bit signed first operand with the low halfword
/// (as a 16-bit signed integer) of the second operand.
/// Returns the top 32 bits of the 48-bit product.
#[inline]
#[cfg_attr(test, assert_instr(smulwb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smulwb(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.smulwb` binding.
arm_smulwb(a, b)
}
/// Insert a SMULWT instruction
///
/// Multiplies the 32-bit signed first operand with the high halfword
/// (as a 16-bit signed integer) of the second operand.
/// Returns the top 32 bits of the 48-bit product.
#[inline]
#[cfg_attr(test, assert_instr(smulwt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smulwt(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.smulwt` binding.
arm_smulwt(a, b)
}
/// Signed saturating addition
///
/// Returns the 32-bit saturating signed equivalent of a + b.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qadd))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.qadd` binding.
arm_qadd(a, b)
}
/// Signed saturating subtraction
///
/// Returns the 32-bit saturating signed equivalent of a - b.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qsub))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
// Both operands are passed through unchanged to the `llvm.arm.qsub` binding.
arm_qsub(a, b)
}
/// Insert a QADD instruction
///
/// Returns the 32-bit saturating signed equivalent of a + a.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qadd))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qdbl(a: i32) -> i32 {
// Saturating doubling is expressed as a saturating add of `a` with itself,
// reusing the `llvm.arm.qadd` binding.
arm_qadd(a, a)
}
/// Insert a SMLABB instruction
///
/// Returns the equivalent of a\[0\] * b\[0\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlabb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlabb(a: i32, b: i32, c: i32) -> i32 {
// `c` is the 32-bit accumulator; all operands go straight to `llvm.arm.smlabb`.
arm_smlabb(a, b, c)
}
/// Insert a SMLABT instruction
///
/// Returns the equivalent of a\[0\] * b\[1\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlabt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlabt(a: i32, b: i32, c: i32) -> i32 {
// `c` is the 32-bit accumulator; all operands go straight to `llvm.arm.smlabt`.
arm_smlabt(a, b, c)
}
/// Insert a SMLATB instruction
///
/// Returns the equivalent of a\[1\] * b\[0\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlatb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlatb(a: i32, b: i32, c: i32) -> i32 {
// `c` is the 32-bit accumulator; all operands go straight to `llvm.arm.smlatb`.
arm_smlatb(a, b, c)
}
/// Insert a SMLATT instruction
///
/// Returns the equivalent of a\[1\] * b\[1\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlatt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlatt(a: i32, b: i32, c: i32) -> i32 {
// `c` is the 32-bit accumulator; all operands go straight to `llvm.arm.smlatt`.
arm_smlatt(a, b, c)
}
/// Insert a SMLAWB instruction
///
/// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlawb))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlawb(a: i32, b: i32, c: i32) -> i32 {
// `a` is used as a full 32-bit value, `b` supplies a halfword, `c` is the
// accumulator; all operands go straight to `llvm.arm.smlawb`.
arm_smlawb(a, b, c)
}
/// Insert a SMLAWT instruction
///
/// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlawt))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlawt(a: i32, b: i32, c: i32) -> i32 {
// `a` is used as a full 32-bit value, `b` supplies a halfword, `c` is the
// accumulator; all operands go straight to `llvm.arm.smlawt`.
arm_smlawt(a, b, c)
}
// Unit tests for the DSP intrinsics.
//
// Packed operands are built as `i16x2::new(lo, hi)` and transmuted to `i32`,
// so lane 0 is the value referred to as `[0]` (lower 16 bits) in the
// intrinsic documentation and lane 1 is `[1]` (upper 16 bits).
#[cfg(test)]
mod tests {
    use crate::core_arch::{
        arm::*,
        simd::{i8x4, i16x2, u8x4},
    };
    use std::mem::transmute;
    use stdarch_test::simd_test;

    #[test]
    fn smulbb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
        }
    }

    #[test]
    fn smulbt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
        }
    }

    #[test]
    fn smultb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
        }
    }

    #[test]
    fn smultt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
        }
    }

    #[test]
    fn smulwb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
        }
    }

    #[test]
    fn smulwt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
        }
    }

    #[test]
    fn qadd() {
        unsafe {
            assert_eq!(super::__qadd(-10, 60), 50);
            assert_eq!(super::__qadd(i32::MAX, 10), i32::MAX);
            assert_eq!(super::__qadd(i32::MIN, -10), i32::MIN);
        }
    }

    #[test]
    fn qsub() {
        unsafe {
            assert_eq!(super::__qsub(10, 60), -50);
            assert_eq!(super::__qsub(i32::MAX, -10), i32::MAX);
            assert_eq!(super::__qsub(i32::MIN, 10), i32::MIN);
        }
    }

    // The tests below previously lacked `#[test]`, so the harness never ran them.

    #[test]
    fn qdbl() {
        unsafe {
            assert_eq!(super::__qdbl(10), 20);
            assert_eq!(super::__qdbl(i32::MAX), i32::MAX);
        }
    }

    #[test]
    fn smlabb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 30) + c;
            assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlabt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 40) + c;
            assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 30) + c;
            // Exercise `__smlatb` itself; the expected value is a[1] * b[0] + c.
            assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 40) + c;
            assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlawb() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 30) + (c << 16)) >> 16;
            assert_eq!(super::__smlawb(a, transmute(b), c), r);
        }
    }

    #[test]
    fn smlawt() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 40) + (c << 16)) >> 16;
            assert_eq!(super::__smlawt(a, transmute(b), c), r);
        }
    }
}

View file

@ -0,0 +1,66 @@
//! ARM intrinsics.
//!
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
//!
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT)
#[cfg(any(target_feature = "v6", doc))]
mod sat;
#[cfg(any(target_feature = "v6", doc))]
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
pub use self::sat::*;
// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD)
// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see
// section 5.4.7)
// Here we work around the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating
// on '+v5te' rather than on '+dsp'
#[cfg(any(
// >= v5TE but excludes v7-M
all(target_feature = "v5te", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp"),
doc,
))]
mod dsp;
#[cfg(any(
// >= v5TE but excludes v7-M
all(target_feature = "v5te", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp"),
doc,
))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub use self::dsp::*;
// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says
// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated
#[cfg(any(
// v7-A, v7-R
all(target_feature = "v6", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp"),
doc,
))]
mod simd32;
#[cfg(any(
// v7-A, v7-R
all(target_feature = "v6", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp"),
doc,
))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub use self::simd32::*;
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
pub use crate::core_arch::arm_shared::*;
#[cfg(test)]
use stdarch_test::assert_instr;

View file

@ -0,0 +1,136 @@
use crate::core_arch::arm_shared::neon::*;
#[cfg(test)]
use stdarch_test::assert_instr;
// Bindings to the LLVM `llvm.arm.neon.vbsl` intrinsics for the 8x8-bit and
// 16x8-bit signed vector types. Each takes three vectors of the same type and
// returns one of that type.
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
#[link_name = "llvm.arm.neon.vbsl.v8i8"]
fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
#[link_name = "llvm.arm.neon.vbsl.v16i8"]
fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
// The shift amount must be in 0..=63; checked at compile time.
static_assert!(0 <= N && N <= 63);
// The polynomial vectors are reinterpreted for `vshiftins_v1i64` (defined
// outside this excerpt) and the shift amount is passed as a splatted vector.
transmute(vshiftins_v1i64(
transmute(a),
transmute(b),
int64x1_t::splat(N as i64),
))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
// Little-endian variant. The shift amount must be in 0..=63; checked at compile time.
static_assert!(0 <= N && N <= 63);
// The polynomial vectors are reinterpreted for `vshiftins_v2i64` (defined
// outside this excerpt) and the shift amount is splatted across both lanes.
transmute(vshiftins_v2i64(
transmute(a),
transmute(b),
int64x2_t::splat(N as i64),
))
}
#[doc = "Shift Left and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
// Big-endian variant. The shift amount must be in 0..=63; checked at compile time.
static_assert!(0 <= N && N <= 63);
// NOTE(review): the index list [0, 1] is the identity permutation, so these
// shuffles leave the lane order unchanged — confirm whether a lane reversal
// was intended for big-endian targets.
let a: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
// Same call as the little-endian variant: reinterpret the inputs and pass
// the shift amount splatted across both lanes.
let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
transmute(a),
transmute(b),
int64x2_t::splat(N as i64),
));
simd_shuffle!(ret_val, ret_val, [0, 1])
}
#[doc = "Shift Right and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
// The shift amount must be in 1..=64; checked at compile time.
static_assert!(1 <= N && N <= 64);
// The shift count is negated before being splatted — presumably the
// convention by which `vshiftins_v1i64` (defined outside this excerpt)
// selects a right shift; confirm against its definition.
transmute(vshiftins_v1i64(
transmute(a),
transmute(b),
int64x1_t::splat(-N as i64),
))
}
#[doc = "Shift Right and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
// Little-endian variant. The shift amount must be in 1..=64; checked at compile time.
static_assert!(1 <= N && N <= 64);
// The shift count is negated before being splatted — presumably the
// convention by which `vshiftins_v2i64` (defined outside this excerpt)
// selects a right shift; confirm against its definition.
transmute(vshiftins_v2i64(
transmute(a),
transmute(b),
int64x2_t::splat(-N as i64),
))
}
#[doc = "Shift Right and Insert (immediate)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
#[doc = "## Safety"]
#[doc = " * Neon instrinsic unsafe"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
// Big-endian variant. The shift amount must be in 1..=64; checked at compile time.
static_assert!(1 <= N && N <= 64);
// NOTE(review): the index list [0, 1] is the identity permutation, so these
// shuffles leave the lane order unchanged — confirm whether a lane reversal
// was intended for big-endian targets.
let a: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
// Same call as the little-endian variant, with the negated shift count
// splatted across both lanes.
let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
transmute(a),
transmute(b),
int64x2_t::splat(-N as i64),
));
simd_shuffle!(ret_val, ret_val, [0, 1])
}

View file

@ -0,0 +1,62 @@
//! # References:
//!
//! - Section 8.4 "Saturating intrinsics"
#[cfg(test)]
use stdarch_test::assert_instr;
/// Saturates a 32-bit signed integer to a signed integer with a given
/// bit width.
///
/// `WIDTH` must be in the range `1..=32`; this is checked at compile time.
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
#[inline]
#[cfg_attr(test, assert_instr("ssat", WIDTH = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn __ssat<const WIDTH: u32>(x: i32) -> i32 {
static_assert!(matches!(WIDTH, 1..=32));
// The width is handed to the `llvm.arm.ssat` binding as an `i32`.
arm_ssat(x, WIDTH as i32)
}
/// Saturates a 32-bit signed integer to an unsigned integer with a given
/// bit width.
///
/// `WIDTH` must be in the range `1..=32`; this is checked at compile time.
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
#[inline]
#[cfg_attr(test, assert_instr("usat", WIDTH = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn __usat<const WIDTH: u32>(x: i32) -> u32 {
// NOTE(review): ACLE appears to specify 0..=31 for the unsigned saturation
// width, whereas this assert accepts 1..=32 — confirm against the spec.
static_assert!(matches!(WIDTH, 1..=32));
// The width is handed to the `llvm.arm.usat` binding as an `i32`.
arm_usat(x, WIDTH as i32)
}
// Bindings to the LLVM saturation intrinsics. `x` is the value to saturate and
// `y` receives the bit width supplied by `__ssat` / `__usat`.
unsafe extern "unadjusted" {
#[link_name = "llvm.arm.ssat"]
fn arm_ssat(x: i32, y: i32) -> i32;
#[link_name = "llvm.arm.usat"]
fn arm_usat(x: i32, y: i32) -> u32;
}
// Checks 8-bit saturation for in-range, too-large and negative inputs.
#[cfg(test)]
mod tests {
    use super::*;
    use stdarch_test::simd_test;

    /// Signed 8-bit saturation clamps to `-128..=127`.
    #[test]
    fn test_ssat() {
        let cases: [(i32, i32); 4] = [(1, 1), (1000, 127), (-1, -1), (-1000, -128)];
        for (input, expected) in cases {
            let saturated = unsafe { __ssat::<8>(input) };
            assert_eq!(saturated, expected);
        }
    }

    /// Unsigned 8-bit saturation clamps to `0..=255`.
    #[test]
    fn test_usat() {
        let cases: [(i32, u32); 4] = [(1, 1), (1000, 255), (-1, 0), (-1000, 0)];
        for (input, expected) in cases {
            let saturated = unsafe { __usat::<8>(input) };
            assert_eq!(saturated, expected);
        }
    }
}

View file

@ -0,0 +1,765 @@
//! # References
//!
//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE
//!
//! Intrinsics that could live here
//!
//! - \[x\] __sel
//! - \[ \] __ssat16
//! - \[ \] __usat16
//! - \[ \] __sxtab16
//! - \[ \] __sxtb16
//! - \[ \] __uxtab16
//! - \[ \] __uxtb16
//! - \[x\] __qadd8
//! - \[x\] __qsub8
//! - \[x\] __sadd8
//! - \[x\] __shadd8
//! - \[x\] __shsub8
//! - \[x\] __ssub8
//! - \[ \] __uadd8
//! - \[ \] __uhadd8
//! - \[ \] __uhsub8
//! - \[ \] __uqadd8
//! - \[ \] __uqsub8
//! - \[x\] __usub8
//! - \[x\] __usad8
//! - \[x\] __usada8
//! - \[x\] __qadd16
//! - \[x\] __qasx
//! - \[x\] __qsax
//! - \[x\] __qsub16
//! - \[x\] __sadd16
//! - \[x\] __sasx
//! - \[x\] __shadd16
//! - \[ \] __shasx
//! - \[ \] __shsax
//! - \[x\] __shsub16
//! - \[ \] __ssax
//! - \[ \] __ssub16
//! - \[ \] __uadd16
//! - \[ \] __uasx
//! - \[ \] __uhadd16
//! - \[ \] __uhasx
//! - \[ \] __uhsax
//! - \[ \] __uhsub16
//! - \[ \] __uqadd16
//! - \[ \] __uqasx
//! - \[x\] __uqsax
//! - \[ \] __uqsub16
//! - \[ \] __usax
//! - \[ \] __usub16
//! - \[x\] __smlad
//! - \[ \] __smladx
//! - \[ \] __smlald
//! - \[ \] __smlaldx
//! - \[x\] __smlsd
//! - \[ \] __smlsdx
//! - \[ \] __smlsld
//! - \[ \] __smlsldx
//! - \[x\] __smuad
//! - \[x\] __smuadx
//! - \[x\] __smusd
//! - \[x\] __smusdx
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::mem::transmute;
/// ARM-specific vector of four `i8` values packed into a 32-bit integer.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub type int8x4_t = i32;
/// ARM-specific vector of four `u8` values packed into a 32-bit integer.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub type uint8x4_t = u32;
/// ARM-specific vector of two `i16` values packed into a 32-bit integer.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub type int16x2_t = i32;
/// ARM-specific vector of two `u16` values packed into a 32-bit integer.
#[allow(non_camel_case_types)]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub type uint16x2_t = u32;
// Calls the two-operand function `$name` with both arguments transmuted to
// the parameter types it expects, then transmutes the result to whatever type
// the call site requires. Must be expanded inside an `unsafe` context.
macro_rules! dsp_call {
($name:expr, $a:expr, $b:expr) => {
transmute($name(transmute($a), transmute($b)))
};
}
// Bindings to the LLVM `llvm.arm.*` 32-bit SIMD intrinsics used by the public
// wrappers in this file. Packed operands are passed as plain `i32` values.
unsafe extern "unadjusted" {
#[link_name = "llvm.arm.qadd8"]
fn arm_qadd8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub8"]
fn arm_qsub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub16"]
fn arm_qsub16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qadd16"]
fn arm_qadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qasx"]
fn arm_qasx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsax"]
fn arm_qsax(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sadd16"]
fn arm_sadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sadd8"]
fn arm_sadd8(a: i32, b: i32) -> i32;
// Dual multiply with a 32-bit accumulator `c`.
#[link_name = "llvm.arm.smlad"]
fn arm_smlad(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlsd"]
fn arm_smlsd(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.sasx"]
fn arm_sasx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sel"]
fn arm_sel(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shadd8"]
fn arm_shadd8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shadd16"]
fn arm_shadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shsub8"]
fn arm_shsub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.ssub8"]
fn arm_ssub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.usub8"]
fn arm_usub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shsub16"]
fn arm_shsub16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smuad"]
fn arm_smuad(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smuadx"]
fn arm_smuadx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smusd"]
fn arm_smusd(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smusdx"]
fn arm_smusdx(a: i32, b: i32) -> i32;
// The only binding here that returns an unsigned value.
#[link_name = "llvm.arm.usad8"]
fn arm_usad8(a: i32, b: i32) -> u32;
}
/// Saturating four 8-bit integer additions
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
/// res\[2\] = a\[2\] + b\[2\]
/// res\[3\] = a\[3\] + b\[3\]
#[inline]
#[cfg_attr(test, assert_instr(qadd8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qadd8`.
dsp_call!(arm_qadd8, a, b)
}
/// Saturating four 8-bit integer subtractions
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
#[inline]
#[cfg_attr(test, assert_instr(qsub8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qsub8`.
dsp_call!(arm_qsub8, a, b)
}
/// Saturating two 16-bit integer subtractions
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
#[inline]
#[cfg_attr(test, assert_instr(qsub16))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qsub16`.
dsp_call!(arm_qsub16, a, b)
}
/// Saturating two 16-bit integer additions
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
#[inline]
#[cfg_attr(test, assert_instr(qadd16))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qadd16`.
dsp_call!(arm_qadd16, a, b)
}
/// Saturating 16-bit add and subtract with exchange
///
/// Returns the 16-bit signed saturated equivalent of
///
/// res\[0\] = a\[0\] - b\[1\]
/// res\[1\] = a\[1\] + b\[0\]
#[inline]
#[cfg_attr(test, assert_instr(qasx))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qasx`.
dsp_call!(arm_qasx, a, b)
}
/// Saturating 16-bit subtract and add with exchange
///
/// Returns the 16-bit signed saturated equivalent of
///
/// res\[0\] = a\[0\] + b\[1\]
/// res\[1\] = a\[1\] - b\[0\]
#[inline]
#[cfg_attr(test, assert_instr(qsax))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.qsax`.
dsp_call!(arm_qsax, a, b)
}
/// Returns the 16-bit signed (wrapping, non-saturating) equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sadd16))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// Lane-wise add; the unit test expects `i16::MAX + 2` to wrap to `-i16::MAX`.
dsp_call!(arm_sadd16, a, b)
}
/// Returns the 8-bit signed (wrapping, non-saturating) equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
/// res\[2\] = a\[2\] + b\[2\]
/// res\[3\] = a\[3\] + b\[3\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sadd8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// Lane-wise add; the unit test expects `i8::MAX + 2` to wrap to `-i8::MAX`.
dsp_call!(arm_sadd8, a, b)
}
/// Dual 16-bit Signed Multiply with Addition of products
/// and 32-bit accumulation.
///
/// Treating `a` and `b` as pairs of signed 16-bit lanes, returns the 32-bit
/// equivalent of
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c
#[inline]
#[cfg_attr(test, assert_instr(smlad))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
// The accumulator `c` is passed through as-is; only the packed operands are transmuted.
arm_smlad(transmute(a), transmute(b), c)
}
/// Dual 16-bit Signed Multiply with Subtraction of products
/// and 32-bit accumulation and overflow detection.
///
/// Treating `a` and `b` as pairs of signed 16-bit lanes, returns the 32-bit
/// equivalent of
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c
#[inline]
#[cfg_attr(test, assert_instr(smlsd))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
// The accumulator `c` is passed through as-is; only the packed operands are transmuted.
arm_smlsd(transmute(a), transmute(b), c)
}
/// 16-bit add and subtract with exchange
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[1\]
/// res\[1\] = a\[1\] + b\[0\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sasx))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.sasx`.
dsp_call!(arm_sasx, a, b)
}
/// Select bytes from each operand according to APSR GE flags
///
/// Returns the equivalent of
///
/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\]
/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\]
/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\]
/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\]
///
/// where GE are bits of APSR
#[inline]
#[cfg_attr(test, assert_instr(sel))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.sel`.
dsp_call!(arm_sel, a, b)
}
/// Signed halving parallel byte-wise addition.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
/// res\[2\] = (a\[2\] + b\[2\]) / 2
/// res\[3\] = (a\[3\] + b\[3\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shadd8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.shadd8`.
dsp_call!(arm_shadd8, a, b)
}
/// Signed halving parallel halfword-wise addition.
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shadd16))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.shadd16`.
dsp_call!(arm_shadd16, a, b)
}
/// Signed halving parallel byte-wise subtraction.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
/// res\[2\] = (a\[2\] - b\[2\]) / 2
/// res\[3\] = (a\[3\] - b\[3\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shsub8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.shsub8`.
dsp_call!(arm_shsub8, a, b)
}
/// Inserts a `USUB8` instruction.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
///
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
/// The GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(usub8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
// The binding is declared on `i32`; `dsp_call!` transmutes the `u32`
// operands in and the result back out.
dsp_call!(arm_usub8, a, b)
}
/// Inserts a `SSUB8` instruction.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
///
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
/// The GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(ssub8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.ssub8`.
dsp_call!(arm_ssub8, a, b)
}
/// Signed halving parallel halfword-wise subtraction.
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shsub16))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// `dsp_call!` transmutes the operands and the result around `llvm.arm.shsub16`.
dsp_call!(arm_shsub16, a, b)
}
/// Signed Dual Multiply Add.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smuad))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 {
// Only the packed operands are transmuted; the `i32` result is returned directly.
arm_smuad(transmute(a), transmute(b))
}
/// Signed Dual Multiply Add Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smuadx))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
// Only the packed operands are transmuted; the `i32` result is returned directly.
arm_smuadx(transmute(a), transmute(b))
}
/// Signed Dual Multiply Subtract.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smusd))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 {
// Only the packed operands are transmuted; the `i32` result is returned directly.
arm_smusd(transmute(a), transmute(b))
}
/// Signed Dual Multiply Subtract Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smusdx))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
// Only the packed operands are transmuted; the `i32` result is returned directly.
arm_smusdx(transmute(a), transmute(b))
}
/// Sum of 8-bit absolute differences.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\])
#[inline]
#[cfg_attr(test, assert_instr(usad8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 {
// The operands are declared as `int8x4_t` (`i32`), matching the binding's
// parameter types; the sum is returned as `u32`.
arm_usad8(transmute(a), transmute(b))
}
/// Sum of 8-bit absolute differences and constant.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\]) + c
///
/// The accumulation with `c` wraps modulo 2^32.
#[inline]
#[cfg_attr(test, assert_instr(usad8))]
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
    // Use a wrapping add so a large accumulator can never trigger an
    // arithmetic-overflow panic when overflow checks are enabled; the
    // USADA8 instruction itself accumulates with 32-bit wrap-around.
    __usad8(a, b).wrapping_add(c)
}
#[cfg(test)]
mod tests {
    //! Value checks for the DSP intrinsics defined in the parent module.
    //!
    //! Vector-returning intrinsics are invoked through `dsp_call!`; the
    //! scalar-returning ones are called directly with transmuted operands.
    use crate::core_arch::simd::{i8x4, i16x2, u8x4};
    use std::mem::transmute;
    use stdarch_test::simd_test;

    #[test]
    fn qadd8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, i8::MAX);
            let rhs = i8x4::new(2, -1, 0, 1);
            // The last lane stays at `i8::MAX`.
            let expected = i8x4::new(3, 1, 3, i8::MAX);
            let actual: i8x4 = dsp_call!(super::__qadd8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsub8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, i8::MIN);
            let rhs = i8x4::new(2, -1, 0, 1);
            // The last lane stays at `i8::MIN`.
            let expected = i8x4::new(-1, 3, 3, i8::MIN);
            let actual: i8x4 = dsp_call!(super::__qsub8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qadd16() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(2, -1);
            let expected = i16x2::new(3, 1);
            let actual: i16x2 = dsp_call!(super::__qadd16, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsub16() {
        unsafe {
            let lhs = i16x2::new(10, 20);
            let rhs = i16x2::new(20, -10);
            let expected = i16x2::new(-10, 30);
            let actual: i16x2 = dsp_call!(super::__qsub16, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qasx() {
        unsafe {
            let lhs = i16x2::new(1, i16::MAX);
            let rhs = i16x2::new(2, 2);
            let expected = i16x2::new(-1, i16::MAX);
            let actual: i16x2 = dsp_call!(super::__qasx, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsax() {
        unsafe {
            let lhs = i16x2::new(1, i16::MAX);
            let rhs = i16x2::new(2, 2);
            let expected = i16x2::new(3, i16::MAX - 2);
            let actual: i16x2 = dsp_call!(super::__qsax, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sadd16() {
        unsafe {
            let lhs = i16x2::new(1, i16::MAX);
            let rhs = i16x2::new(2, 2);
            // The second lane is expected to wrap around.
            let expected = i16x2::new(3, -i16::MAX);
            let actual: i16x2 = dsp_call!(super::__sadd16, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sadd8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, i8::MAX);
            let rhs = i8x4::new(4, 3, 2, 2);
            // The last lane is expected to wrap around.
            let expected = i8x4::new(5, 5, 5, -i8::MAX);
            let actual: i8x4 = dsp_call!(super::__sadd8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sasx() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(2, 1);
            let expected = i16x2::new(0, 4);
            let actual: i16x2 = dsp_call!(super::__sasx, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn smlad() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(3, 4);
            let actual = super::__smlad(transmute(lhs), transmute(rhs), 10);
            let expected = (1 * 3) + (2 * 4) + 10;
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn smlsd() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(3, 4);
            let actual = super::__smlsd(transmute(lhs), transmute(rhs), 10);
            let expected = ((1 * 3) - (2 * 4)) + 10;
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sel() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, i8::MAX);
            let rhs = i8x4::new(4, 3, 2, 2);
            // call sadd8() to set GE bits
            super::__sadd8(transmute(lhs), transmute(rhs));
            let expected = i8x4::new(1, 2, 3, i8::MAX);
            let actual: i8x4 = dsp_call!(super::__sel, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shadd8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, 4);
            let rhs = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(3, 3, 3, 3);
            let actual: i8x4 = dsp_call!(super::__shadd8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shadd16() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let expected = i16x2::new(3, 3);
            let actual: i16x2 = dsp_call!(super::__shadd16, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shsub8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, 4);
            let rhs = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(-2, -1, 0, 1);
            let actual: i8x4 = dsp_call!(super::__shsub8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn ssub8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, 4);
            let rhs = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(-4, -2, 0, 2);
            let actual: i8x4 = dsp_call!(super::__ssub8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn usub8() {
        unsafe {
            let lhs = u8x4::new(1, 2, 3, 4);
            let rhs = u8x4::new(5, 4, 3, 2);
            // The first two lanes are expected to wrap below zero.
            let expected = u8x4::new(252, 254, 0, 2);
            let actual: u8x4 = dsp_call!(super::__usub8, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shsub16() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let expected = i16x2::new(-2, -1);
            let actual: i16x2 = dsp_call!(super::__shsub16, lhs, rhs);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn smuad() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let actual = super::__smuad(transmute(lhs), transmute(rhs));
            assert_eq!(actual, 13);
        }
    }

    #[test]
    fn smuadx() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let actual = super::__smuadx(transmute(lhs), transmute(rhs));
            assert_eq!(actual, 14);
        }
    }

    #[test]
    fn smusd() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let actual = super::__smusd(transmute(lhs), transmute(rhs));
            assert_eq!(actual, -3);
        }
    }

    #[test]
    fn smusdx() {
        unsafe {
            let lhs = i16x2::new(1, 2);
            let rhs = i16x2::new(5, 4);
            let actual = super::__smusdx(transmute(lhs), transmute(rhs));
            assert_eq!(actual, -6);
        }
    }

    #[test]
    fn usad8() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, 4);
            let rhs = i8x4::new(4, 3, 2, 1);
            let actual = super::__usad8(transmute(lhs), transmute(rhs));
            assert_eq!(actual, 8);
        }
    }

    #[test]
    fn usad8a() {
        unsafe {
            let lhs = i8x4::new(1, 2, 3, 4);
            let rhs = i8x4::new(4, 3, 2, 1);
            let acc = 10;
            let actual = super::__usada8(transmute(lhs), transmute(rhs), acc);
            assert_eq!(actual, 8 + acc);
        }
    }
}

View file

@ -0,0 +1,16 @@
//! Access types available on all architectures
/// Full system is the required shareability domain, reads and writes are the
/// required access types
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct SY;
// Implements the sealed `Dmb` and `Dsb` traits for `SY` by forwarding `arg::SY` (15)
// to the `dmb`/`dsb` intrinsics of the parent module.
dmb_dsb!(SY);
// `SY` is the only marker in this file that also implements `Isb`; `dmb_dsb!` only
// generates the `Dmb` and `Dsb` impls.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Isb for SY {
#[inline(always)]
unsafe fn __isb(&self) {
// Forwards the `SY` option constant (15) straight to the `isb` intrinsic.
super::isb(super::arg::SY)
}
}

View file

@ -0,0 +1,45 @@
// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary
// of CP15 instructions"
use crate::arch::asm;
/// Full system is the required shareability domain, reads and writes are the
/// required access types
// In this module the `Dmb`, `Dsb` and `Isb` impls for `SY` are written as CP15 `mcr`
// instructions (see the impls below) rather than calls to the LLVM barrier intrinsics.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct SY;
// CP15-based `__dmb`: issues `mcr p15, 0, <reg>, c7, c10, 5` with a zero operand.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Dmb for SY {
#[inline(always)]
unsafe fn __dmb(&self) {
asm!(
"mcr p15, 0, {}, c7, c10, 5",
// The value written is always zero; the compiler picks the register.
in(reg) 0_u32,
// `nomem` is not specified, so the asm is not declared free of memory accesses.
options(preserves_flags, nostack)
)
}
}
// CP15-based `__dsb`: issues `mcr p15, 0, <reg>, c7, c10, 4` with a zero operand.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Dsb for SY {
#[inline(always)]
unsafe fn __dsb(&self) {
asm!(
"mcr p15, 0, {}, c7, c10, 4",
// The value written is always zero; the compiler picks the register.
in(reg) 0_u32,
// `nomem` is not specified, so the asm is not declared free of memory accesses.
options(preserves_flags, nostack)
)
}
}
// CP15-based `__isb`: issues `mcr p15, 0, <reg>, c7, c5, 4` with a zero operand.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Isb for SY {
#[inline(always)]
unsafe fn __isb(&self) {
asm!(
"mcr p15, 0, {}, c7, c5, 4",
// The value written is always zero; the compiler picks the register.
in(reg) 0_u32,
// `nomem` is not specified, so the asm is not declared free of memory accesses.
options(preserves_flags, nostack)
)
}
}

View file

@ -0,0 +1,185 @@
// Reference: Section 7.4 "Hints" of ACLE
// CP15 instruction
#[cfg(not(any(
// v8
target_arch = "aarch64",
target_arch = "arm64ec",
// v7
target_feature = "v7",
// v6-M
target_feature = "mclass"
)))]
mod cp15;
#[cfg(not(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
target_feature = "mclass"
)))]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub use self::cp15::*;
// Dedicated instructions
//
// `dmb_dsb!(X)` generates the sealed `Dmb` and `Dsb` trait impls for the marker type `X`.
// Each generated method forwards to `super::dmb` / `super::dsb` (resolved at the expansion
// site) and passes the constant `super::arg::X`, so the marker and the constant must share
// a name. `Isb` is not generated by this macro.
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
target_feature = "mclass"
))]
macro_rules! dmb_dsb {
($A:ident) => {
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Dmb for $A {
#[inline(always)]
unsafe fn __dmb(&self) {
super::dmb(super::arg::$A)
}
}
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
impl super::super::sealed::Dsb for $A {
#[inline(always)]
unsafe fn __dsb(&self) {
super::dsb(super::arg::$A)
}
}
};
}
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
target_feature = "mclass"
))]
mod common;
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
target_feature = "mclass"
))]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub use self::common::*;
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
))]
mod not_mclass;
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
))]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub use self::not_mclass::*;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
mod v8;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub use self::v8::*;
/// Emits a DMB (data memory barrier) instruction, or the equivalent CP15 instruction.
///
/// A DMB enforces the observed order of memory accesses: accesses of the specified type
/// issued before the barrier are guaranteed to be observed (in the specified scope) before
/// accesses issued after it.
///
/// A typical use is to place a DMB between storing data and updating the flag variable that
/// publishes that data to another core.
///
/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type.
///
/// `arg` is an access-type marker value; its type selects the barrier variant.
#[inline(always)]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub unsafe fn __dmb<A>(arg: A)
where
    A: super::sealed::Dmb,
{
    // Dispatch through the sealed trait implemented by the marker type.
    super::sealed::Dmb::__dmb(&arg)
}
/// Emits a DSB (data synchronization barrier) instruction, or the equivalent CP15
/// instruction.
///
/// A DSB guarantees the completion of memory accesses. It behaves like the equivalent DMB and
/// adds further properties: once the DSB completes, all memory accesses of the specified type
/// issued before it are guaranteed to have completed.
///
/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type.
///
/// `arg` is an access-type marker value; its type selects the barrier variant.
#[inline(always)]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub unsafe fn __dsb<A>(arg: A)
where
    A: super::sealed::Dsb,
{
    // Dispatch through the sealed trait implemented by the marker type.
    super::sealed::Dsb::__dsb(&arg)
}
/// Emits an ISB (instruction synchronization barrier) instruction, or the equivalent CP15
/// instruction.
///
/// The instruction flushes the processor pipeline fetch buffers, so the instructions that
/// follow are fetched from cache or memory.
///
/// An ISB is required after some system maintenance operations, and before transferring
/// control to code that has been loaded or modified in memory, for example by an overlay
/// mechanism or a just-in-time code generator. (Note that if instruction and data caches are
/// separate, privileged cache maintenance operations would be needed in order to unify the
/// caches.)
///
/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full
/// system) scope of the ISB instruction; in this API that is the `SY` marker value.
#[inline(always)]
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub unsafe fn __isb<A>(arg: A)
where
    A: super::sealed::Isb,
{
    // Dispatch through the sealed trait implemented by the marker type.
    super::sealed::Isb::__isb(&arg)
}
// Raw LLVM barrier intrinsics. Each declaration is linked to the `llvm.aarch64.*` name on
// AArch64/Arm64EC and to the `llvm.arm.*` name on 32-bit Arm. The `i32` argument is one of
// the option constants from the `arg` module below.
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.dmb"
)]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
fn dmb(_: i32);
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.dsb"
)]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")]
fn dsb(_: i32);
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.isb"
)]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")]
fn isb(_: i32);
}
// we put these in a module to prevent weirdness with glob re-exports
//
// Numeric option values handed to the `dmb`/`dsb`/`isb` intrinsics. Each constant shares its
// name with the marker struct of the same name, which is how `dmb_dsb!` looks it up.
mod arg {
// See Section 7.3 Memory barriers of ACLE
// Full system
pub const SY: i32 = 15; // reads and writes
pub const ST: i32 = 14; // writes
pub const LD: i32 = 13; // reads
// Inner Shareable
pub const ISH: i32 = 11; // reads and writes
pub const ISHST: i32 = 10; // writes
pub const ISHLD: i32 = 9; // reads
// Non-shareable
pub const NSH: i32 = 7; // reads and writes
pub const NSHST: i32 = 6; // writes
pub const NSHLD: i32 = 5; // reads
// Outer Shareable
pub const OSH: i32 = 3; // reads and writes
pub const OSHST: i32 = 2; // writes
pub const OSHLD: i32 = 1; // reads
}

View file

@ -0,0 +1,50 @@
//! Access types available on v7 and v8 but not on v7(E)-M or v8-M
/// Full system is the required shareability domain, writes are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct ST;
// Implements the sealed `Dmb`/`Dsb` traits for `ST`, forwarding `arg::ST` (14).
dmb_dsb!(ST);
/// Inner Shareable is the required shareability domain, reads and writes are
/// the required access types
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct ISH;
// Implements the sealed `Dmb`/`Dsb` traits for `ISH`, forwarding `arg::ISH` (11).
dmb_dsb!(ISH);
/// Inner Shareable is the required shareability domain, writes are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct ISHST;
// Implements the sealed `Dmb`/`Dsb` traits for `ISHST`, forwarding `arg::ISHST` (10).
dmb_dsb!(ISHST);
/// Non-shareable is the required shareability domain, reads and writes are the
/// required access types
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct NSH;
// Implements the sealed `Dmb`/`Dsb` traits for `NSH`, forwarding `arg::NSH` (7).
dmb_dsb!(NSH);
/// Non-shareable is the required shareability domain, writes are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct NSHST;
// Implements the sealed `Dmb`/`Dsb` traits for `NSHST`, forwarding `arg::NSHST` (6).
dmb_dsb!(NSHST);
/// Outer Shareable is the required shareability domain, reads and writes are
/// the required access types
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct OSH;
// Implements the sealed `Dmb`/`Dsb` traits for `OSH`, forwarding `arg::OSH` (3).
dmb_dsb!(OSH);
/// Outer Shareable is the required shareability domain, writes are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct OSHST;
// Implements the sealed `Dmb`/`Dsb` traits for `OSHST`, forwarding `arg::OSHST` (2).
dmb_dsb!(OSHST);

View file

@ -0,0 +1,27 @@
/// Full system is the required shareability domain, reads are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct LD;
// Implements the sealed `Dmb`/`Dsb` traits for `LD`, forwarding `arg::LD` (13).
dmb_dsb!(LD);
/// Inner Shareable is the required shareability domain, reads are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct ISHLD;
// Implements the sealed `Dmb`/`Dsb` traits for `ISHLD`, forwarding `arg::ISHLD` (9).
dmb_dsb!(ISHLD);
/// Non-shareable is the required shareability domain, reads are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct NSHLD;
// Implements the sealed `Dmb`/`Dsb` traits for `NSHLD`, forwarding `arg::NSHLD` (5).
dmb_dsb!(NSHLD);
/// Outer Shareable is the required shareability domain, reads are the required
/// access type
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub struct OSHLD;
// Implements the sealed `Dmb`/`Dsb` traits for `OSHLD`, forwarding `arg::OSHLD` (1).
dmb_dsb!(OSHLD);

View file

@ -0,0 +1,125 @@
// # References
//
// - Section 7.4 "Hints" of ACLE
// - Section 7.7 "NOP" of ACLE
/// Generates a WFI (wait for interrupt) hint instruction, or nothing.
///
/// The WFI instruction allows (but does not require) the processor to enter a
/// low-power state until one of a number of asynchronous events occurs.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
// NOTE(review): the gate below tests the `v6` feature while the notes above mention
// 6K/armv6k -- confirm the gate is as intended.
// Compiled when `v6` is enabled, on AArch64/Arm64EC, and for rustdoc builds.
#[cfg(any(
target_feature = "v6",
target_arch = "aarch64",
target_arch = "arm64ec",
doc
))]
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __wfi() {
// Passes the WFI selector (`HINT_WFI`, 3) directly to the LLVM hint intrinsic.
hint(HINT_WFI);
}
/// Generates a WFE (wait for event) hint instruction, or nothing.
///
/// The WFE instruction allows (but does not require) the processor to enter a
/// low-power state until some event occurs such as a SEV being issued by
/// another processor.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
// NOTE(review): the gate below tests the `v6` feature while the notes above mention
// 6K/armv6k -- confirm the gate is as intended.
// Compiled when `v6` is enabled, on AArch64/Arm64EC, and for rustdoc builds.
#[cfg(any(
target_feature = "v6",
target_arch = "aarch64",
target_arch = "arm64ec",
doc
))]
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __wfe() {
// Passes the WFE selector (`HINT_WFE`, 2) directly to the LLVM hint intrinsic.
hint(HINT_WFE);
}
/// Generates a SEV (send a global event) hint instruction.
///
/// This causes an event to be signaled to all processors in a multiprocessor
/// system. It is a NOP on a uniprocessor system.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M
// LLVM says "instruction requires: armv6k"
// NOTE(review): the gate below tests the `v6` feature while the notes above mention
// 6K/armv6k -- confirm the gate is as intended.
// Compiled when `v6` is enabled, on AArch64/Arm64EC, and for rustdoc builds.
#[cfg(any(
target_feature = "v6",
target_arch = "aarch64",
target_arch = "arm64ec",
doc
))]
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __sev() {
// Passes the SEV selector (`HINT_SEV`, 4) directly to the LLVM hint intrinsic.
hint(HINT_SEV);
}
/// Generates a send a local event hint instruction.
///
/// This causes an event to be signaled to only the processor executing this
/// instruction. In a multiprocessor system, it is not required to affect the
/// other processors.
// LLVM says "instruction requires: armv8"
// Compiled when `v8` is enabled, on AArch64/Arm64EC, and for rustdoc builds.
#[cfg(any(
target_feature = "v8", // 32-bit ARMv8
target_arch = "aarch64", // AArch64
target_arch = "arm64ec", // Arm64EC
doc,
))]
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __sevl() {
// Passes the SEVL selector (`HINT_SEVL`, 5) directly to the LLVM hint intrinsic.
hint(HINT_SEVL);
}
/// Generates a YIELD hint instruction.
///
/// This enables multithreading software to indicate to the hardware that it is
/// performing a task, for example a spin-lock, that could be swapped out to
/// improve overall system performance.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
// NOTE(review): the gate below tests the `v6` feature while the notes above mention
// 6K/armv6k -- confirm the gate is as intended.
// Compiled when `v6` is enabled, on AArch64/Arm64EC, and for rustdoc builds.
#[cfg(any(
target_feature = "v6",
target_arch = "aarch64",
target_arch = "arm64ec",
doc
))]
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __yield() {
// Passes the YIELD selector (`HINT_YIELD`, 1) directly to the LLVM hint intrinsic.
hint(HINT_YIELD);
}
/// Generates an unspecified no-op instruction.
///
/// Note that not all architectures provide a distinguished NOP instruction. On
/// those that do, it is unspecified whether this intrinsic generates it or
/// another instruction. It is not guaranteed that inserting this instruction
/// will increase execution time.
#[inline(always)]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub unsafe fn __nop() {
// Emitted as inline assembly rather than through `hint(HINT_NOP)`. The options declare
// that the asm touches no memory, uses no stack, and leaves the flags unchanged.
crate::arch::asm!("nop", options(nomem, nostack, preserves_flags));
}
// Raw LLVM hint intrinsic, linked to `llvm.aarch64.hint` on AArch64/Arm64EC and to
// `llvm.arm.hint` on 32-bit Arm. The argument is one of the `HINT_*` selectors below.
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.hint"
)]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
fn hint(_: i32);
}
// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td
// Selector values passed to `hint`. `HINT_NOP` is not referenced by the functions in this
// file: `__nop` uses inline assembly instead.
const HINT_NOP: i32 = 0;
const HINT_YIELD: i32 = 1;
const HINT_WFE: i32 = 2;
const HINT_WFI: i32 = 3;
const HINT_SEV: i32 = 4;
const HINT_SEVL: i32 = 5;

View file

@ -0,0 +1,117 @@
//! ARM C Language Extensions (ACLE)
//!
//! # Developer notes
//!
//! Below is a list of built-in targets that are representative of the different ARM
//! architectures; the list includes the `target_feature`s they possess.
//!
//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t`
//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te`
//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6`
//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass`
//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass`
//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass`
//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass`
//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass`
//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon`
//!
//! Section 10.1 of ACLE says:
//!
//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
//! its predecessor instruction set."
//!
//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
//! its predecessor instruction set."
//!
//! From that info and from looking at how LLVM features work (using custom targets) we can identify
//! features that are subsets of others:
//!
//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is
//! enabled as well.
//!
//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8`
//! - `v6 < v8m < v6t2`
//! - `v7 < v8m.main`
//!
//! *NOTE*: Section 5.4.7 of ACLE says:
//!
//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
//! intrinsics defined in Saturating intrinsics are available."
//!
//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
//! targets so we have to work around this difference.
//!
//! # References
//!
//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest)
#![cfg_attr(
all(target_arch = "aarch64", target_abi = "softfloat"),
// Just allow the warning: anyone soundly using the intrinsics has to enable
// the target feature, and that will generate a warning for them.
allow(aarch64_softfloat_neon)
)]
// Only for 'neon' submodule
#![allow(non_camel_case_types)]
// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported
// via CP15 instructions. See Section 10.1 of ACLE
mod barrier;
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub use self::barrier::*;
mod hints;
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
pub use self::hints::*;
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
doc
))]
pub(crate) mod neon;
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
doc
))]
#[cfg_attr(
not(target_arch = "arm"),
stable(feature = "neon_intrinsics", since = "1.59.0")
)]
#[cfg_attr(
target_arch = "arm",
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub use self::neon::*;
#[cfg(test)]
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_feature = "v7",
doc
))]
pub(crate) mod test_support;
// Private module holding the traits that bound the generic `__dmb`, `__dsb` and `__isb`
// functions. The access-type marker structs of the barrier submodules implement them.
mod sealed {
/// Implemented by access-type markers that can be passed to `__dmb`.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub trait Dmb {
/// Emits the data memory barrier selected by the implementing marker.
unsafe fn __dmb(&self);
}
/// Implemented by access-type markers that can be passed to `__dsb`.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub trait Dsb {
/// Emits the data synchronization barrier selected by the implementing marker.
unsafe fn __dsb(&self);
}
/// Implemented by access-type markers that can be passed to `__isb`.
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
pub trait Isb {
/// Emits the instruction synchronization barrier selected by the implementing marker.
unsafe fn __isb(&self);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,206 @@
//! Tests for ARM+v7+neon load (vld1) intrinsics.
//!
//! These are included in `{arm, aarch64}::neon`.
use super::*;
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s8() {
    // Load eight `i8` lanes starting one element into the buffer.
    let buf: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: i8x8 = transmute(vld1_s8(src));
    let expected = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s8() {
    // Load sixteen `i8` lanes starting one element into the buffer.
    let buf: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let src = buf[1..].as_ptr();
    let loaded: i8x16 = transmute(vld1q_s8(src));
    let expected = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s16() {
    // Load four `i16` lanes starting one element into the buffer.
    let buf: [i16; 5] = [0, 1, 2, 3, 4];
    let src = buf[1..].as_ptr();
    let loaded: i16x4 = transmute(vld1_s16(src));
    let expected = i16x4::new(1, 2, 3, 4);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s16() {
    // Load eight `i16` lanes starting one element into the buffer.
    let buf: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: i16x8 = transmute(vld1q_s16(src));
    let expected = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s32() {
    // Load two `i32` lanes starting one element into the buffer.
    let buf: [i32; 3] = [0, 1, 2];
    let src = buf[1..].as_ptr();
    let loaded: i32x2 = transmute(vld1_s32(src));
    let expected = i32x2::new(1, 2);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s32() {
    // Load four `i32` lanes starting one element into the buffer.
    let buf: [i32; 5] = [0, 1, 2, 3, 4];
    let src = buf[1..].as_ptr();
    let loaded: i32x4 = transmute(vld1q_s32(src));
    let expected = i32x4::new(1, 2, 3, 4);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s64() {
    // Load a single `i64` lane starting one element into the buffer.
    let buf: [i64; 2] = [0, 1];
    let src = buf[1..].as_ptr();
    let loaded: i64x1 = transmute(vld1_s64(src));
    let expected = i64x1::new(1);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s64() {
    // Load two `i64` lanes starting one element into the buffer.
    let buf: [i64; 3] = [0, 1, 2];
    let src = buf[1..].as_ptr();
    let loaded: i64x2 = transmute(vld1q_s64(src));
    let expected = i64x2::new(1, 2);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u8() {
    // Load eight `u8` lanes starting one element into the buffer.
    let buf: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: u8x8 = transmute(vld1_u8(src));
    let expected = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u8() {
    // Load sixteen `u8` lanes starting one element into the buffer.
    let buf: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let src = buf[1..].as_ptr();
    let loaded: u8x16 = transmute(vld1q_u8(src));
    let expected = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u16() {
    // Load four `u16` lanes starting one element into the buffer.
    let buf: [u16; 5] = [0, 1, 2, 3, 4];
    let src = buf[1..].as_ptr();
    let loaded: u16x4 = transmute(vld1_u16(src));
    let expected = u16x4::new(1, 2, 3, 4);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u16() {
    // Load eight `u16` lanes starting one element into the buffer.
    let buf: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: u16x8 = transmute(vld1q_u16(src));
    let expected = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u32() {
    // Load two `u32` lanes starting one element into the buffer.
    let buf: [u32; 3] = [0, 1, 2];
    let src = buf[1..].as_ptr();
    let loaded: u32x2 = transmute(vld1_u32(src));
    let expected = u32x2::new(1, 2);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u32() {
    // Load four `u32` lanes starting one element into the buffer.
    let buf: [u32; 5] = [0, 1, 2, 3, 4];
    let src = buf[1..].as_ptr();
    let loaded: u32x4 = transmute(vld1q_u32(src));
    let expected = u32x4::new(1, 2, 3, 4);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u64() {
    // Load a single `u64` lane starting one element into the buffer.
    let buf: [u64; 2] = [0, 1];
    let src = buf[1..].as_ptr();
    let loaded: u64x1 = transmute(vld1_u64(src));
    let expected = u64x1::new(1);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u64() {
    // Load two `u64` lanes starting one element into the buffer.
    let buf: [u64; 3] = [0, 1, 2];
    let src = buf[1..].as_ptr();
    let loaded: u64x2 = transmute(vld1q_u64(src));
    let expected = u64x2::new(1, 2);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p8() {
    // Load eight `p8` lanes starting one element into the buffer; the result is
    // compared as `u8x8`.
    let buf: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: u8x8 = transmute(vld1_p8(src));
    let expected = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p8() {
    // Load sixteen `p8` lanes starting one element into the buffer; the result is
    // compared as `u8x16`.
    let buf: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let src = buf[1..].as_ptr();
    let loaded: u8x16 = transmute(vld1q_p8(src));
    let expected = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p16() {
    // Load four `p16` lanes starting one element into the buffer; the result is
    // compared as `u16x4`.
    let buf: [p16; 5] = [0, 1, 2, 3, 4];
    let src = buf[1..].as_ptr();
    let loaded: u16x4 = transmute(vld1_p16(src));
    let expected = u16x4::new(1, 2, 3, 4);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p16() {
    // Load eight `p16` lanes starting one element into the buffer; the result is
    // compared as `u16x8`.
    let buf: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let src = buf[1..].as_ptr();
    let loaded: u16x8 = transmute(vld1q_p16(src));
    let expected = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vld1_p64() {
    // Load a single `p64` lane starting one element into the buffer; the result is
    // compared as `u64x1`.
    let buf: [p64; 2] = [0, 1];
    let src = buf[1..].as_ptr();
    let loaded: u64x1 = transmute(vld1_p64(src));
    let expected = u64x1::new(1);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vld1q_p64() {
    // Load two `p64` lanes starting one element into the buffer; the result is
    // compared as `u64x2`.
    let buf: [p64; 3] = [0, 1, 2];
    let src = buf[1..].as_ptr();
    let loaded: u64x2 = transmute(vld1q_p64(src));
    let expected = u64x2::new(1, 2);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_f32() {
    // Load two `f32` lanes starting one element into the buffer.
    let buf: [f32; 3] = [0., 1., 2.];
    let src = buf[1..].as_ptr();
    let loaded: f32x2 = transmute(vld1_f32(src));
    let expected = f32x2::new(1., 2.);
    assert_eq!(loaded, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_f32() {
    // Load four `f32` lanes starting one element into the buffer.
    let buf: [f32; 5] = [0., 1., 2., 3., 4.];
    let src = buf[1..].as_ptr();
    let loaded: f32x4 = transmute(vld1q_f32(src));
    let expected = f32x4::new(1., 2., 3., 4.);
    assert_eq!(loaded, expected);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,93 @@
//! Tests for ARM+v7+neon shift and insert (vsli[q]_n, vsri[q]_n) intrinsics.
//!
//! These are included in `{arm, aarch64}::neon`.
use super::*;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
use crate::core_arch::aarch64::*;
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
use crate::core_arch::simd::*;
use std::mem::transmute;
use stdarch_test::simd_test;
// Generates one `#[simd_test]` that checks a `vsli[q]_n_*` intrinsic against a scalar model.
//
// Arguments: test function name, lane scalar type `=>` intrinsic name, the lanes of `a`,
// the lanes of `b`, and the shift amount `n` (also used as the intrinsic's const generic).
// Expected lane value: `(a & low_n_bits) | (b << n)`.
macro_rules! test_vsli {
($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
#[simd_test(enable = "neon")]
#[allow(unused_assignments)]
unsafe fn $test_id() {
let a = [$($a as $t),*];
let b = [$($b as $t),*];
// Mask selecting the low `n` bits of a lane.
let n_bit_mask: $t = (1 << $n) - 1;
let e = [$(($a as $t & n_bit_mask) | (($b as $t) << $n)),*];
let r = $fn_id::<$n>(transmute(a), transmute(b));
// `d` starts as a copy of `e` so that it has `e`'s array type; that type then becomes
// the target of the `transmute` below (hence `allow(unused_assignments)`).
let mut d = e;
d = transmute(r);
assert_eq!(d, e);
}
}
}
// Signed lanes.
test_vsli!(test_vsli_n_s8, i8 => vsli_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
test_vsli!(test_vsliq_n_s8, i8 => vsliq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
test_vsli!(test_vsli_n_s16, i16 => vsli_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
test_vsli!(test_vsliq_n_s16, i16 => vsliq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
test_vsli!(test_vsli_n_s32, i32 => vsli_n_s32([125683, -78901], [-128, -112944], 23));
test_vsli!(test_vsliq_n_s32, i32 => vsliq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
test_vsli!(test_vsli_n_s64, i64 => vsli_n_s64([-333333], [1028], 45));
test_vsli!(test_vsliq_n_s64, i64 => vsliq_n_s64([-333333, -52023], [1028, -99814], 33));
// Unsigned lanes.
test_vsli!(test_vsli_n_u8, u8 => vsli_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
test_vsli!(test_vsliq_n_u8, u8 => vsliq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
test_vsli!(test_vsli_n_u16, u16 => vsli_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
test_vsli!(test_vsliq_n_u16, u16 => vsliq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
test_vsli!(test_vsli_n_u32, u32 => vsli_n_u32([125683, 78901], [128, 112944], 23));
test_vsli!(test_vsliq_n_u32, u32 => vsliq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
test_vsli!(test_vsli_n_u64, u64 => vsli_n_u64([333333], [1028], 45));
test_vsli!(test_vsliq_n_u64, u64 => vsliq_n_u64([333333, 52023], [1028, 99814], 33));
// Polynomial lanes; the scalar model uses the signed integer type of the same width.
test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
// The p64 variants are currently disabled.
//test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45));
//test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33));
// Generates one `#[simd_test]` that checks a `vsri[q]_n_*` intrinsic against a scalar model.
//
// Arguments: test function name, lane scalar type `=>` intrinsic name, the lanes of `a`,
// the lanes of `b`, and the shift amount `n` (also used as the intrinsic's const generic).
// Expected lane value: `(a & high_n_bits) | ((b >> n) & !high_n_bits)`.
macro_rules! test_vsri {
($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
#[simd_test(enable = "neon")]
#[allow(unused_assignments)]
unsafe fn $test_id() {
let a = [$($a as $t),*];
let b = [$($b as $t),*];
// Build `n` low one-bits, then rotate them into the top `n` bits of the lane.
let n_bit_mask = (((1 as $t) << $n) - 1).rotate_right($n);
// Masking with `!n_bit_mask` clears the top `n` bits of the shifted `b` lane.
let e = [$(($a as $t & n_bit_mask) | (($b as $t >> $n) & !n_bit_mask)),*];
let r = $fn_id::<$n>(transmute(a), transmute(b));
// `d` starts as a copy of `e` so that it has `e`'s array type; that type then becomes
// the target of the `transmute` below (hence `allow(unused_assignments)`).
let mut d = e;
d = transmute(r);
assert_eq!(d, e);
}
}
}
// Signed lanes.
test_vsri!(test_vsri_n_s8, i8 => vsri_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
test_vsri!(test_vsriq_n_s8, i8 => vsriq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
test_vsri!(test_vsri_n_s16, i16 => vsri_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
test_vsri!(test_vsriq_n_s16, i16 => vsriq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
test_vsri!(test_vsri_n_s32, i32 => vsri_n_s32([125683, -78901], [-128, -112944], 23));
test_vsri!(test_vsriq_n_s32, i32 => vsriq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
test_vsri!(test_vsri_n_s64, i64 => vsri_n_s64([-333333], [1028], 45));
test_vsri!(test_vsriq_n_s64, i64 => vsriq_n_s64([-333333, -52023], [1028, -99814], 33));
// Unsigned lanes.
test_vsri!(test_vsri_n_u8, u8 => vsri_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
test_vsri!(test_vsriq_n_u8, u8 => vsriq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
test_vsri!(test_vsri_n_u16, u16 => vsri_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
test_vsri!(test_vsriq_n_u16, u16 => vsriq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
test_vsri!(test_vsri_n_u32, u32 => vsri_n_u32([125683, 78901], [128, 112944], 23));
test_vsri!(test_vsriq_n_u32, u32 => vsriq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
test_vsri!(test_vsri_n_u64, u64 => vsri_n_u64([333333], [1028], 45));
test_vsri!(test_vsriq_n_u64, u64 => vsriq_n_u64([333333, 52023], [1028, 99814], 33));
// Polynomial lanes; the scalar model uses the signed integer type of the same width.
test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
// The p64 variants are currently disabled.
//test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45));
//test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33));

View file

@ -0,0 +1,389 @@
//! Tests for ARM+v7+neon store (vst1) intrinsics.
//!
//! These are included in `{arm, aarch64}::neon`.
use super::*;
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s8() {
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_i8; 9];
    let lanes = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1_s8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s8() {
    // Slot 0 is a guard element; the store targets slots 1..=16.
    let mut buf = [0_i8; 17];
    let lanes = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    vst1q_s8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s16() {
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_i16; 5];
    let lanes = i16x4::new(1, 2, 3, 4);
    vst1_s16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i16);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s16() {
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_i16; 9];
    let lanes = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1q_s16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i16);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s32() {
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_i32; 3];
    let lanes = i32x2::new(1, 2);
    vst1_s32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i32);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s32() {
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_i32; 5];
    let lanes = i32x4::new(1, 2, 3, 4);
    vst1q_s32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i32);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s64() {
    // Slot 0 is a guard element; the store targets slot 1 only.
    let mut buf = [0_i64; 2];
    let lanes = i64x1::new(1);
    vst1_s64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i64);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s64() {
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_i64; 3];
    let lanes = i64x2::new(1, 2);
    vst1q_s64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as i64);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u8() {
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_u8; 9];
    let lanes = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1_u8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u8() {
    // Slot 0 is a guard element; the store targets slots 1..=16.
    let mut buf = [0_u8; 17];
    let lanes = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    vst1q_u8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u16() {
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_u16; 5];
    let lanes = u16x4::new(1, 2, 3, 4);
    vst1_u16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u16);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u16() {
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_u16; 9];
    let lanes = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1q_u16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u16);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u32() {
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_u32; 3];
    let lanes = u32x2::new(1, 2);
    vst1_u32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u32);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u32() {
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_u32; 5];
    let lanes = u32x4::new(1, 2, 3, 4);
    vst1q_u32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u32);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u64() {
    // Slot 0 is a guard element; the store targets slot 1 only.
    let mut buf = [0_u64; 2];
    let lanes = u64x1::new(1);
    vst1_u64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u64);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u64() {
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_u64; 3];
    let lanes = u64x2::new(1, 2);
    vst1q_u64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u64);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p8() {
    // Polynomial lanes are stored into a plain `u8` buffer.
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_u8; 9];
    let lanes = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1_p8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p8() {
    // Polynomial lanes are stored into a plain `u8` buffer.
    // Slot 0 is a guard element; the store targets slots 1..=16.
    let mut buf = [0_u8; 17];
    let lanes = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    vst1q_p8(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u8);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p16() {
    // Polynomial lanes are stored into a plain `u16` buffer.
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_u16; 5];
    let lanes = u16x4::new(1, 2, 3, 4);
    vst1_p16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u16);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p16() {
    // Polynomial lanes are stored into a plain `u16` buffer.
    // Slot 0 is a guard element; the store targets slots 1..=8.
    let mut buf = [0_u16; 9];
    let lanes = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    vst1q_p16(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u16);
    }
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1_p64() {
    // Polynomial lanes are stored into a plain `u64` buffer.
    // Slot 0 is a guard element; the store targets slot 1 only.
    let mut buf = [0_u64; 2];
    let lanes = u64x1::new(1);
    vst1_p64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u64);
    }
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1q_p64() {
    // Polynomial lanes are stored into a plain `u64` buffer.
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_u64; 3];
    let lanes = u64x2::new(1, 2);
    vst1q_p64(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as u64);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f32() {
    // Slot 0 is a guard element; the store targets slots 1..=2.
    let mut buf = [0_f32; 3];
    let lanes = f32x2::new(1., 2.);
    vst1_f32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0.0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as f32);
    }
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_f32() {
    // Slot 0 is a guard element; the store targets slots 1..=4.
    let mut buf = [0_f32; 5];
    let lanes = f32x4::new(1., 2., 3., 4.);
    vst1q_f32(buf[1..].as_mut_ptr(), transmute(lanes));
    // The guard must still be 0.0 and slot k must hold lane value k.
    for (slot, &stored) in buf.iter().enumerate() {
        assert_eq!(stored, slot as f32);
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,836 @@
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use std::{mem::transmute, vec::Vec};
// Sample `u8` operands (zero, small values, single high bit, nibble masks,
// all ones). Expands to a fresh `Vec<u8>` on every use.
macro_rules! V_u8 {
    () => {
        // The suffix on the first literal fixes the element type for the rest.
        vec![0x00_u8, 0x01, 0x02, 0x0F, 0x80, 0xF0, 0xFF]
    };
}
// Sample `u16` operands. Expands to a fresh `Vec<u16>` on every use.
macro_rules! V_u16 {
    () => {
        // The suffix on the first literal fixes the element type for the rest.
        vec![0x0000_u16, 0x0101, 0x0202, 0x0F0F, 0x8000, 0xF0F0, 0xFFFF]
    };
}
// Sample `u32` operands. Expands to a fresh `Vec<u32>` on every use.
macro_rules! V_u32 {
    () => {
        // The suffix on the first literal fixes the element type for the rest.
        vec![
            0x0000_0000_u32,
            0x0101_0101,
            0x0202_0202,
            0x0F0F_0F0F,
            0x8000_0000,
            0xF0F0_F0F0,
            0xFFFF_FFFF,
        ]
    };
}
// Sample `u64` operands. Expands to a fresh `Vec<u64>` on every use.
macro_rules! V_u64 {
    () => {
        // The suffix on the first literal fixes the element type for the rest.
        vec![
            0x0000_0000_0000_0000_u64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            0x8080_8080_8080_8080,
            0xF0F0_F0F0_F0F0_F0F0,
            0xFFFF_FFFF_FFFF_FFFF,
        ]
    };
}
// Sample `i8` operands. Expands to a fresh `Vec<i8>` on every use.
macro_rules! V_i8 {
    () => {
        vec![
            0x00_i8,
            0x01,
            0x02,
            0x0F,
            // Negative entries are spelled as their 8-bit patterns and
            // reinterpreted as signed: -128, -16 and -1 respectively.
            0x80_u8 as i8,
            0xF0_u8 as i8,
            0xFF_u8 as i8,
        ]
    };
}
// Sample `i16` operands; passed as the `vals` argument by the `gen_test_fn!`
// invocations for 16-bit signed lanes. Negative entries are written in
// decimal, with the equivalent 16-bit two's-complement pattern in the
// trailing comment.
macro_rules! V_i16 {
() => {
vec![
0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */
-3856i16, /* 0xF0F0 */
-1i16, /* 0xFFFF */
]
};
}
// Sample `i32` operands; passed as the `vals` argument by the `gen_test_fn!`
// invocations for 32-bit signed lanes. Negative entries are written in
// decimal, with the equivalent 32-bit two's-complement pattern in the
// trailing comment.
// NOTE(review): the first negative entry is the pattern 0x80808080, whereas
// `V_u32!` uses 0x80000000 in the same position. The value is left as-is;
// confirm which one was intended.
macro_rules! V_i32 {
() => {
vec![
0x00000000i32,
0x01010101i32,
0x02020202i32,
0x0F0F0F0Fi32,
-2139062144i32, /* 0x80808080 */
-252645136i32, /* 0xF0F0F0F0 */
-1i32, /* 0xFFFFFFFF */
]
};
}
// Sample `i64` operands. Expands to a fresh `Vec<i64>` on every use.
macro_rules! V_i64 {
    () => {
        vec![
            0x0000_0000_0000_0000_i64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            // Negative entries are spelled as their 64-bit patterns and
            // reinterpreted as signed: i64::MIN, -(2^60) and -1 respectively.
            0x8000_0000_0000_0000_u64 as i64,
            0xF000_0000_0000_0000_u64 as i64,
            0xFFFF_FFFF_FFFF_FFFF_u64 as i64,
        ]
    };
}
// Sample `f32` operands: ordinary values, the finite extremes, both
// infinities and a NaN. Expands to a fresh `Vec<f32>` on every use.
macro_rules! V_f32 {
    () => {
        // The suffix on the first literal fixes the element type for the rest.
        vec![
            0.0_f32,
            1.0,
            -1.0,
            1.2,
            2.4,
            f32::MAX,
            f32::MIN,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::NAN,
        ]
    };
}
// Expands to a non-capturing closure that reinterprets a value of type `$src`
// as a `u64`. `transmute` requires both types to have the same size and is an
// unsafe fn, so the expansion has to sit inside an `unsafe` context.
macro_rules! to64 {
    ($src:ident) => {
        |value: $src| -> u64 { transmute::<$src, u64>(value) }
    };
}
// Expands to a non-capturing closure that reinterprets a value of type `$src`
// as a `u128`. `transmute` requires both types to have the same size and is
// an unsafe fn, so the expansion has to sit inside an `unsafe` context.
macro_rules! to128 {
    ($src:ident) => {
        |value: $src| -> u128 { transmute::<$src, u128>(value) }
    };
}
// Generic driver comparing a vector operation against a scalar reference.
//
// For each sample value the scalar operands are splatted with `fill1`, run
// through `test_fun`, and the result is compared (after `cast`) with the
// scalar reference `verify_fun` splatted through `fill2`. Panics via
// `assert_eq!` on the first mismatch.
//
// * `vals`       - scalar sample values
// * `fill1`      - builds an input vector `V` from a scalar `T`
// * `fill2`      - builds an expected-result vector `W` from a scalar `U`
// * `cast`       - converts a result vector into a comparable value `X`
// * `test_fun`   - operation under test
// * `verify_fun` - scalar reference implementation
//
// NOTE(review): `vals` is zipped with itself, so both operands are always the
// same sample (v0,v0), (v1,v1), ...; mixed pairs are never exercised. Confirm
// whether a full cross product was intended.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + core::fmt::Debug + std::cmp::PartialEq,
    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
    V: Copy + core::fmt::Debug,
    W: Copy + core::fmt::Debug,
    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
    for (&lhs, &rhs) in vals.iter().zip(&vals) {
        let a = fill1(lhs);
        let b = fill1(rhs);
        let actual_pre = test_fun(a, b);
        let expected_pre = fill2(verify_fun(lhs, rhs));
        assert_eq!(
            cast(actual_pre),
            cast(expected_pre),
            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
            lhs,
            rhs,
            &a,
            &b,
            actual_pre,
            &a,
            &b,
            expected_pre
        );
    }
}
// Generates a concrete `pub(crate)` wrapper around the generic `test` driver.
//
// Positional arguments: wrapper name, scalar input type, scalar result type,
// input vector type, result vector type, comparison type, sample values,
// input fill function, result fill function, and the result cast.
// The generated function takes the operation under test and its scalar
// reference implementation.
macro_rules! gen_test_fn {
    ($name:ident, $elem:ident, $res:ident, $vec_in:ident, $vec_out:ident, $bits:ident, $vals:expr, $fill1:expr, $fill2:expr, $cast:expr) => {
        pub(crate) fn $name(
            test_fun: fn($vec_in, $vec_in) -> $vec_out,
            verify_fun: fn($elem, $elem) -> $res,
        ) {
            // The invocations in this file pass a `to64!`/`to128!` closure as
            // `$cast`; its body calls `transmute`, hence the `unsafe` block
            // around the whole call.
            unsafe {
                test::<$elem, $res, $vec_in, $vec_out, $bits>(
                    $vals, $fill1, $fill2, $cast, test_fun, verify_fun,
                )
            };
        }
    };
}
// Generates `pub(crate) fn $id(val: $in_t) -> $out_t`, which repeats `val` in
// all `$num_els` lanes and reinterprets the resulting array as `$out_t`.
//
// The bits pass through the integer type `$cmp_t` on the way, so the array,
// `$cmp_t` and `$out_t` must all have the same size (enforced by `transmute`).
// `$el_width` is not used by the expansion; it only documents the lane width
// at the call site.
macro_rules! gen_fill_fn {
    ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
        pub(crate) fn $id(val: $in_t) -> $out_t {
            unsafe {
                let lanes = [val; $num_els];
                let bits = transmute::<[$in_t; $num_els], $cmp_t>(lanes);
                transmute::<$cmp_t, $out_t>(bits)
            }
        }
    };
}
// Splat helpers, one per NEON vector type used by the harness.
// Arguments: (function name, lane width in bits, lane count, scalar type,
// vector type, same-sized integer used as the intermediate bit pattern).
// `fill_*` variants build 64-bit vectors (via `u64`); `fillq_*` variants
// build 128-bit vectors (via `u128`).
gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
// Concrete harness entry points. Each `gen_test_fn!` invocation from here on
// lists, in order: wrapper name, scalar input type, scalar result type, input
// vector type, result vector type, integer type used for comparison, sample
// values, input splat function, result splat function, and the result cast.
// In the `*_cmp_*` wrappers for signed and `f32` lanes the scalar result and
// result vector are the unsigned types of the same lane width.
// NOTE(review): the `ari` / `bit` / `cmp` name parts presumably stand for
// arithmetic, bitwise and comparison operations — confirm against callers.
gen_test_fn!(
test_ari_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
test_bit_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
test_cmp_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
testq_ari_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
testq_bit_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
testq_cmp_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
test_ari_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
gen_test_fn!(
test_bit_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
gen_test_fn!(
test_cmp_s8,
i8,
u8,
int8x8_t,
uint8x8_t,
u64,
V_i8!(),
fill_s8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
testq_ari_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
gen_test_fn!(
testq_bit_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
gen_test_fn!(
testq_cmp_s8,
i8,
u8,
int8x16_t,
uint8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
test_ari_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
test_bit_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
test_cmp_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
testq_ari_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
testq_bit_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
testq_cmp_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
test_ari_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
gen_test_fn!(
test_bit_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
gen_test_fn!(
test_cmp_s16,
i16,
u16,
int16x4_t,
uint16x4_t,
u64,
V_i16!(),
fill_s16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
testq_ari_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
gen_test_fn!(
testq_bit_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
gen_test_fn!(
testq_cmp_s16,
i16,
u16,
int16x8_t,
uint16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
test_ari_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
test_bit_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
test_cmp_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
testq_bit_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
testq_cmp_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
test_ari_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
gen_test_fn!(
test_bit_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
gen_test_fn!(
test_cmp_s32,
i32,
u32,
int32x2_t,
uint32x2_t,
u64,
V_i32!(),
fill_s32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
gen_test_fn!(
testq_bit_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
gen_test_fn!(
testq_cmp_s32,
i32,
u32,
int32x4_t,
uint32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
test_ari_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
test_bit_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
test_cmp_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_ari_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
testq_bit_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
testq_cmp_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
test_ari_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
gen_test_fn!(
test_bit_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
gen_test_fn!(
test_cmp_s64,
i64,
u64,
int64x1_t,
uint64x1_t,
u64,
V_i64!(),
fill_s64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_ari_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
gen_test_fn!(
testq_bit_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
gen_test_fn!(
testq_cmp_s64,
i64,
u64,
int64x2_t,
uint64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
test_ari_f32,
f32,
f32,
float32x2_t,
float32x2_t,
u64,
V_f32!(),
fill_f32,
fill_f32,
to64!(float32x2_t)
);
gen_test_fn!(
test_cmp_f32,
f32,
u32,
float32x2_t,
uint32x2_t,
u64,
V_f32!(),
fill_f32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_f32,
f32,
f32,
float32x4_t,
float32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_f32,
to128!(float32x4_t)
);
gen_test_fn!(
testq_cmp_f32,
f32,
u32,
float32x4_t,
uint32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_u32,
to128!(uint32x4_t)
);

View file

@ -0,0 +1,350 @@
SIMD and vendor intrinsics module.
This module is intended to be the gateway to architecture-specific
intrinsic functions, typically related to SIMD (but not always!). Each
architecture that Rust compiles to may contain a submodule here, which
means that this is not a portable module! If you're writing a portable
library take care when using these APIs!
Under this module you'll find an architecture-named module, such as
`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a
module entry here, only present on that particular target. For example the
`i686-pc-windows-msvc` target will have an `x86` module here, whereas
`x86_64-pc-windows-msvc` has `x86_64`.
[rfc]: https://github.com/rust-lang/rfcs/pull/2325
[tracked]: https://github.com/rust-lang/rust/issues/48556
# Overview
This module exposes vendor-specific intrinsics that typically correspond to
a single machine instruction. These intrinsics are not portable: their
availability is architecture-dependent, and not all machines of that
architecture might provide the intrinsic.
The `arch` module is intended to be a low-level implementation detail for
higher-level APIs. Using it correctly can be quite tricky as you need to
ensure at least a few guarantees are upheld:
* The correct architecture's module is used. For example the `arm` module
isn't available on the `x86_64-unknown-linux-gnu` target. This is
typically done by ensuring that `#[cfg]` is used appropriately when using
this module.
* The CPU the program is currently running on supports the function being
called. For example it is unsafe to call an AVX2 function on a CPU that
doesn't actually support AVX2.
As a result of the latter of these guarantees all intrinsics in this module
are `unsafe` and extra care needs to be taken when calling them!
# CPU Feature Detection
In order to call these APIs in a safe fashion there are a number of
mechanisms available to ensure that the correct CPU feature is available
to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64`
intrinsics on the `x86` and `x86_64` architectures. This function requires
the AVX2 feature as [documented by Intel][intel-dox] so to correctly call
this function we need to (a) guarantee we only call it on `x86`/`x86_64`
and (b) ensure that the CPU feature is available.
[intel-dox]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64&expand=100
## Static CPU Feature Detection
The first option available to us is to conditionally compile code via the
`#[cfg]` attribute. CPU features correspond to the `target_feature` cfg
available, and can be used like so:
```ignore
#[cfg(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
)
)]
fn foo() {
#[cfg(target_arch = "x86")]
use std::arch::x86::_mm256_add_epi64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm256_add_epi64;
unsafe {
_mm256_add_epi64(...);
}
}
```
Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile
this function into our module. This means that if the `avx2` feature is
*enabled statically* then we'll use the `_mm256_add_epi64` function at
runtime. The `unsafe` block here can be justified through the usage of
`#[cfg]` to only compile the code in situations where the safety guarantees
are upheld.
Statically enabling a feature is typically done with the `-C
target-feature` or `-C target-cpu` flags to the compiler. For example if
your local CPU supports AVX2 then you can compile the above function with:
```sh
$ RUSTFLAGS='-C target-cpu=native' cargo build
```
Or otherwise you can specifically enable just the AVX2 feature:
```sh
$ RUSTFLAGS='-C target-feature=+avx2' cargo build
```
Note that when you compile a binary with a particular feature enabled it's
important to ensure that you only run the binary on systems which satisfy
the required feature set.
## Dynamic CPU Feature Detection
Sometimes statically dispatching isn't quite what you want. Instead you
might want to build a portable binary that runs across a variety of CPUs,
but at runtime it selects the most optimized implementation available. This
allows you to build a "least common denominator" binary which has certain
sections more optimized for different CPUs.
Taking our example from before, we're going to compile our binary
*without* AVX2 support, but we'd like to enable it for just one function.
We can do that in a manner like:
```ignore
fn foo() {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") {
return unsafe { foo_avx2() };
}
}
// fallback implementation without using AVX2
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn foo_avx2() {
#[cfg(target_arch = "x86")]
use std::arch::x86::_mm256_add_epi64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm256_add_epi64;
unsafe { _mm256_add_epi64(...); }
}
```
There's a couple of components in play here, so let's go through them in
detail!
* First up we notice the `is_x86_feature_detected!` macro. Provided by
the standard library, this macro will perform necessary runtime detection
to determine whether the CPU the program is running on supports the
specified feature. In this case the macro will expand to a boolean
expression evaluating to whether the local CPU has the AVX2 feature or
not.
Note that this macro, like the `arch` module, is platform-specific. For
example calling `is_x86_feature_detected!("avx2")` on ARM will be a
compile time error. To ensure we don't hit this error a statement level
`#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`.
* Next up we see our AVX2-enabled function, `foo_avx2`. This function is
decorated with the `#[target_feature]` attribute which enables a CPU
feature for just this one function. Using a compiler flag like `-C
target-feature=+avx2` will enable AVX2 for the entire program, but using
an attribute will only enable it for the one function. Usage of the
`#[target_feature]` attribute currently requires the function to also be
`unsafe`, as we see here. This is because the function can only be
correctly called on systems which have the AVX2 feature (like the
intrinsics themselves).
And with all that we should have a working program! This program will run
across all machines and it'll use the optimized AVX2 implementation on
machines where support is detected.
# Ergonomics
It's important to note that using the `arch` module is not the easiest
thing in the world, so if you're curious to try it out you may want to
brace yourself for some wordiness!
The primary purpose of this module is to enable stable crates on crates.io
to build up much more ergonomic abstractions which end up using SIMD under
the hood. Over time these abstractions may also move into the standard
library itself, but for now this module is tasked with providing the bare
minimum necessary to use vendor intrinsics on stable Rust.
# Other architectures
This documentation is only for one particular architecture; you can find
others at:
* [`x86`]
* [`x86_64`]
* [`arm`]
* [`aarch64`]
* [`riscv32`]
* [`riscv64`]
* [`mips`]
* [`mips64`]
* [`powerpc`]
* [`powerpc64`]
* [`nvptx`]
* [`wasm32`]
* [`loongarch64`]
* [`s390x`]
[`x86`]: ../../core/arch/x86/index.html
[`x86_64`]: ../../core/arch/x86_64/index.html
[`arm`]: ../../core/arch/arm/index.html
[`aarch64`]: ../../core/arch/aarch64/index.html
[`riscv32`]: ../../core/arch/riscv32/index.html
[`riscv64`]: ../../core/arch/riscv64/index.html
[`mips`]: ../../core/arch/mips/index.html
[`mips64`]: ../../core/arch/mips64/index.html
[`powerpc`]: ../../core/arch/powerpc/index.html
[`powerpc64`]: ../../core/arch/powerpc64/index.html
[`nvptx`]: ../../core/arch/nvptx/index.html
[`wasm32`]: ../../core/arch/wasm32/index.html
[`loongarch64`]: ../../core/arch/loongarch64/index.html
[`s390x`]: ../../core/arch/s390x/index.html
# Examples
First let's take a look at not actually using any intrinsics but instead
using LLVM's auto-vectorization to produce optimized vectorized code for
AVX2 and also for the default platform.
```rust
fn main() {
let mut dst = [0];
add_quickly(&[1], &[2], &mut dst);
assert_eq!(dst[0], 3);
}
fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
// Note that this `unsafe` block is safe because we're testing
// that the `avx2` feature is indeed available on our CPU.
if is_x86_feature_detected!("avx2") {
return unsafe { add_quickly_avx2(a, b, c) };
}
}
add_quickly_fallback(a, b, c)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) {
add_quickly_fallback(a, b, c) // the function below is inlined here
}
fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) {
for ((a, b), c) in a.iter().zip(b).zip(c) {
*c = *a + *b;
}
}
```
Next up let's take a look at an example of manually using intrinsics. Here
we'll be using SSE4.1 features to implement hex encoding.
```
fn main() {
let mut dst = [0; 32];
hex_encode(b"\x01\x02\x03", &mut dst);
assert_eq!(&dst[..6], b"010203");
let mut src = [0; 16];
for i in 0..16 {
src[i] = (i + 1) as u8;
}
hex_encode(&src, &mut dst);
assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10");
}
pub fn hex_encode(src: &[u8], dst: &mut [u8]) {
let len = src.len().checked_mul(2).unwrap();
assert!(dst.len() >= len);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("sse4.1") {
return unsafe { hex_encode_sse41(src, dst) };
}
}
hex_encode_fallback(src, dst)
}
// translated from
// <https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp>
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
unsafe {
let ascii_zero = _mm_set1_epi8(b'0' as i8);
let nines = _mm_set1_epi8(9);
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm_set1_epi8(0xf);
let mut i = 0_isize;
while src.len() >= 16 {
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
let masked1 = _mm_and_si128(invec, and4bits);
let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
// return 0xff corresponding to the elements > 9, or 0x00 otherwise
let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
// add '0' or the offset depending on the masks
let masked1 = _mm_add_epi8(
masked1,
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
);
let masked2 = _mm_add_epi8(
masked2,
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
);
// interleave masked1 and masked2 bytes
let res1 = _mm_unpacklo_epi8(masked2, masked1);
let res2 = _mm_unpackhi_epi8(masked2, masked1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
_mm_storeu_si128(
dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
res2,
);
src = &src[16..];
i += 16;
}
let i = i as usize;
hex_encode_fallback(src, &mut dst[i * 2..]);
}
}
fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
fn hex(byte: u8) -> u8 {
static TABLE: &[u8] = b"0123456789abcdef";
TABLE[byte as usize]
}
for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
slots[0] = hex((*byte >> 4) & 0xf);
slots[1] = hex(*byte & 0xf);
}
}
```

View file

@ -0,0 +1,94 @@
#![doc = include_str!("core_arch_docs.md")]
#![allow(improper_ctypes_definitions)]
#![allow(dead_code)]
#![allow(unused_features)]
#![allow(internal_features)]
#![allow(unsafe_op_in_unsafe_fn)]
#![deny(rust_2018_idioms)]
#![feature(
custom_inner_attributes,
link_llvm_intrinsics,
repr_simd,
simd_ffi,
proc_macro_hygiene,
stmt_expr_attributes,
core_intrinsics,
no_core,
fmt_helpers_for_derive,
rustc_attrs,
staged_api,
doc_cfg,
tbm_target_feature,
sse4a_target_feature,
riscv_target_feature,
arm_target_feature,
mips_target_feature,
powerpc_target_feature,
s390x_target_feature,
loongarch_target_feature,
wasm_target_feature,
abi_unadjusted,
rtm_target_feature,
allow_internal_unstable,
decl_macro,
asm_experimental_arch,
x86_amx_intrinsics,
f16,
aarch64_unstable_target_feature,
bigint_helper_methods
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]
#![allow(
clippy::identity_op,
clippy::inline_always,
clippy::too_many_arguments,
clippy::cast_sign_loss,
clippy::cast_lossless,
clippy::cast_possible_wrap,
clippy::cast_possible_truncation,
clippy::cast_precision_loss,
clippy::cognitive_complexity,
clippy::many_single_char_names,
clippy::missing_safety_doc,
clippy::shadow_reuse,
clippy::similar_names,
clippy::unusual_byte_groupings,
clippy::wrong_self_convention
)]
#![cfg_attr(test, allow(unused_imports))]
#![no_std]
#![stable(feature = "stdsimd", since = "1.27.0")]
#![doc(
test(attr(deny(warnings))),
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
)]
#![cfg_attr(
test,
feature(
stdarch_arm_feature_detection,
stdarch_powerpc_feature_detection,
stdarch_s390x_feature_detection
)
)]
// `std` (and its macros) is linked only for test builds; the crate itself is
// declared `#![no_std]` above.
#[cfg(test)]
#[macro_use]
extern crate std;
// `std_detect` is likewise test-only, with its macros imported crate-wide.
#[cfg(test)]
#[macro_use]
extern crate std_detect;
// The file `mod.rs` is mounted as the private `core_arch` module.
#[path = "mod.rs"]
mod core_arch;
// Public `arch` module: re-exports everything from `core_arch::arch` and the
// `asm` macro from `core::arch`.
#[stable(feature = "stdsimd", since = "1.27.0")]
pub mod arch {
#[stable(feature = "stdsimd", since = "1.27.0")]
#[allow(unused_imports)]
pub use crate::core_arch::arch::*;
#[stable(feature = "stdsimd", since = "1.27.0")]
pub use core::arch::asm;
}
// NOTE(review): these `core` modules are imported at the crate root with the
// unused-import lint silenced; presumably so that `crate::mem`, `crate::ptr`,
// etc. resolve elsewhere in the crate — confirm.
#[allow(unused_imports)]
use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,21 @@
//! LoongArch64 LASX intrinsics
#![allow(non_camel_case_types)]
#[rustfmt::skip]
mod types;
#[rustfmt::skip]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::types::*;
#[rustfmt::skip]
mod generated;
#[rustfmt::skip]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::generated::*;
#[rustfmt::skip]
#[cfg(test)]
mod tests;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,33 @@
// LASX vector types, generated by the crate-internal `types!` macro.
// Every type below is 256 bits wide (element count x element width) and
// carries the `stdarch_loongarch` unstable attribute given on the first line.
types! {
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
/// LOONGARCH-specific 256-bit wide vector of 32 packed `i8`.
pub struct v32i8(32 x pub(crate) i8);
/// LOONGARCH-specific 256-bit wide vector of 16 packed `i16`.
pub struct v16i16(16 x pub(crate) i16);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `i32`.
pub struct v8i32(8 x pub(crate) i32);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `i64`.
pub struct v4i64(4 x pub(crate) i64);
/// LOONGARCH-specific 256-bit wide vector of 32 packed `u8`.
pub struct v32u8(32 x pub(crate) u8);
/// LOONGARCH-specific 256-bit wide vector of 16 packed `u16`.
pub struct v16u16(16 x pub(crate) u16);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `u32`.
pub struct v8u32(8 x pub(crate) u32);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `u64`.
pub struct v4u64(4 x pub(crate) u64);
/// LOONGARCH-specific 256-bit wide vector of 8 packed `f32`.
pub struct v8f32(8 x pub(crate) f32);
/// LOONGARCH-specific 256-bit wide vector of 4 packed `f64`.
pub struct v4f64(4 x pub(crate) f64);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,21 @@
//! LoongArch64 LSX intrinsics
#![allow(non_camel_case_types)]
#[rustfmt::skip]
mod types;
#[rustfmt::skip]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::types::*;
#[rustfmt::skip]
mod generated;
#[rustfmt::skip]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::generated::*;
#[rustfmt::skip]
#[cfg(test)]
mod tests;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,33 @@
// LSX vector types, generated by the crate-internal `types!` macro.
// Every type below is 128 bits wide (element count x element width) and
// carries the `stdarch_loongarch` unstable attribute given on the first line.
types! {
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
/// LOONGARCH-specific 128-bit wide vector of 16 packed `i8`.
pub struct v16i8(16 x pub(crate) i8);
/// LOONGARCH-specific 128-bit wide vector of 8 packed `i16`.
pub struct v8i16(8 x pub(crate) i16);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `i32`.
pub struct v4i32(4 x pub(crate) i32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `i64`.
pub struct v2i64(2 x pub(crate) i64);
/// LOONGARCH-specific 128-bit wide vector of 16 packed `u8`.
pub struct v16u8(16 x pub(crate) u8);
/// LOONGARCH-specific 128-bit wide vector of 8 packed `u16`.
pub struct v8u16(8 x pub(crate) u16);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `u32`.
pub struct v4u32(4 x pub(crate) u32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `u64`.
pub struct v2u64(2 x pub(crate) u64);
/// LOONGARCH-specific 128-bit wide vector of 4 packed `f32`.
pub struct v4f32(4 x pub(crate) f32);
/// LOONGARCH-specific 128-bit wide vector of 2 packed `f64`.
pub struct v2f64(2 x pub(crate) f64);
}

View file

@ -0,0 +1,376 @@
//! `LoongArch` intrinsics
mod lasx;
mod lsx;
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::lasx::*;
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub use self::lsx::*;
use crate::arch::asm;
/// Reads the 64-bit stable counter value and the counter ID
///
/// Executes `rdtime.d` and returns `(counter value, counter ID)`.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtime_d() -> (i64, isize) {
    let counter: i64;
    let counter_id: isize;
    // First operand receives the counter value, second the counter ID.
    asm!(
        "rdtime.d {counter}, {counter_id}",
        counter = out(reg) counter,
        counter_id = out(reg) counter_id,
        options(readonly, nostack)
    );
    (counter, counter_id)
}
/// Reads the lower 32-bit stable counter value and the counter ID
///
/// Executes `rdtimel.w` and returns `(counter value, counter ID)`.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtimel_w() -> (i32, isize) {
    let counter_lo: i32;
    let counter_id: isize;
    // First operand receives the counter value, second the counter ID.
    asm!(
        "rdtimel.w {counter_lo}, {counter_id}",
        counter_lo = out(reg) counter_lo,
        counter_id = out(reg) counter_id,
        options(readonly, nostack)
    );
    (counter_lo, counter_id)
}
/// Reads the upper 32-bit stable counter value and the counter ID
///
/// Executes `rdtimeh.w` and returns `(counter value, counter ID)`.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtimeh_w() -> (i32, isize) {
    let counter_hi: i32;
    let counter_id: isize;
    // First operand receives the counter value, second the counter ID.
    asm!(
        "rdtimeh.w {counter_hi}, {counter_id}",
        counter_hi = out(reg) counter_hi,
        counter_id = out(reg) counter_id,
        options(readonly, nostack)
    );
    (counter_hi, counter_id)
}
// Raw declarations of the LLVM intrinsics backing the public wrappers in this
// module. Each `#[link_name]` names the `llvm.loongarch.*` intrinsic that the
// private `__name` function binds to; argument order here is the order the
// wrappers pass through.
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
// CRC (`crc.*`) and CRC-C (`crcc.*`) intrinsics.
#[link_name = "llvm.loongarch.crc.w.b.w"]
fn __crc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.h.w"]
fn __crc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.w.w"]
fn __crc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.d.w"]
fn __crc_w_d_w(a: i64, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.b.w"]
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.h.w"]
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.w.w"]
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.d.w"]
fn __crcc_w_d_w(a: i64, b: i32) -> i32;
// Cache operation and barriers.
#[link_name = "llvm.loongarch.cacop.d"]
fn __cacop(a: i64, b: i64, c: i64);
#[link_name = "llvm.loongarch.dbar"]
fn __dbar(a: i32);
#[link_name = "llvm.loongarch.ibar"]
fn __ibar(a: i32);
// FCSR, CSR and IO-CSR access.
#[link_name = "llvm.loongarch.movgr2fcsr"]
fn __movgr2fcsr(a: i32, b: i32);
#[link_name = "llvm.loongarch.movfcsr2gr"]
fn __movfcsr2gr(a: i32) -> i32;
#[link_name = "llvm.loongarch.csrrd.d"]
fn __csrrd(a: i32) -> i64;
#[link_name = "llvm.loongarch.csrwr.d"]
fn __csrwr(a: i64, b: i32) -> i64;
#[link_name = "llvm.loongarch.csrxchg.d"]
fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
#[link_name = "llvm.loongarch.iocsrrd.b"]
fn __iocsrrd_b(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.h"]
fn __iocsrrd_h(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.w"]
fn __iocsrrd_w(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.d"]
fn __iocsrrd_d(a: i32) -> i64;
#[link_name = "llvm.loongarch.iocsrwr.b"]
fn __iocsrwr_b(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.h"]
fn __iocsrwr_h(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.w"]
fn __iocsrwr_w(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.d"]
fn __iocsrwr_d(a: i64, b: i32);
// Miscellaneous instructions.
#[link_name = "llvm.loongarch.break"]
fn __break(a: i32);
#[link_name = "llvm.loongarch.cpucfg"]
fn __cpucfg(a: i32) -> i32;
#[link_name = "llvm.loongarch.syscall"]
fn __syscall(a: i32);
#[link_name = "llvm.loongarch.asrtle.d"]
fn __asrtle(a: i64, b: i64);
#[link_name = "llvm.loongarch.asrtgt.d"]
fn __asrtgt(a: i64, b: i64);
#[link_name = "llvm.loongarch.lddir.d"]
fn __lddir(a: i64, b: i64) -> i64;
#[link_name = "llvm.loongarch.ldpte.d"]
fn __ldpte(a: i64, b: i64);
// Floating-point estimate intrinsics (wrappers enable the `frecipe` feature).
#[link_name = "llvm.loongarch.frecipe.s"]
fn __frecipe_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frecipe.d"]
fn __frecipe_d(a: f64) -> f64;
#[link_name = "llvm.loongarch.frsqrte.s"]
fn __frsqrte_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frsqrte.d"]
fn __frsqrte_d(a: f64) -> f64;
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.b.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
__crc_w_b_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.h.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
__crc_w_h_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.w.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
__crc_w_w_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
///
/// Forwards `a` (64-bit) and `b` unchanged to the `llvm.loongarch.crc.w.d.w`
/// intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
__crc_w_d_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.b.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
__crcc_w_b_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.h.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
__crcc_w_h_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.w.w` intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
__crcc_w_w_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
///
/// Forwards `a` (64-bit) and `b` unchanged to the `llvm.loongarch.crcc.w.d.w`
/// intrinsic.
// NOTE(review): `a` looks like the data operand and `b` the running CRC — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
__crcc_w_d_w(a, b)
}
/// Generates the cache operation instruction
///
/// `IMM12` is checked at compile time with `static_assert_simm_bits!` to be a
/// 12-bit signed immediate, then `a`, `b` and `IMM12` are passed in that order
/// to the `llvm.loongarch.cacop.d` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
static_assert_simm_bits!(IMM12, 12);
__cacop(a, b, IMM12);
}
/// Generates the memory barrier instruction
///
/// `IMM15` is checked at compile time to fit in 15 unsigned bits and is passed
/// to the `llvm.loongarch.dbar` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn dbar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__dbar(IMM15);
}
/// Generates the instruction-fetch barrier instruction
///
/// `IMM15` is checked at compile time to fit in 15 unsigned bits and is passed
/// to the `llvm.loongarch.ibar` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn ibar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__ibar(IMM15);
}
/// Moves data from a GPR to the FCSR
///
/// `IMM5` is checked at compile time to fit in 5 unsigned bits. Note the
/// argument order of the underlying `llvm.loongarch.movgr2fcsr` intrinsic:
/// the immediate comes first, the value `a` second.
// NOTE(review): `IMM5` presumably selects the FCSR to write — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
static_assert_uimm_bits!(IMM5, 5);
__movgr2fcsr(IMM5, a);
}
/// Moves data from a FCSR to the GPR
///
/// `IMM5` is checked at compile time to fit in 5 unsigned bits and is passed
/// to the `llvm.loongarch.movfcsr2gr` intrinsic, whose result is returned.
// NOTE(review): `IMM5` presumably selects the FCSR to read — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
static_assert_uimm_bits!(IMM5, 5);
__movfcsr2gr(IMM5)
}
/// Reads the CSR
///
/// `IMM14` is checked at compile time to fit in 14 unsigned bits and is passed
/// to the `llvm.loongarch.csrrd.d` intrinsic, whose 64-bit result is returned.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrrd<const IMM14: i32>() -> i64 {
static_assert_uimm_bits!(IMM14, 14);
__csrrd(IMM14)
}
/// Writes the CSR
///
/// `IMM14` is checked at compile time to fit in 14 unsigned bits. `a` and
/// `IMM14` are passed to the `llvm.loongarch.csrwr.d` intrinsic and its
/// 64-bit result is returned.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrwr<const IMM14: i32>(a: i64) -> i64 {
static_assert_uimm_bits!(IMM14, 14);
__csrwr(a, IMM14)
}
/// Exchanges the CSR
///
/// `IMM14` is checked at compile time to fit in 14 unsigned bits. `a`, `b`
/// and `IMM14` are passed in that order to the `llvm.loongarch.csrxchg.d`
/// intrinsic and its 64-bit result is returned.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
static_assert_uimm_bits!(IMM14, 14);
__csrxchg(a, b, IMM14)
}
/// Reads the 8-bit IO-CSR
///
/// Forwards `a` to the `llvm.loongarch.iocsrrd.b` intrinsic and returns its
/// result.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
__iocsrrd_b(a)
}
/// Reads the 16-bit IO-CSR
///
/// Forwards `a` to the `llvm.loongarch.iocsrrd.h` intrinsic and returns its
/// result.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
__iocsrrd_h(a)
}
/// Reads the 32-bit IO-CSR
///
/// Forwards `a` to the `llvm.loongarch.iocsrrd.w` intrinsic and returns its
/// result.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
__iocsrrd_w(a)
}
/// Reads the 64-bit IO-CSR
///
/// Forwards `a` to the `llvm.loongarch.iocsrrd.d` intrinsic and returns its
/// 64-bit result.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_d(a: i32) -> i64 {
__iocsrrd_d(a)
}
/// Writes the 8-bit IO-CSR
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.b` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
__iocsrwr_b(a, b)
}
/// Writes the 16-bit IO-CSR
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.h` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
__iocsrwr_h(a, b)
}
/// Writes the 32-bit IO-CSR
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.w` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
__iocsrwr_w(a, b)
}
/// Writes the 64-bit IO-CSR
///
/// Forwards `a` (64-bit) and `b` unchanged to the `llvm.loongarch.iocsrwr.d`
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_d(a: i64, b: i32) {
__iocsrwr_d(a, b)
}
/// Generates the breakpoint instruction
///
/// `IMM15` is checked at compile time to fit in 15 unsigned bits and is passed
/// to the `llvm.loongarch.break` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn brk<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__break(IMM15);
}
/// Reads the CPU configuration register
///
/// Forwards `a` to the `llvm.loongarch.cpucfg` intrinsic and returns its
/// result.
// NOTE(review): `a` is presumably the configuration word index — confirm.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn cpucfg(a: i32) -> i32 {
__cpucfg(a)
}
/// Generates the syscall instruction
///
/// `IMM15` is checked at compile time to fit in 15 unsigned bits and is passed
/// to the `llvm.loongarch.syscall` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn syscall<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__syscall(IMM15);
}
/// Generates the less-than-or-equal assertion instruction
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.asrtle.d` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn asrtle(a: i64, b: i64) {
__asrtle(a, b);
}
/// Generates the greater-than assertion instruction
///
/// Forwards `a` and `b` unchanged to the `llvm.loongarch.asrtgt.d` intrinsic.
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn asrtgt(a: i64, b: i64) {
__asrtgt(a, b);
}
/// Loads the page table directory entry
///
/// Passes `a` and the const `B` to the `llvm.loongarch.lddir.d` intrinsic and
/// returns its result. `#[rustc_legacy_const_generics(1)]` additionally lets
/// callers supply `B` as the second positional argument.
#[inline]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
__lddir(a, B)
}
/// Loads the page table entry
///
/// Passes `a` and the const `B` to the `llvm.loongarch.ldpte.d` intrinsic.
/// `#[rustc_legacy_const_generics(1)]` additionally lets callers supply `B`
/// as the second positional argument.
#[inline]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn ldpte<const B: i64>(a: i64) {
__ldpte(a, B)
}
/// Calculate the approximate single-precision result of 1.0 divided by `a`
///
/// Compiled with the `frecipe` target feature enabled; forwards `a` to the
/// `llvm.loongarch.frecipe.s` intrinsic.
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frecipe_s(a: f32) -> f32 {
__frecipe_s(a)
}
/// Calculate the approximate double-precision result of 1.0 divided by `a`
///
/// Compiled with the `frecipe` target feature enabled; forwards `a` to the
/// `llvm.loongarch.frecipe.d` intrinsic.
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frecipe_d(a: f64) -> f64 {
__frecipe_d(a)
}
/// Calculate the approximate single-precision result of dividing 1.0 by the
/// square root of `a`
///
/// Compiled with the `frecipe` target feature enabled; forwards `a` to the
/// `llvm.loongarch.frsqrte.s` intrinsic.
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frsqrte_s(a: f32) -> f32 {
__frsqrte_s(a)
}
/// Calculate the approximate double-precision result of dividing 1.0 by the
/// square root of `a`
///
/// Compiled with the `frecipe` target feature enabled; forwards `a` to the
/// `llvm.loongarch.frsqrte.d` intrinsic.
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frsqrte_d(a: f64) -> f64 {
__frsqrte_d(a)
}

View file

@ -0,0 +1,165 @@
//! Utility macros.
// Compile-time assertion: evaluates `assert!` inside an inline `const` block,
// so a false condition is a build error rather than a runtime panic.
// Accepts a condition, optionally followed by a message expression.
#[allow(unused)]
macro_rules! static_assert {
    ($cond:expr $(, $message:expr)?) => {
        const {
            assert!($cond $(, $message)?);
        }
    };
}
// Compile-time check that the const `$imm` fits in `$bits` unsigned bits,
// i.e. `0 <= $imm < 2^$bits`. Expands to a `static_assert!` whose message is
// "<imm> doesn't fit in <bits> bits".
#[allow(unused_macros)]
macro_rules! static_assert_uimm_bits {
($imm:ident, $bits:expr) => {
// `0 <= $imm` produces a warning if the immediate has an unsigned type
#[allow(unused_comparisons)]
{
static_assert!(
0 <= $imm && $imm < (1 << $bits),
concat!(
stringify!($imm),
" doesn't fit in ",
stringify!($bits),
" bits",
)
)
}
};
}
// Compile-time check that the const `$imm` fits in a `$bits`-bit signed
// (two's-complement) immediate, i.e. `-2^($bits-1) <= $imm < 2^($bits-1)`.
// Expands to a `static_assert!` whose message is
// "<imm> doesn't fit in <bits> bits".
#[allow(unused_macros)]
macro_rules! static_assert_simm_bits {
    ($imm:ident, $bits:expr) => {
        static_assert!(
            // `-1 << ($bits - 1)` is already the smallest representable value
            // (-2^($bits-1)); subtracting one more would wrongly accept an
            // immediate that needs `$bits + 1` bits.
            (-1 << ($bits - 1)) <= $imm && $imm < (1 << ($bits - 1)),
            concat!(
                stringify!($imm),
                " doesn't fit in ",
                stringify!($bits),
                " bits",
            )
        )
    };
}
// Declares `#[repr(simd)]` vector types together with their helper methods
// and `Debug` impl.
//
// Input: one or more inner `#![...]` stability attributes followed by
// `pub struct Name(LEN x VIS ElemType);` items, each optionally preceded by
// doc attributes. The first arm peels one `#![...]` attribute per recursion
// and attaches it to every struct as a `stability: [...]` marker; once none
// are left, the second arm emits the actual items.
#[allow(unused)]
macro_rules! types {
    // Arm 1: move the leading inner attribute onto every struct, then recurse.
    (
        #![$stability_first:meta]
        $(
            #![$stability_more:meta]
        )*

        $(
            $(#[$doc:meta])*
            $(stability: [$stability_already: meta])*
            pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
        )*
    ) => (types! {
        $(
            #![$stability_more]
        )*

        $(
            $(#[$doc])*
            $(stability: [$stability_already])*
            stability: [$stability_first]
            pub struct $name($len x $v $elem_type);
        )*
    });

    // Arm 2: every struct carries at least one `stability:` marker; emit the
    // type, its inherent helpers and its `Debug` impl.
    (
        $(
            $(#[$doc:meta])*
            $(stability: [$stability: meta])+
            pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
        )*
    ) => ($(
        $(#[$doc])*
        $(#[$stability])+
        #[derive(Copy, Clone)]
        #[allow(non_camel_case_types)]
        #[repr(simd)]
        #[allow(clippy::missing_inline_in_public_items)]
        pub struct $name($v [$elem_type; $len]);

        impl $name {
            /// Using `my_simd([x; N])` seemingly fails tests,
            /// so use this internal helper for it instead.
            #[inline(always)]
            $v fn splat(value: $elem_type) -> $name {
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct JustOne([$elem_type; 1]);
                let one = JustOne([value]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(one, one, [0; $len]) }
            }

            /// Returns an array reference containing the entire SIMD vector.
            // Marked `#[inline]` like the other generated helpers; it is also
            // called from the `#[inline]` `Debug::fmt` below.
            #[inline]
            $v const fn as_array(&self) -> &[$elem_type; $len] {
                // SAFETY: this type is just an overaligned `[T; N]` with
                // potential padding at the end, so pointer casting to a
                // `&[T; N]` is safe.
                //
                // NOTE: This deliberately doesn't just use `&self.0` because it may soon be banned
                // see https://github.com/rust-lang/compiler-team/issues/838
                unsafe { &*(self as *const Self as *const [$elem_type; $len]) }
            }

            /// Returns a mutable array reference containing the entire SIMD vector.
            #[inline]
            $v fn as_mut_array(&mut self) -> &mut [$elem_type; $len] {
                // SAFETY: this type is just an overaligned `[T; N]` with
                // potential padding at the end, so pointer casting to a
                // `&mut [T; N]` is safe.
                //
                // NOTE: This deliberately doesn't just use `&mut self.0` because it may soon be banned
                // see https://github.com/rust-lang/compiler-team/issues/838
                unsafe { &mut *(self as *mut Self as *mut [$elem_type; $len]) }
            }
        }

        $(#[$stability])+
        impl crate::fmt::Debug for $name {
            #[inline]
            fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
                crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
            }
        }
    )*);
}
// `#[repr(simd)]` wrapper around a `[u32; LEN]` index array. The
// `simd_shuffle!` macro wraps its `$idx` argument in this type (inside a
// `const` block) before passing it to the `simd_shuffle` intrinsic.
#[allow(unused)]
#[repr(simd)]
pub(crate) struct SimdShuffleIdx<const LEN: usize>(pub(crate) [u32; LEN]);
// Calls the `simd_shuffle` intrinsic on `$x` and `$y`, wrapping the index
// array `$idx` in `SimdShuffleIdx` inside a `const` block so the indices are
// evaluated at compile time. A trailing comma is accepted.
#[allow(unused)]
macro_rules! simd_shuffle {
($x:expr, $y:expr, $idx:expr $(,)?) => {{
$crate::intrinsics::simd::simd_shuffle(
$x,
$y,
const { $crate::core_arch::macros::SimdShuffleIdx($idx) },
)
}};
}
// Calls the `simd_insert` intrinsic with vector `$x`, lane index `$idx`
// (evaluated in a `const` block) and value `$val`. A trailing comma is
// accepted.
#[allow(unused)]
macro_rules! simd_insert {
    ($x:expr, $idx:expr, $val:expr $(,)?) => {{
        $crate::intrinsics::simd::simd_insert(
            $x,
            const { $idx },
            $val,
        )
    }};
}
// Calls the `simd_extract` intrinsic with vector `$x` and lane index `$idx`
// (evaluated in a `const` block). The three-argument form additionally pins
// the result type to `$ty`. A trailing comma is accepted in both forms.
#[allow(unused)]
macro_rules! simd_extract {
    ($x:expr, $idx:expr $(,)?) => {{
        $crate::intrinsics::simd::simd_extract(
            $x,
            const { $idx },
        )
    }};
    ($x:expr, $idx:expr, $ty:ty $(,)?) => {{
        $crate::intrinsics::simd::simd_extract::<_, $ty>(
            $x,
            const { $idx },
        )
    }};
}

View file

@ -0,0 +1,20 @@
//! MIPS
// Building this module (even if unused) for non-fp64 targets fails with an LLVM
// error.
#[cfg(target_feature = "fp64")]
mod msa;
#[cfg(target_feature = "fp64")]
#[unstable(feature = "stdarch_mips", issue = "111198")]
pub use self::msa::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `BREAK`
///
/// Implemented by calling `crate::intrinsics::abort()`, so this function never
/// returns. In test builds `assert_instr(break)` is attached to check the
/// emitted instruction.
#[cfg_attr(test, assert_instr(break))]
#[inline]
#[unstable(feature = "stdarch_mips", issue = "111198")]
pub unsafe fn break_() -> ! {
crate::intrinsics::abort()
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,343 @@
//! `core_arch`
#![allow(unknown_lints, unnecessary_transmutes)]
#[macro_use]
mod macros;
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
mod riscv_shared;
#[cfg(any(
target_arch = "arm",
target_arch = "aarch64",
target_arch = "arm64ec",
doc
))]
mod arm_shared;
mod simd;
// Public per-architecture intrinsic namespaces. Every submodule below is
// compiled when building for the matching target and also whenever the `doc`
// cfg is set (see each `#[cfg(any(..., doc))]`), and only re-exports items
// from the private `core_arch::<arch>` implementation modules.
#[doc = include_str!("core_arch_docs.md")]
#[stable(feature = "simd_arch", since = "1.27.0")]
pub mod arch {
/// Platform-specific intrinsics for the `x86` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86", doc))]
#[doc(cfg(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86::*;
}
/// Platform-specific intrinsics for the `x86_64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86_64", doc))]
#[doc(cfg(target_arch = "x86_64"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86_64 {
// `x86_64` exposes everything from the `x86` implementation module plus the
// items of the `x86_64` implementation module.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86::*;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86_64::*;
}
/// Platform-specific intrinsics for the `arm` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "arm", doc))]
#[doc(cfg(target_arch = "arm"))]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
pub mod arm {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
pub use crate::core_arch::arm::*;
}
/// Platform-specific intrinsics for the `aarch64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub mod aarch64 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use crate::core_arch::aarch64::*;
}
/// Platform-specific intrinsics for the `riscv32` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "riscv32", doc))]
#[doc(cfg(any(target_arch = "riscv32")))]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub mod riscv32 {
pub use crate::core_arch::riscv_shared::*;
pub use crate::core_arch::riscv32::*;
}
/// Platform-specific intrinsics for the `riscv64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "riscv64", doc))]
#[doc(cfg(any(target_arch = "riscv64")))]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub mod riscv64 {
pub use crate::core_arch::riscv64::*;
// RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05).
// Module `riscv_shared` includes instructions available under all RISC-V platforms,
// i.e. RISC-V RV32 instructions.
pub use crate::core_arch::riscv_shared::*;
}
/// Platform-specific intrinsics for the `wasm32` platform.
///
/// This module provides intrinsics specific to the WebAssembly
/// architecture. Here you'll find intrinsics specific to WebAssembly that
/// aren't otherwise surfaced somewhere in a cross-platform abstraction of
/// `std`, and you'll also find functions for leveraging WebAssembly
/// proposals such as [atomics] and [simd].
///
/// Intrinsics in the `wasm32` module are modeled after the WebAssembly
/// instructions that they represent. Most functions are named after the
/// instruction they intend to correspond to, and the arguments/results
/// correspond to the type signature of the instruction itself. Stable
/// WebAssembly instructions are [documented online][instrdoc].
///
/// [instrdoc]: https://webassembly.github.io/spec/core/valid/instructions.html
///
/// If a proposal is not yet stable in WebAssembly itself then the functions
/// within this module may be unstable and require the nightly channel of
/// Rust to use. As the proposal itself stabilizes the intrinsics in this
/// module should stabilize as well.
///
/// [atomics]: https://github.com/webassembly/threads
/// [simd]: https://github.com/webassembly/simd
///
/// See the [module documentation](../index.html) for general information
/// about the `arch` module and platform intrinsics.
///
/// ## Atomics
///
/// The [threads proposal][atomics] for WebAssembly adds a number of
/// instructions for dealing with multithreaded programs. Most instructions
/// added in the [atomics] proposal are exposed in Rust through the
/// `std::sync::atomic` module. Some instructions, however, don't have
/// direct equivalents in Rust so they're exposed here instead.
///
/// Note that the instructions added in the [atomics] proposal can work in
/// either a context with a shared wasm memory and without. These intrinsics
/// are always available in the standard library, but you likely won't be
/// able to use them too productively unless you recompile the standard
/// library (and all your code) with `-Ctarget-feature=+atomics`.
///
/// It's also worth pointing out that multi-threaded WebAssembly and its
/// story in Rust is still in a somewhat "early days" phase as of the time
/// of this writing. Pieces should mostly work but it generally requires a
/// good deal of manual setup. At this time it's not as simple as "just call
/// `std::thread::spawn`", but it will hopefully get there one day!
///
/// ## SIMD
///
/// The [simd proposal][simd] for WebAssembly added a new `v128` type for a
/// 128-bit SIMD register. It also added a large array of instructions to
/// operate on the `v128` type to perform data processing. Using SIMD on
/// wasm is intended to be similar to as you would on `x86_64`, for example.
/// You'd write a function such as:
///
/// ```rust,ignore
/// #[cfg(target_arch = "wasm32")]
/// #[target_feature(enable = "simd128")]
/// unsafe fn uses_simd() {
/// use std::arch::wasm32::*;
/// // ...
/// }
/// ```
///
/// Unlike `x86_64`, however, WebAssembly does not currently have dynamic
/// detection at runtime as to whether SIMD is supported (this is one of the
/// motivators for the [conditional sections][condsections] and [feature
/// detection] proposals, but that is still pretty early days). This means
/// that your binary will either have SIMD and can only run on engines
/// which support SIMD, or it will not have SIMD at all. For compatibility
/// the standard library itself does not use any SIMD internally.
/// Determining how best to ship your WebAssembly binary with SIMD is
/// largely left up to you as it can be pretty nuanced depending on
/// your situation.
///
/// [condsections]: https://github.com/webassembly/conditional-sections
/// [feature detection]: https://github.com/WebAssembly/feature-detection
///
/// To enable SIMD support at compile time you need to do one of two things:
///
/// * First you can annotate functions with `#[target_feature(enable =
/// "simd128")]`. This causes just that one function to have SIMD support
/// available to it, and intrinsics will get inlined as usual in this
/// situation.
///
/// * Second you can compile your program with `-Ctarget-feature=+simd128`.
/// This compilation flag blanket enables SIMD support for your entire
/// compilation. Note that this does not include the standard library
/// unless you [recompile the standard library][buildstd].
///
/// [buildstd]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#build-std
///
/// If you enable SIMD via either of these routes then you'll have a
/// WebAssembly binary that uses SIMD instructions, and you'll need to ship
/// that accordingly. Also note that if you call SIMD intrinsics but don't
/// enable SIMD via either of these mechanisms, you'll still have SIMD
/// generated in your program. This means to generate a binary without SIMD
/// you'll need to avoid both options above plus calling into any intrinsics
/// in this module.
#[cfg(any(target_arch = "wasm32", doc))]
#[doc(cfg(target_arch = "wasm32"))]
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub mod wasm32 {
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub use crate::core_arch::wasm32::*;
}
/// Platform-specific intrinsics for the `wasm64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "wasm64", doc))]
#[doc(cfg(target_arch = "wasm64"))]
#[unstable(feature = "simd_wasm64", issue = "90599")]
pub mod wasm64 {
// `wasm64` re-exports the `wasm32` implementation module.
#[unstable(feature = "simd_wasm64", issue = "90599")]
pub use crate::core_arch::wasm32::*;
}
/// Platform-specific intrinsics for the `wasm` target family.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_family = "wasm", doc))]
#[doc(cfg(target_family = "wasm"))]
#[unstable(feature = "simd_wasm64", issue = "90599")]
pub mod wasm {
// The family-wide module also re-exports the `wasm32` implementation module.
#[unstable(feature = "simd_wasm64", issue = "90599")]
pub use crate::core_arch::wasm32::*;
}
/// Platform-specific intrinsics for the `mips` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips", doc))]
#[doc(cfg(target_arch = "mips"))]
#[unstable(feature = "stdarch_mips", issue = "111198")]
pub mod mips {
pub use crate::core_arch::mips::*;
}
/// Platform-specific intrinsics for the `mips64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips64", doc))]
#[doc(cfg(target_arch = "mips64"))]
#[unstable(feature = "stdarch_mips", issue = "111198")]
pub mod mips64 {
// `mips64` re-exports the shared `mips` implementation module.
pub use crate::core_arch::mips::*;
}
/// Platform-specific intrinsics for the `PowerPC` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "powerpc", doc))]
#[doc(cfg(target_arch = "powerpc"))]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub mod powerpc {
pub use crate::core_arch::powerpc::*;
}
/// Platform-specific intrinsics for the `PowerPC64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "powerpc64", doc))]
#[doc(cfg(target_arch = "powerpc64"))]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub mod powerpc64 {
pub use crate::core_arch::powerpc64::*;
}
/// Platform-specific intrinsics for the `NVPTX` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "nvptx64", doc))]
#[doc(cfg(target_arch = "nvptx64"))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub mod nvptx {
pub use crate::core_arch::nvptx::*;
}
/// Platform-specific intrinsics for the `loongarch` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "loongarch64", doc))]
#[doc(cfg(target_arch = "loongarch64"))]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub mod loongarch64 {
pub use crate::core_arch::loongarch64::*;
}
/// Platform-specific intrinsics for the `s390x` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "s390x", doc))]
#[doc(cfg(target_arch = "s390x"))]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub mod s390x {
pub use crate::core_arch::s390x::*;
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))]
mod x86;
#[cfg(any(target_arch = "x86_64", doc))]
#[doc(cfg(target_arch = "x86_64"))]
mod x86_64;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
mod aarch64;
#[cfg(any(target_arch = "arm", doc))]
#[doc(cfg(any(target_arch = "arm")))]
mod arm;
#[cfg(any(target_arch = "riscv32", doc))]
#[doc(cfg(any(target_arch = "riscv32")))]
mod riscv32;
#[cfg(any(target_arch = "riscv64", doc))]
#[doc(cfg(any(target_arch = "riscv64")))]
mod riscv64;
#[cfg(any(target_family = "wasm", doc))]
#[doc(cfg(target_family = "wasm"))]
mod wasm32;
#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))]
#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))]
mod mips;
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))]
#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod powerpc;
#[cfg(any(target_arch = "powerpc64", doc))]
#[doc(cfg(target_arch = "powerpc64"))]
mod powerpc64;
#[cfg(any(target_arch = "nvptx64", doc))]
#[doc(cfg(target_arch = "nvptx64"))]
mod nvptx;
#[cfg(any(target_arch = "loongarch64", doc))]
#[doc(cfg(target_arch = "loongarch64"))]
mod loongarch64;
#[cfg(any(target_arch = "s390x", doc))]
#[doc(cfg(target_arch = "s390x"))]
mod s390x;

View file

@ -0,0 +1,236 @@
//! NVPTX intrinsics (experimental)
//!
//! These intrinsics form the foundation of the CUDA
//! programming model.
//!
//! The reference is the [CUDA C Programming Guide][cuda_c]. Also relevant is
//! the [LLVM NVPTX Backend documentation][llvm_docs].
//!
//! [cuda_c]:
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
//! [llvm_docs]:
//! https://llvm.org/docs/NVPTXUsage.html
use crate::ffi::c_void;
mod packed;
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub use packed::*;
// Raw declarations of the `llvm.nvvm.*` intrinsics backing the public
// `_`-prefixed wrappers in this module. Each `#[link_name]` names the LLVM
// intrinsic the private function binds to.
#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.nvvm.barrier0"]
fn syncthreads() -> ();
// `ntid.{x,y,z}` special registers.
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
fn block_dim_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
fn block_dim_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
fn block_dim_z() -> i32;
// `ctaid.{x,y,z}` special registers.
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
fn block_idx_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
fn block_idx_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
fn block_idx_z() -> i32;
// `nctaid.{x,y,z}` special registers.
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
fn grid_dim_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
fn grid_dim_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
fn grid_dim_z() -> i32;
// `tid.{x,y,z}` special registers.
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
fn thread_idx_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
fn thread_idx_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
fn thread_idx_z() -> i32;
}
/// Synchronizes all threads in the block.
///
/// Thin wrapper over the private `syncthreads` binding, which is linked to the
/// `llvm.nvvm.barrier0` LLVM intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _syncthreads() -> () {
syncthreads()
}
/// x-th thread-block dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ntid.x` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_x() -> i32 {
block_dim_x()
}
/// y-th thread-block dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ntid.y` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_y() -> i32 {
block_dim_y()
}
/// z-th thread-block dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ntid.z` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_z() -> i32 {
block_dim_z()
}
/// x-th thread-block index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ctaid.x` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_x() -> i32 {
block_idx_x()
}
/// y-th thread-block index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ctaid.y` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_y() -> i32 {
block_idx_y()
}
/// z-th thread-block index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.ctaid.z` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_z() -> i32 {
block_idx_z()
}
/// x-th block-grid dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.nctaid.x` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_x() -> i32 {
grid_dim_x()
}
/// y-th block-grid dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.nctaid.y` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_y() -> i32 {
grid_dim_y()
}
/// z-th block-grid dimension.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.nctaid.z` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_z() -> i32 {
grid_dim_z()
}
/// x-th thread index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.tid.x` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_x() -> i32 {
thread_idx_x()
}
/// y-th thread index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.tid.y` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_y() -> i32 {
thread_idx_y()
}
/// z-th thread index.
///
/// Returns the value produced by the `llvm.nvvm.read.ptx.sreg.tid.z` LLVM
/// intrinsic.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_z() -> i32 {
thread_idx_z()
}
/// Generates the trap instruction `TRAP`
///
/// Implemented by calling `crate::intrinsics::abort()`; this function never
/// returns (return type `!`).
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn trap() -> ! {
crate::intrinsics::abort()
}
// Basic CUDA syscall declarations.
unsafe extern "C" {
/// Print formatted output from a kernel to a host-side output stream.
///
/// Syscall arguments:
/// * `format`: A pointer to the format specifier input (uses common `printf` format).
/// * `valist`: A pointer to the valist input.
///
/// Returns the `status` value reported by `vprintf`.
///
/// ```
/// #[repr(C)]
/// struct PrintArgs(f32, f32, f32, i32);
///
/// vprintf(
/// "int(%f + %f) = int(%f) = %d\n".as_ptr(),
/// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
/// );
/// ```
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
// NOTE(review): the format literal in the example above has no trailing NUL
// byte; confirm whether `vprintf` requires a NUL-terminated string.
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
/// Allocate memory dynamically from a fixed-size heap in global memory.
///
/// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
/// from the device heap and returns a pointer to the allocated memory
/// or `NULL` if insufficient memory exists to fulfill the request.
///
/// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
///
/// The memory allocated by a given CUDA thread via `malloc()` remains allocated
/// for the lifetime of the CUDA context, or until it is explicitly released
/// by a call to `free()`. It can be used by any other CUDA threads
/// even from subsequent kernel launches.
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
// FIXME(denzp): assign `malloc` and `nothrow` attributes.
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub fn malloc(size: usize) -> *mut c_void;
/// Free previously dynamically allocated memory.
///
/// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
/// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
/// the call to `free()` is ignored.
///
/// Any CUDA thread may free memory allocated by another thread, but care should be taken
/// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
/// with the same `ptr` have undefined behavior.
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
// FIXME(denzp): assign `nothrow` attribute.
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub fn free(ptr: *mut c_void);
// Internal declaration of the syscall. Exported variant has
// the `char_size` parameter set to `1` (single char size in bytes).
// This declaration is private; the public entry point is `__assert_fail`.
fn __assertfail(
message: *const u8,
file: *const u8,
line: u32,
function: *const u8,
char_size: usize,
);
}
/// Syscall to be used whenever the *assert expression produces a `false` value*.
///
/// Syscall arguments:
/// * `message`: The pointer to the string that should be output.
/// * `file`: The pointer to the file name string associated with the assert.
/// * `line`: The line number associated with the assert.
/// * `function`: The pointer to the function name string associated with the assert.
///
/// This wrapper forwards all four arguments to the private `__assertfail`
/// syscall declaration and passes `1` as its `char_size` argument.
///
/// Source:
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
__assertfail(message, file, line, function, 1)
}

View file

@ -0,0 +1,139 @@
//! NVPTX Packed data types (SIMD)
//!
//! Packed Data Types is what PTX calls SIMD types. See [PTX ISA (Packed Data Types)](https://docs.nvidia.com/cuda/parallel-thread-execution/#packed-data-types) for a full reference.
// Note: #[assert_instr] tests are not actually being run on nvptx due to being a `no_std` target incapable of running tests. Something like FileCheck would be appropriate for verifying the correct instruction is used.
use crate::intrinsics::simd::*;
// Private bindings to the LLVM `minimum`/`maximum` intrinsics for two-lane
// `f16` vectors. They back `f16x2_min_nan` and `f16x2_max_nan`.
#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.minimum.v2f16"]
fn llvm_f16x2_minimum(a: f16x2, b: f16x2) -> f16x2;
#[link_name = "llvm.maximum.v2f16"]
fn llvm_f16x2_maximum(a: f16x2, b: f16x2) -> f16x2;
}
// Declares the public `f16x2` vector type via the crate's `types!` macro;
// the inner `#![unstable(...)]` attribute is passed to the macro together
// with the struct declaration.
types! {
#![unstable(feature = "stdarch_nvptx", issue = "111199")]
/// PTX-specific 32-bit wide floating point (f16 x 2) vector type
pub struct f16x2(2 x f16);
}
/// Add two values, round to nearest even
///
/// Lane-wise addition of two `f16x2` vectors, implemented with `simd_add`.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add>
///
/// Corresponds to the CUDA C intrinsics:
/// - [`__hadd2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g921c795176eaa31265bd80ef4fe4b8e6)
/// - [`__hadd2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g6cd8ddb2c3d670e1a10c3eb2e7644f82)
#[inline]
// The expected instruction carries the `.f16x2` type suffix, like every other
// packed intrinsic in this module (`sub.rn.f16x2`, `mul.rn.f16x2`, ...).
#[cfg_attr(test, assert_instr(add.rn.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_add(a: f16x2, b: f16x2) -> f16x2 {
    simd_add(a, b)
}
/// Subtract two values, round to nearest even
///
/// Lane-wise `a - b` on two `f16x2` vectors, implemented with `simd_sub`.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub>
///
/// Corresponds to the CUDA C intrinsics:
/// - [`__hsub2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1ga5536c9c3d853d8c8b9de60e18b41e54)
/// - [`__hsub2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g8adc164c68d553354f749f0f0645a874)
#[inline]
#[cfg_attr(test, assert_instr(sub.rn.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_sub(a: f16x2, b: f16x2) -> f16x2 {
simd_sub(a, b)
}
/// Multiply two values, round to nearest even
///
/// Lane-wise product of two `f16x2` vectors, implemented with `simd_mul`.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-mul>
///
/// Corresponds to the CUDA C intrinsics:
/// - [`__hmul2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g70de3f2ee48babe4e0969397ac17708e)
/// - [`__hmul2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g99f8fe23a4b4c6898d6faf999afaa76e)
#[inline]
#[cfg_attr(test, assert_instr(mul.rn.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_mul(a: f16x2, b: f16x2) -> f16x2 {
simd_mul(a, b)
}
/// Fused multiply-add, round to nearest even
///
/// Forwards `a`, `b` and `c` to `simd_fma` in that order.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-fma>
///
/// Corresponds to the CUDA C intrinsics:
/// - [`__fma2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
/// - [`__fma2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
// NOTE(review): both links above share one anchor, and the sibling intrinsics
// are named `__h*2`; the CUDA name is presumably `__hfma2` - confirm against
// the CUDA Math API before renaming.
#[inline]
#[cfg_attr(test, assert_instr(fma.rn.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_fma(a: f16x2, b: f16x2, c: f16x2) -> f16x2 {
simd_fma(a, b, c)
}
/// Arithmetic negate
///
/// Lane-wise negation of an `f16x2` vector, implemented with `simd_neg`.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-neg>
///
/// Corresponds to the CUDA C intrinsic [`__hneg2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html)
#[inline]
#[cfg_attr(test, assert_instr(neg.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 {
simd_neg(a)
}
/// Find the minimum of two values
///
/// Lane-wise minimum of two `f16x2` vectors, implemented with `simd_fmin`.
/// See `f16x2_min_nan` for the NaN-propagating variant.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
///
/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6)
#[inline]
#[cfg_attr(test, assert_instr(min.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 {
simd_fmin(a, b)
}
/// Find the minimum of two values, NaNs pass through.
///
/// Implemented with the `llvm.minimum.v2f16` LLVM intrinsic.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
///
/// Corresponds to the CUDA C intrinsic [`__hmin2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g8bb8f58e9294cc261d2f42c4d5aecd6b)
#[inline]
#[cfg_attr(test, assert_instr(min.NaN.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_min_nan(a: f16x2, b: f16x2) -> f16x2 {
llvm_f16x2_minimum(a, b)
}
/// Find the maximum of two values
///
/// Lane-wise maximum of two `f16x2` vectors, implemented with `simd_fmax`.
/// See `f16x2_max_nan` for the NaN-propagating variant.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
///
/// Corresponds to the CUDA C intrinsic [`__hmax2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g59fc7fc7975d8127b202444a05e57e3d)
#[inline]
#[cfg_attr(test, assert_instr(max.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 {
simd_fmax(a, b)
}
/// Find the maximum of two values, NaNs pass through.
///
/// Implemented with the `llvm.maximum.v2f16` LLVM intrinsic.
///
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
///
/// Corresponds to the CUDA C intrinsic [`__hmax2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g41623db7850e3074fd9daa80a14c3897)
#[inline]
#[cfg_attr(test, assert_instr(max.NaN.f16x2))]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn f16x2_max_nan(a: f16x2, b: f16x2) -> f16x2 {
llvm_f16x2_maximum(a, b)
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,315 @@
// Generates a public `altivec`-enabled wrapper function `$fun` that forwards
// its arguments to `$call`, and attaches `assert_instr` checks (test builds
// only) for the instruction the wrapper is expected to compile to.
//
// Accepted instruction specs inside `[...]`:
// * `$call, $instr`                          - one instruction for all configurations.
// * `$call, $altivec / $vsx`                 - shorthand; reuses `$vsx` as the power9 instruction.
// * `$call, $altivec / $vsx / $pwr9`         - instruction chosen by enabled target features.
macro_rules! test_impl {
($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr($instr))]
pub unsafe fn $fun ($($v : $ty),*) -> $r {
$call ($($v),*)
}
};
// Two-instruction form: expands to the three-instruction form below.
($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident]) => {
test_impl! { $fun ($($v : $ty),*) -> $r [$call, $instr_altivec / $instr_vsx / $instr_vsx] }
};
($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident / $instr_pwr9:ident]) => {
#[inline]
#[target_feature(enable = "altivec")]
// NOTE(review): the `$instr_pwr9` check below is only active when `vsx` is
// NOT enabled but `power9-vector` is; no check is attached when both are
// enabled. Confirm this is the intended feature combination.
#[cfg_attr(all(test, not(target_feature="vsx"), not(target_feature = "power9-vector")), assert_instr($instr_altivec))]
#[cfg_attr(all(test, target_feature="vsx", not(target_feature = "power9-vector")), assert_instr($instr_vsx))]
#[cfg_attr(all(test, not(target_feature="vsx"), target_feature = "power9-vector"), assert_instr($instr_pwr9))]
pub unsafe fn $fun ($($v : $ty),*) -> $r {
$call ($($v),*)
}
}
}
// Generates `impl $Trait for <vector type>` blocks whose method `$m` forwards
// to an implementation function, transmuting the arguments (and, for the `+`
// forms, also the result).
//
// Forms (the leading `[$Trait $m]` names the trait and its method):
// * `$fun ($a)`                      - unary, returns `Self`.
// * `$fun ($a) -> $r`                - unary with associated `Result = $r`.
// * `+ $fun ($a) -> $r`              - as above, result is transmuted too.
// * `1 (ub, sb, uh, sh, uw, sw, sf)` - unary impls for the six integer vector types and `vector_float`.
// * `$fun ($a, $b) -> $r`            - binary, `impl $Trait<$b> for $a`.
// * `+ $fun ($a, $b) -> $r`          - as above, result is transmuted too.
// * `$fun ($a, ~$b) -> $r`           - binary for `(a, a)`, `(a, b)` and `(b, a)`.
// * `~(ub, sb, uh, sh, uw, sw)`      - the `~` form for each integer type paired with its bool vector.
// * `~($fn)`                         - `~(...)` with one function used for every type.
// * `2 (ub, sb, uh, sh, uw, sw)`     - binary same-type impls for the six integer vector types.
// * `2 ($fn)`                        - `2 (...)` with one function used for every type.
// * `+ 2b (b, h, w)`                 - binary impls for bool/unsigned/signed vectors per lane width.
// * `+ 2b ($fn)`                     - `2b (...)` with one function used for every width.
#[allow(unknown_lints, unused_macro_rules)]
macro_rules! impl_vec_trait {
    ([$Trait:ident $m:ident] $fun:ident ($a:ty)) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self {
                $fun(transmute(self))
            }
        }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self::Result {
                $fun(transmute(self))
            }
        }
    };
    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self::Result {
                transmute($fun(transmute(self)))
            }
        }
    };
    ([$Trait:ident $m:ident] 1 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $sf: ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int) -> vector_signed_int }
        impl_vec_trait!{ [$Trait $m] $sf (vector_float) -> vector_float }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait<$b> for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self, b: $b) -> Self::Result {
                $fun(transmute(self), transmute(b))
            }
        }
    };
    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait<$b> for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self, b: $b) -> Self::Result {
                transmute($fun(transmute(self), transmute(b)))
            }
        }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty, ~$b:ty) -> $r:ty) => {
        impl_vec_trait!{ [$Trait $m] $fun ($a, $a) -> $r }
        impl_vec_trait!{ [$Trait $m] $fun ($a, $b) -> $r }
        impl_vec_trait!{ [$Trait $m] $fun ($b, $a) -> $r }
    };
    ([$Trait:ident $m:ident] ~($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, ~vector_bool_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, ~vector_bool_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, ~vector_bool_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, ~vector_bool_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, ~vector_bool_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, ~vector_bool_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident] ~($fn:ident)) => {
        impl_vec_trait!{ [$Trait $m] ~($fn, $fn, $fn, $fn, $fn, $fn) }
    };
    ([$Trait:ident $m:ident] 2 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident] 2 ($fn:ident)) => {
        // The `2` selector must be repeated here: without it the recursive
        // invocation matches none of the arms and fails to expand.
        impl_vec_trait!{ [$Trait $m] 2 ($fn, $fn, $fn, $fn, $fn, $fn) }
    };
    ([$Trait:ident $m:ident]+ 2b ($b:ident, $h:ident, $w:ident)) => {
        impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char }
        impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident]+ 2b ($fn:ident)) => {
        impl_vec_trait!{ [$Trait $m]+ 2b ($fn, $fn, $fn) }
    };
}
// Maps a `<lane>x<count>` SIMD type identifier (e.g. `i32x4`) to the name of
// the corresponding `vector_*` type. Arms are grouped by lane width; the
// patterns are disjoint, so their order does not affect matching.
macro_rules! s_t_l {
    // 8-bit lanes
    (u8x16) => { vector_unsigned_char };
    (i8x16) => { vector_signed_char };
    // 16-bit lanes
    (u16x8) => { vector_unsigned_short };
    (i16x8) => { vector_signed_short };
    // 32-bit lanes
    (u32x4) => { vector_unsigned_int };
    (i32x4) => { vector_signed_int };
    (f32x4) => { vector_float };
}
// Maps a scalar element type identifier (e.g. `i32`) to the name of the
// `vector_*` type with that element type. Arms are grouped by lane width;
// the patterns are disjoint, so their order does not affect matching.
macro_rules! t_t_l {
    // 8-bit elements
    (u8) => { vector_unsigned_char };
    (i8) => { vector_signed_char };
    // 16-bit elements
    (u16) => { vector_unsigned_short };
    (i16) => { vector_signed_short };
    // 32-bit elements
    (u32) => { vector_unsigned_int };
    (i32) => { vector_signed_int };
    (f32) => { vector_float };
}
// Maps a scalar element type identifier (e.g. `i32`) to the name of the
// matching `<lane>x<count>` SIMD type (e.g. `i32x4`). Arms are grouped by
// lane width; the patterns are disjoint, so order does not affect matching.
macro_rules! t_t_s {
    // 8-bit elements
    (u8) => { u8x16 };
    (i8) => { i8x16 };
    // 16-bit elements
    (u16) => { u16x8 };
    (i16) => { i16x8 };
    // 32-bit elements
    (u32) => { u32x4 };
    (i32) => { i32x4 };
    (f32) => { f32x4 };
}
// Maps a `vector_*` type name to the unsigned vector type used for it:
// char-based types map to `vector_unsigned_char`, short-based types to
// `vector_unsigned_short`, and int-based types plus `vector_float` to
// `vector_unsigned_int`. Arms are grouped by result type.
macro_rules! t_u {
    // -> vector_unsigned_char
    (vector_bool_char) => { vector_unsigned_char };
    (vector_signed_char) => { vector_unsigned_char };
    (vector_unsigned_char) => { vector_unsigned_char };
    // -> vector_unsigned_short
    (vector_bool_short) => { vector_unsigned_short };
    (vector_signed_short) => { vector_unsigned_short };
    (vector_unsigned_short) => { vector_unsigned_short };
    // -> vector_unsigned_int
    (vector_bool_int) => { vector_unsigned_int };
    (vector_signed_int) => { vector_unsigned_int };
    (vector_unsigned_int) => { vector_unsigned_int };
    (vector_float) => { vector_unsigned_int };
}
// Maps a `vector_*` type name to the bool (mask) vector type used for it:
// char-based types map to `vector_bool_char`, short-based types to
// `vector_bool_short`, and int-based types plus `vector_float` to
// `vector_bool_int`. Arms are grouped by result type.
macro_rules! t_b {
    // -> vector_bool_char
    (vector_bool_char) => { vector_bool_char };
    (vector_signed_char) => { vector_bool_char };
    (vector_unsigned_char) => { vector_bool_char };
    // -> vector_bool_short
    (vector_bool_short) => { vector_bool_short };
    (vector_signed_short) => { vector_bool_short };
    (vector_unsigned_short) => { vector_bool_short };
    // -> vector_bool_int
    (vector_bool_int) => { vector_bool_int };
    (vector_signed_int) => { vector_bool_int };
    (vector_unsigned_int) => { vector_bool_int };
    (vector_float) => { vector_bool_int };
}
// Implements `From<$s>` for the `vector_*` type that `s_t_l!($s)` names, by
// transmuting the value. Accepts a single SIMD type identifier or a
// comma-separated list of them.
macro_rules! impl_from {
// Single type: emit the `From` impl.
($s: ident) => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl From<$s> for s_t_l!($s) {
fn from (v: $s) -> Self {
unsafe {
transmute(v)
}
}
}
};
// List of types: expand the single-type arm once per entry.
($($s: ident),*) => {
$(
impl_from! { $s }
)*
};
}
// Implements `crate::ops::Neg` for the `vector_*` type that `s_t_l!($s)`
// names, using `simd_neg`.
//
// The `$zero` expression is part of the accepted syntax but is not used in
// the expansion.
macro_rules! impl_neg {
($s: ident : $zero: expr) => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl crate::ops::Neg for s_t_l!($s) {
type Output = s_t_l!($s);
fn neg(self) -> Self::Output {
unsafe { simd_neg(self) }
}
}
};
}
pub(crate) use impl_from;
pub(crate) use impl_neg;
pub(crate) use impl_vec_trait;
pub(crate) use s_t_l;
pub(crate) use t_b;
pub(crate) use t_t_l;
pub(crate) use t_t_s;
pub(crate) use t_u;
pub(crate) use test_impl;

View file

@ -0,0 +1,22 @@
//! PowerPC intrinsics
pub(crate) mod macros;
mod altivec;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub use self::altivec::*;
mod vsx;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub use self::vsx::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `TRAP`
///
/// Implemented by calling `crate::intrinsics::abort()`; this function never
/// returns (return type `!`). In test builds the generated code is checked to
/// contain a `trap` instruction.
#[cfg_attr(test, assert_instr(trap))]
#[inline]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn trap() -> ! {
crate::intrinsics::abort()
}

View file

@ -0,0 +1,240 @@
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
//!
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
//!
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
#![allow(non_camel_case_types)]
use crate::core_arch::powerpc::*;
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::mem::transmute;
// Declares the 128-bit, two-lane VSX vector types via the crate's `types!`
// macro. The commented-out lines are declarations that are not currently
// emitted.
types! {
#![unstable(feature = "stdarch_powerpc", issue = "111145")]
// pub struct vector_Float16 = f16x8;
/// PowerPC-specific 128-bit wide vector of two packed `i64`
pub struct vector_signed_long(2 x i64);
/// PowerPC-specific 128-bit wide vector of two packed `u64`
pub struct vector_unsigned_long(2 x u64);
/// PowerPC-specific 128-bit wide vector mask of two `i64`
pub struct vector_bool_long(2 x i64);
/// PowerPC-specific 128-bit wide vector of two packed `f64`
pub struct vector_double(2 x f64);
// pub struct vector_signed_long_long = vector_signed_long;
// pub struct vector_unsigned_long_long = vector_unsigned_long;
// pub struct vector_bool_long_long = vector_bool_long;
// pub struct vector_signed___int128 = i128x1;
// pub struct vector_unsigned___int128 = i128x1;
}
// Private binding to the `llvm.ppc.altivec.vperm` LLVM intrinsic.
// NOTE(review): the merge helpers visible in this file's `sealed` module call
// `vec_perm`, not this binding - confirm whether `vperm` is still needed here.
#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.ppc.altivec.vperm"]
fn vperm(
a: vector_signed_int,
b: vector_signed_int,
c: vector_unsigned_char,
) -> vector_signed_int;
}
// Implementation details behind the public `vec_xxpermdi`, `vec_mergee` and
// `vec_mergeo` functions: the traits that bound their generic parameter, the
// concrete helper functions, and the macros that implement the traits for
// each supported vector type.
mod sealed {
use super::*;
use crate::core_arch::simd::*;
/// Trait implemented for the two-lane 64-bit vector types accepted by
/// `vec_xxpermdi`.
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub trait VectorPermDI {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self;
}
// xxpermdi has a big-endian bias and extended mnemonics
//
// Only the two low bits of `dm` are used. The low bit picks lane 0 or 1 of
// `a` for the first result lane; the high bit picks lane 0 or 1 of `b`
// (shuffle indices 2 and 3) for the second result lane.
#[inline]
#[target_feature(enable = "vsx")]
#[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))]
#[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
unsafe fn xxpermdi(a: vector_signed_long, b: vector_signed_long, dm: u8) -> vector_signed_long {
let a: i64x2 = transmute(a);
let b: i64x2 = transmute(b);
let r: i64x2 = match dm & 0b11 {
0 => simd_shuffle!(a, b, [0b00, 0b10]),
1 => simd_shuffle!(a, b, [0b01, 0b10]),
2 => simd_shuffle!(a, b, [0b00, 0b11]),
_ => simd_shuffle!(a, b, [0b01, 0b11]),
};
transmute(r)
}
// Implements `VectorPermDI` for `$impl` by transmuting to and from
// `vector_signed_long` around a call to `xxpermdi`.
macro_rules! vec_xxpermdi {
{$impl: ident} => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorPermDI for $impl {
#[inline]
#[target_feature(enable = "vsx")]
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self {
transmute(xxpermdi(transmute(self), transmute(b), dm))
}
}
}
}
vec_xxpermdi! { vector_unsigned_long }
vec_xxpermdi! { vector_signed_long }
vec_xxpermdi! { vector_bool_long }
vec_xxpermdi! { vector_double }
/// Trait implemented for the four-lane 32-bit vector types accepted by
/// `vec_mergee` and `vec_mergeo`.
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub trait VectorMergeEo {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
unsafe fn vec_mergee(self, b: Self) -> Self;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
unsafe fn vec_mergeo(self, b: Self) -> Self;
}
// Merge-even helper: builds a byte permutation that takes bytes 0x00-0x03 and
// 0x08-0x0B (words 0 and 2) interleaved with bytes 0x10-0x13 and 0x18-0x1B,
// and applies it with `vec_perm(a, b, p)`.
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(
all(test, target_endian = "little", target_feature = "power8-vector"),
assert_instr(vmrgow)
)]
#[cfg_attr(
all(test, target_endian = "big", target_feature = "power8-vector"),
assert_instr(vmrgew)
)]
unsafe fn mergee(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
let p = transmute(u8x16::new(
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B,
));
vec_perm(a, b, p)
}
// Merge-odd helper: same scheme as `mergee`, using bytes 0x04-0x07 and
// 0x0C-0x0F (words 1 and 3) interleaved with bytes 0x14-0x17 and 0x1C-0x1F.
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(
all(test, target_endian = "little", target_feature = "power8-vector"),
assert_instr(vmrgew)
)]
#[cfg_attr(
all(test, target_endian = "big", target_feature = "power8-vector"),
assert_instr(vmrgow)
)]
unsafe fn mergeo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
let p = transmute(u8x16::new(
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D,
0x1E, 0x1F,
));
vec_perm(a, b, p)
}
// Implements `VectorMergeEo` for `$impl` by transmuting around `mergee` and
// `mergeo`. The `$even` / `$odd` parameters are accepted but the expansion
// calls `mergee` / `mergeo` by name.
macro_rules! vec_mergeeo {
{ $impl: ident, $even: ident, $odd: ident } => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorMergeEo for $impl {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_mergee(self, b: Self) -> Self {
transmute(mergee(transmute(self), transmute(b)))
}
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_mergeo(self, b: Self) -> Self {
transmute(mergeo(transmute(self), transmute(b)))
}
}
}
}
vec_mergeeo! { vector_signed_int, mergee, mergeo }
vec_mergeeo! { vector_unsigned_int, mergee, mergeo }
vec_mergeeo! { vector_bool_int, mergee, mergeo }
vec_mergeeo! { vector_float, mergee, mergeo }
}
/// Vector permute.
///
/// Builds a two-lane result from one 64-bit lane of `a` and one of `b`,
/// selected by the const parameter `DM`. `DM` is statically checked to fit in
/// 2 bits and is passed on as a `u8` to the `sealed::VectorPermDI`
/// implementation for `T`.
#[inline]
#[target_feature(enable = "vsx")]
//#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_xxpermdi<T, const DM: i32>(a: T, b: T) -> T
where
T: sealed::VectorPermDI,
{
static_assert_uimm_bits!(DM, 2);
a.vec_xxpermdi(b, DM as u8)
}
/// Vector Merge Even
///
/// ## Purpose
/// Merges the even-numbered values from two vectors.
///
/// ## Result value
/// The even-numbered elements of `a` are stored into the even-numbered elements of `r`.
/// The even-numbered elements of `b` are stored into the odd-numbered elements of `r`.
///
/// Dispatches to the `sealed::VectorMergeEo` implementation for `T`.
#[inline]
#[target_feature(enable = "altivec")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_mergee<T>(a: T, b: T) -> T
where
T: sealed::VectorMergeEo,
{
a.vec_mergee(b)
}
/// Vector Merge Odd
///
/// ## Purpose
/// Merges the odd-numbered values from two vectors.
///
/// ## Result value
/// The odd-numbered elements of `a` are stored into the even-numbered elements of `r`.
/// The odd-numbered elements of `b` are stored into the odd-numbered elements of `r`.
///
/// Dispatches to the `sealed::VectorMergeEo` implementation for `T`.
#[inline]
#[target_feature(enable = "altivec")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_mergeo<T>(a: T, b: T) -> T
where
T: sealed::VectorMergeEo,
{
a.vec_mergeo(b)
}
// Unit tests for `vec_xxpermdi`, run only in test builds with `vsx` enabled.
#[cfg(test)]
mod tests {
#[cfg(target_arch = "powerpc")]
use crate::core_arch::arch::powerpc::*;
#[cfg(target_arch = "powerpc64")]
use crate::core_arch::arch::powerpc64::*;
use crate::core_arch::simd::*;
use crate::mem::transmute;
use stdarch_test::simd_test;
// Generates one test for a given vector type. With `a = ($a, $b)` and
// `b = ($c, $d)`, it checks all four `DM` values:
// 0 -> ($a, $c), 1 -> ($b, $c), 2 -> ($a, $d), 3 -> ($b, $d).
macro_rules! test_vec_xxpermdi {
{$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
#[simd_test(enable = "vsx")]
unsafe fn $name() {
let a: $longtype = transmute($shorttype::new($($a),+, $($b),+));
let b = transmute($shorttype::new($($c),+, $($d),+));
assert_eq!($shorttype::new($($a),+, $($c),+), transmute(vec_xxpermdi::<_, 0>(a, b)));
assert_eq!($shorttype::new($($b),+, $($c),+), transmute(vec_xxpermdi::<_, 1>(a, b)));
assert_eq!($shorttype::new($($a),+, $($d),+), transmute(vec_xxpermdi::<_, 2>(a, b)));
assert_eq!($shorttype::new($($b),+, $($d),+), transmute(vec_xxpermdi::<_, 3>(a, b)));
}
}
}
test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
}

View file

@ -0,0 +1,14 @@
//! PowerPC 64
//!
//! The reference is the [64-Bit ELF V2 ABI Specification - Power
//! Architecture].
//!
//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
mod vsx;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub use crate::core_arch::powerpc::*;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub use self::vsx::*;

View file

@ -0,0 +1,156 @@
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
//!
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
//!
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
#![allow(non_camel_case_types)]
use crate::core_arch::powerpc::macros::*;
use crate::core_arch::powerpc::*;
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::mem::transmute;
// Private bindings to the LLVM intrinsics for the length-controlled vector
// load (`lxvl`) and store (`stxvl`). The `vec_lxvl` / `vec_stxvl` helpers in
// the `sealed` module shift the byte count left by 56 before calling these.
#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.ppc.vsx.lxvl"]
fn lxvl(a: *const u8, l: usize) -> vector_signed_int;
#[link_name = "llvm.ppc.vsx.stxvl"]
fn stxvl(v: vector_signed_int, a: *mut u8, l: usize);
}
// Implementation details behind the length-controlled vector load/store
// functions: the traits that bound their generic parameters, the helpers
// that call the LLVM intrinsics, and the macros implementing the traits for
// every supported element type.
mod sealed {
    use super::*;

    /// Loads `l` bytes starting at `p` with the `lxvl` intrinsic.
    ///
    /// The intrinsic takes the byte count in the most-significant byte of its
    /// 64-bit length operand, hence the `l << 56`.
    #[inline]
    #[target_feature(enable = "power9-vector")]
    #[cfg_attr(test, assert_instr(lxvl))]
    unsafe fn vec_lxvl(p: *const u8, l: usize) -> vector_signed_int {
        lxvl(p, l << 56)
    }

    /// Trait implemented for the pointer types accepted by the
    /// length-controlled vector load.
    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
    pub trait VectorXloads {
        type Result;
        unsafe fn vec_xl_len(self, l: usize) -> Self::Result;
    }

    // Implements `VectorXloads` for `*const $ty`; the result is the vector
    // type named by `t_t_l!($ty)`.
    macro_rules! impl_vsx_loads {
        ($ty:ident) => {
            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
            impl VectorXloads for *const $ty {
                type Result = t_t_l!($ty);
                #[inline]
                #[target_feature(enable = "power9-vector")]
                unsafe fn vec_xl_len(self, l: usize) -> Self::Result {
                    transmute(vec_lxvl(self as *const u8, l))
                }
            }
        };
    }

    impl_vsx_loads! { i8 }
    impl_vsx_loads! { u8 }
    impl_vsx_loads! { i16 }
    impl_vsx_loads! { u16 }
    impl_vsx_loads! { i32 }
    impl_vsx_loads! { u32 }
    impl_vsx_loads! { f32 }

    /// Stores the first `l` bytes of `v` to `a` with the `stxvl` intrinsic.
    ///
    /// As for `vec_lxvl`, the byte count is moved into the most-significant
    /// byte of the length operand (`l << 56`).
    #[inline]
    #[target_feature(enable = "power9-vector")]
    #[cfg_attr(test, assert_instr(stxvl))]
    unsafe fn vec_stxvl(v: vector_signed_int, a: *mut u8, l: usize) {
        stxvl(v, a, l << 56);
    }

    /// Trait implemented for the vector types accepted by the
    /// length-controlled vector store.
    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
    pub trait VectorXstores {
        type Out;
        unsafe fn vec_xst_len(self, p: Self::Out, l: usize);
    }

    // Implements `VectorXstores` for the vector type named by `t_t_l!($ty)`,
    // with `*mut $ty` as the destination pointer type.
    macro_rules! impl_stores {
        ($ty:ident) => {
            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
            impl VectorXstores for t_t_l!($ty) {
                type Out = *mut $ty;
                #[inline]
                #[target_feature(enable = "power9-vector")]
                unsafe fn vec_xst_len(self, a: Self::Out, l: usize) {
                    // Go through `vec_stxvl` so the length is shifted into
                    // the top byte, mirroring the load path. Passing `l`
                    // straight to `stxvl` would leave that byte zero for
                    // every valid length.
                    vec_stxvl(transmute(self), a as *mut u8, l)
                }
            }
        };
    }

    impl_stores! { i8 }
    impl_stores! { u8 }
    impl_stores! { i16 }
    impl_stores! { u16 }
    impl_stores! { i32 }
    impl_stores! { u32 }
    impl_stores! { f32 }
}
/// Vector Load with Length
///
/// ## Purpose
/// Loads a vector of a specified byte length.
///
/// ## Result value
/// Reads `len` bytes starting at the address `p` and uses them to initialize the
/// vector elements in byte-stream order (as defined by the endianness of the
/// target). Any element bytes that are not covered by the loaded bytes are zero.
///
/// Between 0 and 16 bytes, inclusive, will be loaded. The length is taken from the
/// least-significant byte of `len`, as min(`len` mod 256, 16). The behavior is
/// undefined if `len` is outside of the range 0–255, or if it is not a multiple of
/// the vector element size.
///
/// ## Notes
/// `vec_xl_len` should not be used to load from cache-inhibited memory.
#[inline]
#[target_feature(enable = "power9-vector")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_xl_len<T>(p: T, len: usize) -> <T as sealed::VectorXloads>::Result
where
    T: sealed::VectorXloads,
{
    // Dispatch to the per-pointer-type implementation in `sealed`.
    <T as sealed::VectorXloads>::vec_xl_len(p, len)
}
/// Vector Store with Length
///
/// ## Purpose
///
/// Stores a vector of a specified byte length.
///
/// ## Operation
///
/// Writes `l` bytes of the vector `v` to the address `a`. Bytes are taken starting
/// from the lowest-numbered byte of the lowest-numbered element (as defined by the
/// endianness of the target); all bytes of an element are written before moving on
/// to the next higher element.
///
/// Between 0 and 16 bytes, inclusive, will be stored. The length is taken from the
/// least-significant byte of `l`, as min(`l` mod 256, 16). The behavior is
/// undefined if `l` is outside of the range 0–255, or if it is not a multiple of
/// the vector element size.
///
/// ## Notes
/// `vec_xst_len` should not be used to store to cache-inhibited memory.
#[inline]
#[target_feature(enable = "power9-vector")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_xst_len<T>(v: T, a: <T as sealed::VectorXstores>::Out, l: usize)
where
    T: sealed::VectorXstores,
{
    // Dispatch to the per-vector-type implementation in `sealed`.
    <T as sealed::VectorXstores>::vec_xst_len(v, a, l)
}

View file

@ -0,0 +1,6 @@
//! RISC-V RV32 specific intrinsics
mod zk;
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub use zk::*;

View file

@ -0,0 +1,331 @@
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw LLVM intrinsic declarations backing the public RV32 wrappers in this file.
// Every operand and result is declared as `i32`; the wrappers cast from/to `u32`.
unsafe extern "unadjusted" {
// AES32 round intrinsics; the third operand (`bs`) is supplied by the wrappers
// from their `BS` const generic.
#[link_name = "llvm.riscv.aes32esi"]
fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
#[link_name = "llvm.riscv.aes32esmi"]
fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32;
#[link_name = "llvm.riscv.aes32dsi"]
fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32;
#[link_name = "llvm.riscv.aes32dsmi"]
fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
// Bit interleave / de-interleave.
#[link_name = "llvm.riscv.zip.i32"]
fn _zip(rs1: i32) -> i32;
#[link_name = "llvm.riscv.unzip.i32"]
fn _unzip(rs1: i32) -> i32;
// SHA2-512 helpers operating on 32-bit register pairs.
#[link_name = "llvm.riscv.sha512sig0h"]
fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig0l"]
fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig1h"]
fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig1l"]
fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sum0r"]
fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sum1r"]
fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
}
/// AES final round encryption instruction for RV32.
///
/// Picks one byte of `rs2` (selected by `BS`), runs it through the forward AES SBox
/// and XORs the result into `rs1`. This instruction must always be implemented such
/// that its execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.3
///
/// # Note
///
/// `BS` is expected to be a constant value; only its bottom 2 bits are used, and
/// values of 4 or more are rejected by the `static_assert!` in the body.
#[target_feature(enable = "zkne")]
#[rustc_legacy_const_generics(2)]
// See #1464
// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
    static_assert!(BS < 4);
    let (acc, src) = (rs1 as i32, rs2 as i32);
    // `BS as i32` is kept inline in the call so the byte selector stays a constant operand.
    let mixed = unsafe { _aes32esi(acc, src, BS as i32) };
    mixed as u32
}
/// AES middle round encryption instruction for RV32.
///
/// Picks one byte of `rs2` (selected by `BS`), applies the forward AES SBox and a
/// partial forward MixColumn to it, and XORs the result into `rs1`. This
/// instruction must always be implemented such that its execution latency does not
/// depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.4
///
/// # Note
///
/// `BS` is expected to be a constant value; only its bottom 2 bits are used, and
/// values of 4 or more are rejected by the `static_assert!` in the body.
#[target_feature(enable = "zkne")]
#[rustc_legacy_const_generics(2)]
// See #1464
// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
    static_assert!(BS < 4);
    let (acc, src) = (rs1 as i32, rs2 as i32);
    // `BS as i32` is kept inline in the call so the byte selector stays a constant operand.
    let mixed = unsafe { _aes32esmi(acc, src, BS as i32) };
    mixed as u32
}
/// AES final round decryption instruction for RV32.
///
/// Picks one byte of `rs2` (selected by `BS`), runs it through the inverse AES SBox
/// and XORs the result into `rs1`. This instruction must always be implemented such
/// that its execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.1
///
/// # Note
///
/// `BS` is expected to be a constant value; only its bottom 2 bits are used, and
/// values of 4 or more are rejected by the `static_assert!` in the body.
#[target_feature(enable = "zknd")]
#[rustc_legacy_const_generics(2)]
// See #1464
// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
    static_assert!(BS < 4);
    let (acc, src) = (rs1 as i32, rs2 as i32);
    // `BS as i32` is kept inline in the call so the byte selector stays a constant operand.
    let mixed = unsafe { _aes32dsi(acc, src, BS as i32) };
    mixed as u32
}
/// AES middle round decryption instruction for RV32.
///
/// Picks one byte of `rs2` (selected by `BS`), applies the inverse AES SBox and a
/// partial inverse MixColumn to it, and XORs the result into `rs1`. This
/// instruction must always be implemented such that its execution latency does not
/// depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.2
///
/// # Note
///
/// `BS` is expected to be a constant value; only its bottom 2 bits are used, and
/// values of 4 or more are rejected by the `static_assert!` in the body.
#[target_feature(enable = "zknd")]
#[rustc_legacy_const_generics(2)]
// See #1464
// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
    static_assert!(BS < 4);
    let (acc, src) = (rs1 as i32, rs2 as i32);
    // `BS as i32` is kept inline in the call so the byte selector stays a constant operand.
    let mixed = unsafe { _aes32dsmi(acc, src, BS as i32) };
    mixed as u32
}
/// Place upper/lower halves of the source register into odd/even bits of the destination
/// respectively.
///
/// Bits from the low half of the source register go to the even bit positions of
/// the destination, and bits from the high half go to the odd bit positions. It is
/// the inverse of the unzip instruction. This instruction is available only on
/// RV32.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.49
#[target_feature(enable = "zbkb")]
// See #1464
// #[cfg_attr(test, assert_instr(zip))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn zip(rs: u32) -> u32 {
    let src = rs as i32;
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let interleaved = unsafe { _zip(src) };
    interleaved as u32
}
/// Place odd and even bits of the source word into upper/lower halves of the destination.
///
/// The even bits of the source register end up in the low half of the destination,
/// and the odd bits of the source in the high half. It is the inverse of the zip
/// instruction. This instruction is available only on RV32.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.45
#[target_feature(enable = "zbkb")]
#[cfg_attr(test, assert_instr(unzip))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn unzip(rs: u32) -> u32 {
    let src = rs as i32;
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let separated = unsafe { _unzip(src) };
    separated as u32
}
/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
/// function \[49\] (Section 4.1.3).
///
/// This instruction is implemented on RV32 only. Together with the sha512sig0l
/// instruction it computes the Sigma0 transform of the SHA2-512 hash function. The
/// transform is a 64-bit to 64-bit function, so the input and output are each
/// represented by two 32-bit registers. This instruction must always be
/// implemented such that its execution latency does not depend on the data being
/// operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.31
#[target_feature(enable = "zknh")]
// See #1464
// #[cfg_attr(test, assert_instr(sha512sig0h))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sig0h(a, b) };
    word as u32
}
/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
/// \[49\] (Section 4.1.3).
///
/// This instruction is implemented on RV32 only. Together with the sha512sig0h
/// instruction it computes the Sigma0 transform of the SHA2-512 hash function. The
/// transform is a 64-bit to 64-bit function, so the input and output are each
/// represented by two 32-bit registers. This instruction must always be
/// implemented such that its execution latency does not depend on the data being
/// operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.32
#[target_feature(enable = "zknh")]
// See #1464
// #[cfg_attr(test, assert_instr(sha512sig0l))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sig0l(a, b) };
    word as u32
}
/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
/// function \[49\] (Section 4.1.3).
///
/// This instruction is implemented on RV32 only. Together with the sha512sig1l
/// instruction it computes the Sigma1 transform of the SHA2-512 hash function. The
/// transform is a 64-bit to 64-bit function, so the input and output are each
/// represented by two 32-bit registers. This instruction must always be
/// implemented such that its execution latency does not depend on the data being
/// operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.33
#[target_feature(enable = "zknh")]
// See #1464
// #[cfg_attr(test, assert_instr(sha512sig1h))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sig1h(a, b) };
    word as u32
}
/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
/// \[49\] (Section 4.1.3).
///
/// This instruction is implemented on RV32 only. Together with the sha512sig1h
/// instruction it computes the Sigma1 transform of the SHA2-512 hash function. The
/// transform is a 64-bit to 64-bit function, so the input and output are each
/// represented by two 32-bit registers. This instruction must always be
/// implemented such that its execution latency does not depend on the data being
/// operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.34
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha512sig1l))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sig1l(a, b) };
    word as u32
}
/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
/// 4.1.3).
///
/// This instruction is implemented on RV32 only. It is used to compute the Sum0
/// transform of the SHA2-512 hash function. The transform is a 64-bit to 64-bit
/// function, so the input and output are each represented by two 32-bit registers.
/// This instruction must always be implemented such that its execution latency
/// does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.35
#[target_feature(enable = "zknh")]
// See #1464
// #[cfg_attr(test, assert_instr(sha512sum0r))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sum0r(a, b) };
    word as u32
}
/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
/// 4.1.3).
///
/// This instruction is implemented on RV32 only. It is used to compute the Sum1
/// transform of the SHA2-512 hash function. The transform is a 64-bit to 64-bit
/// function, so the input and output are each represented by two 32-bit registers.
/// This instruction must always be implemented such that its execution latency
/// does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.36
#[target_feature(enable = "zknh")]
// See #1464
// #[cfg_attr(test, assert_instr(sha512sum1r))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
    let (a, b) = (rs1 as i32, rs2 as i32);
    // The intrinsic is declared with `i32` operands; cast the result back to `u32`.
    let word = unsafe { _sha512sum1r(a, b) };
    word as u32
}

View file

@ -0,0 +1,57 @@
//! RISC-V RV64 specific intrinsics
use crate::arch::asm;
mod zk;
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub use zk::*;
/// Loads virtual machine memory by unsigned word integer
///
/// The memory access is performed explicitly as though `V=1`; that is, with the
/// address translation and protection, and the endianness, that apply to memory
/// accesses in either VS-mode or VU-mode.
///
/// This operation is not available under RV32 base instruction set.
///
/// This function is unsafe because it accesses the virtual supervisor or user via a
/// `HLV.WU` instruction, which is effectively a dereference of an arbitrary memory
/// address.
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub unsafe fn hlv_wu(src: *const u32) -> u32 {
    let loaded: u32;
    // The instruction is emitted through `.insn` with a fixed encoding.
    asm!(
        ".insn i 0x73, 0x4, {}, {}, 0x681",
        out(reg) loaded,
        in(reg) src,
        options(readonly, nostack)
    );
    loaded
}
/// Loads virtual machine memory by double integer
///
/// The memory access is performed explicitly as though `V=1`; that is, with the
/// address translation and protection, and the endianness, that apply to memory
/// accesses in either VS-mode or VU-mode.
///
/// This operation is not available under RV32 base instruction set.
///
/// This function is unsafe because it accesses the virtual supervisor or user via a
/// `HLV.D` instruction, which is effectively a dereference of an arbitrary memory
/// address.
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub unsafe fn hlv_d(src: *const i64) -> i64 {
    let loaded: i64;
    // The instruction is emitted through `.insn` with a fixed encoding.
    asm!(
        ".insn i 0x73, 0x4, {}, {}, 0x6C0",
        out(reg) loaded,
        in(reg) src,
        options(readonly, nostack)
    );
    loaded
}
/// Stores virtual machine memory by double integer
///
/// The memory access is performed explicitly as though `V=1`; that is, with the
/// address translation and protection, and the endianness, that apply to memory
/// accesses in either VS-mode or VU-mode.
///
/// This function is unsafe because it accesses the virtual supervisor or user via a
/// `HSV.D` instruction, which is effectively a dereference of an arbitrary memory
/// address.
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub unsafe fn hsv_d(dst: *mut i64, src: i64) {
    // The instruction is emitted through `.insn` with a fixed encoding; operands
    // are the destination address followed by the value to store.
    asm!(
        ".insn r 0x73, 0x4, 0x37, x0, {}, {}",
        in(reg) dst,
        in(reg) src,
        options(nostack)
    );
}

View file

@ -0,0 +1,265 @@
#[cfg(test)]
use stdarch_test::assert_instr;
// Raw LLVM intrinsic declarations backing the public RV64 wrappers in this file.
// Register operands and results are declared as `i64`; the wrappers cast from/to
// `u64`.
unsafe extern "unadjusted" {
// AES64 round intrinsics.
#[link_name = "llvm.riscv.aes64es"]
fn _aes64es(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64esm"]
fn _aes64esm(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64ds"]
fn _aes64ds(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64dsm"]
fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
// AES64 key-schedule intrinsics; `rnum` is supplied by the wrapper from its
// `RNUM` const generic.
#[link_name = "llvm.riscv.aes64ks1i"]
fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
#[link_name = "llvm.riscv.aes64ks2"]
fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64im"]
fn _aes64im(rs1: i64) -> i64;
// SHA2-512 transforms on a single 64-bit register.
#[link_name = "llvm.riscv.sha512sig0"]
fn _sha512sig0(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sig1"]
fn _sha512sig1(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sum0"]
fn _sha512sum0(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sum1"]
fn _sha512sum1(rs1: i64) -> i64;
}
/// AES final round encryption instruction for RV64.
///
/// The two 64-bit source registers together represent the entire AES state; the
/// instruction produces half of the next round output, applying the ShiftRows and
/// SubBytes steps. This instruction must always be implemented such that its
/// execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.7
#[target_feature(enable = "zkne")]
#[cfg_attr(test, assert_instr(aes64es))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64es(rs1: u64, rs2: u64) -> u64 {
    let (lo, hi) = (rs1 as i64, rs2 as i64);
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let half = unsafe { _aes64es(lo, hi) };
    half as u64
}
/// AES middle round encryption instruction for RV64.
///
/// The two 64-bit source registers together represent the entire AES state; the
/// instruction produces half of the next round output, applying the ShiftRows,
/// SubBytes and MixColumns steps. This instruction must always be implemented such
/// that its execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.8
#[target_feature(enable = "zkne")]
#[cfg_attr(test, assert_instr(aes64esm))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64esm(rs1: u64, rs2: u64) -> u64 {
    let (lo, hi) = (rs1 as i64, rs2 as i64);
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let half = unsafe { _aes64esm(lo, hi) };
    half as u64
}
/// AES final round decryption instruction for RV64.
///
/// The two 64-bit source registers together represent the entire AES state; the
/// instruction produces half of the next round output, applying the Inverse
/// ShiftRows and SubBytes steps. This instruction must always be implemented such
/// that its execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.5
#[target_feature(enable = "zknd")]
#[cfg_attr(test, assert_instr(aes64ds))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64ds(rs1: u64, rs2: u64) -> u64 {
    let (lo, hi) = (rs1 as i64, rs2 as i64);
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let half = unsafe { _aes64ds(lo, hi) };
    half as u64
}
/// AES middle round decryption instruction for RV64.
///
/// The two 64-bit source registers together represent the entire AES state; the
/// instruction produces half of the next round output, applying the Inverse
/// ShiftRows, SubBytes and MixColumns steps. This instruction must always be
/// implemented such that its execution latency does not depend on the data being
/// operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.6
#[target_feature(enable = "zknd")]
#[cfg_attr(test, assert_instr(aes64dsm))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
    let (lo, hi) = (rs1 as i64, rs2 as i64);
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let half = unsafe { _aes64dsm(lo, hi) };
    half as u64
}
/// This instruction implements part of the KeySchedule operation for the AES Block cipher
/// involving the SBox operation.
///
/// It performs the rotation, SubBytes and Round Constant addition steps of the AES
/// block cipher Key Schedule. This instruction must always be implemented such
/// that its execution latency does not depend on the data being operated on. Note
/// that rnum must be in the range 0x0..0xA. The values 0xB..0xF are reserved.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.10
///
/// # Note
///
/// The `RNUM` parameter is expected to be a constant value inside the range of
/// `0..=10`; larger values are rejected by the `static_assert!` in the body.
#[target_feature(enable = "zkne", enable = "zknd")]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
    static_assert!(RNUM <= 10);
    let key = rs1 as i64;
    // `RNUM as i32` is kept inline in the call so the round number stays a constant operand.
    let scheduled = unsafe { _aes64ks1i(key, RNUM as i32) };
    scheduled as u64
}
/// This instruction implements part of the KeySchedule operation for the AES Block cipher.
///
/// It performs the additional XORing of key words as part of the AES block cipher
/// Key Schedule. This instruction must always be implemented such that its
/// execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.11
#[target_feature(enable = "zkne", enable = "zknd")]
#[cfg_attr(test, assert_instr(aes64ks2))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
    let (a, b) = (rs1 as i64, rs2 as i64);
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let scheduled = unsafe { _aes64ks2(a, b) };
    scheduled as u64
}
/// This instruction accelerates the inverse MixColumns step of the AES Block Cipher, and is
/// used to aid creation of the decryption KeySchedule.
///
/// The inverse MixColumns transformation is applied to two columns of the state
/// array, packed into a single 64-bit register. It is used to create the inverse
/// cipher KeySchedule, according to the equivalent inverse cipher construction in
/// (Page 23, Section 5.3.5). This instruction must always be implemented such that
/// its execution latency does not depend on the data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.9
#[target_feature(enable = "zkne", enable = "zknd")]
#[cfg_attr(test, assert_instr(aes64im))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn aes64im(rs1: u64) -> u64 {
    let columns = rs1 as i64;
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let mixed = unsafe { _aes64im(columns) };
    mixed as u64
}
/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
/// (Section 4.1.3).
///
/// This instruction is supported for the RV64 base architecture. It implements the
/// Sigma0 transform of the SHA2-512 hash function \[49\]. This instruction must
/// always be implemented such that its execution latency does not depend on the
/// data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.37
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha512sig0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig0(rs1: u64) -> u64 {
    let input = rs1 as i64;
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let transformed = unsafe { _sha512sig0(input) };
    transformed as u64
}
/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
/// (Section 4.1.3).
///
/// This instruction is supported for the RV64 base architecture. It implements the
/// Sigma1 transform of the SHA2-512 hash function \[49\]. This instruction must
/// always be implemented such that its execution latency does not depend on the
/// data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.38
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha512sig1))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sig1(rs1: u64) -> u64 {
    let input = rs1 as i64;
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let transformed = unsafe { _sha512sig1(input) };
    transformed as u64
}
/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
/// (Section 4.1.3).
///
/// This instruction is supported for the RV64 base architecture. It implements the
/// Sum0 transform of the SHA2-512 hash function \[49\]. This instruction must
/// always be implemented such that its execution latency does not depend on the
/// data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.39
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha512sum0))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sum0(rs1: u64) -> u64 {
    let input = rs1 as i64;
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let transformed = unsafe { _sha512sum0(input) };
    transformed as u64
}
/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
/// (Section 4.1.3).
///
/// This instruction is supported for the RV64 base architecture. It implements the
/// Sum1 transform of the SHA2-512 hash function \[49\]. This instruction must
/// always be implemented such that its execution latency does not depend on the
/// data being operated on.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.40
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha512sum1))]
#[inline]
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
pub fn sha512sum1(rs1: u64) -> u64 {
    let input = rs1 as i64;
    // The intrinsic is declared with `i64` operands; cast the result back to `u64`.
    let transformed = unsafe { _sha512sum1(input) };
    transformed as u64
}

Some files were not shown because too many files have changed in this diff Show more