Import stdarch history as a Josh subtree
This commit is contained in:
commit
e433101882
306 changed files with 729730 additions and 0 deletions
16
library/stdarch/.cirrus.yml
Normal file
16
library/stdarch/.cirrus.yml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
task:
|
||||
name: x86_64-unknown-freebsd
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-4
|
||||
env:
|
||||
# FIXME(freebsd): FreeBSD has a segfault when `RUST_BACKTRACE` is set
|
||||
# https://github.com/rust-lang/rust/issues/132185
|
||||
RUST_BACKTRACE: "0"
|
||||
setup_script:
|
||||
- curl https://sh.rustup.rs -sSf --output rustup.sh
|
||||
- sh rustup.sh --default-toolchain nightly -y
|
||||
- . $HOME/.cargo/env
|
||||
- rustup default nightly
|
||||
test_script:
|
||||
- . $HOME/.cargo/env
|
||||
- cargo build --all
|
||||
4
library/stdarch/.git-blame-ignore-revs
Normal file
4
library/stdarch/.git-blame-ignore-revs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make `git blame` ignore the following commits.
|
||||
|
||||
# format with style edition 2024
|
||||
fc87bd98d689590a0b6f5ee4110c5b9f962faa66
|
||||
288
library/stdarch/.github/workflows/main.yml
vendored
Normal file
288
library/stdarch/.github/workflows/main.yml
vendored
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
name: CI
|
||||
on:
|
||||
pull_request:
|
||||
merge_group:
|
||||
|
||||
jobs:
|
||||
style:
|
||||
name: Check Style
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: rustup update nightly --no-self-update && rustup default nightly
|
||||
- run: ci/style.sh
|
||||
|
||||
docs:
|
||||
name: Build Documentation
|
||||
needs: [style]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: rustup update nightly --no-self-update && rustup default nightly
|
||||
- run: ci/dox.sh
|
||||
env:
|
||||
CI: 1
|
||||
|
||||
verify:
|
||||
name: Automatic intrinsic verification
|
||||
needs: [style]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: rustup update nightly --no-self-update && rustup default nightly
|
||||
- run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
|
||||
|
||||
test:
|
||||
needs: [style]
|
||||
name: Test
|
||||
runs-on: ${{ matrix.target.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
profile:
|
||||
- dev
|
||||
- release
|
||||
target:
|
||||
# Targets that are run through Docker on Linux
|
||||
- tuple: i686-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: arm-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
- tuple: armv7-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
- tuple: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: aarch64_be-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: riscv32gc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: riscv64gc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: powerpc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: powerpc64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: powerpc64le-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
# MIPS targets disabled since they are dropped to tier 3.
|
||||
# See https://github.com/rust-lang/compiler-team/issues/648
|
||||
#- tuple: mips-unknown-linux-gnu
|
||||
# os: ubuntu-latest
|
||||
#- tuple: mips64-unknown-linux-gnuabi64
|
||||
# os: ubuntu-latest
|
||||
#- tuple: mips64el-unknown-linux-gnuabi64
|
||||
# os: ubuntu-latest
|
||||
#- tuple: mipsel-unknown-linux-musl
|
||||
# os: ubuntu-latest
|
||||
- tuple: s390x-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: i586-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: nvptx64-nvidia-cuda
|
||||
os: ubuntu-latest
|
||||
- tuple: thumbv6m-none-eabi
|
||||
os: ubuntu-latest
|
||||
- tuple: thumbv7m-none-eabi
|
||||
os: ubuntu-latest
|
||||
- tuple: thumbv7em-none-eabi
|
||||
os: ubuntu-latest
|
||||
- tuple: thumbv7em-none-eabihf
|
||||
os: ubuntu-latest
|
||||
- tuple: loongarch64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- tuple: wasm32-wasip1
|
||||
os: ubuntu-latest
|
||||
|
||||
# macOS targets
|
||||
- tuple: x86_64-apple-darwin
|
||||
os: macos-15-large
|
||||
- tuple: x86_64-apple-ios-macabi
|
||||
os: macos-15-large
|
||||
- tuple: aarch64-apple-darwin
|
||||
os: macos-15
|
||||
- tuple: aarch64-apple-ios-macabi
|
||||
os: macos-15
|
||||
# FIXME: gh-actions build environment doesn't have linker support
|
||||
# - tuple: i686-apple-darwin
|
||||
# os: macos-13
|
||||
|
||||
# Windows targets
|
||||
- tuple: x86_64-pc-windows-msvc
|
||||
os: windows-2025
|
||||
- tuple: i686-pc-windows-msvc
|
||||
os: windows-2025
|
||||
- tuple: aarch64-pc-windows-msvc
|
||||
os: windows-11-arm
|
||||
- tuple: x86_64-pc-windows-gnu
|
||||
os: windows-2025
|
||||
# - tuple: i686-pc-windows-gnu
|
||||
# os: windows-latest
|
||||
|
||||
# Add additional variables to the matrix variations generated above using `include`:
|
||||
include:
|
||||
# `TEST_EVERYTHING` setups - there should be at least 1 for each architecture
|
||||
- target:
|
||||
tuple: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: aarch64_be-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
build_std: true
|
||||
- target:
|
||||
tuple: armv7-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: loongarch64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: powerpc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: true
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: powerpc64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: true
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: powerpc64le-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: riscv32gc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
build_std: true
|
||||
- target:
|
||||
tuple: riscv64gc-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: s390x-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
- target:
|
||||
tuple: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
# MIPS targets disabled since they are dropped to tier 3.
|
||||
# See https://github.com/rust-lang/compiler-team/issues/648
|
||||
#- target:
|
||||
# tuple: mips-unknown-linux-gnu
|
||||
# os: ubuntu-latest
|
||||
# norun: true
|
||||
#- target:
|
||||
# tuple: mips64-unknown-linux-gnuabi64
|
||||
# os: ubuntu-latest
|
||||
# norun: true
|
||||
#- target:
|
||||
# tuple: mips64el-unknown-linux-gnuabi64
|
||||
# os: ubuntu-latest
|
||||
# norun: true
|
||||
#- target:
|
||||
# tuple: mipsel-unknown-linux-musl
|
||||
# os: ubuntu-latest
|
||||
# norun: true
|
||||
- target:
|
||||
tuple: aarch64-apple-darwin
|
||||
os: macos-15
|
||||
norun: true # https://github.com/rust-lang/stdarch/issues/1206
|
||||
- target:
|
||||
tuple: aarch64-apple-ios-macabi
|
||||
os: macos-15
|
||||
norun: true # https://github.com/rust-lang/stdarch/issues/1206
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: |
|
||||
rustup update nightly --no-self-update
|
||||
rustup default nightly
|
||||
shell: bash
|
||||
if: matrix.target.os != 'windows-11-arm'
|
||||
- name: Install Rust for `windows-11-arm` runners
|
||||
# The arm runners don't have Rust pre-installed (https://github.com/actions/partner-runner-images/issues/77)
|
||||
run: |
|
||||
curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
shell: bash
|
||||
if: matrix.target.os == 'windows-11-arm'
|
||||
|
||||
- run: rustup target add ${{ matrix.target.tuple }}
|
||||
shell: bash
|
||||
if: matrix.build_std == ''
|
||||
- run: |
|
||||
rustup component add rust-src
|
||||
echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
if: matrix.build_std != ''
|
||||
|
||||
# Configure some env vars based on matrix configuration
|
||||
- run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
- run: echo "NORUN=1" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
if: matrix.norun != '' || startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
|
||||
- run: echo "STDARCH_TEST_EVERYTHING=1" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
if: matrix.test_everything != ''
|
||||
- run: echo "STDARCH_DISABLE_ASSERT_INSTR=1" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
if: matrix.disable_assert_instr != ''
|
||||
- run: echo "NOSTD=1" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
|
||||
|
||||
# Windows, macOS and bare-metal `thumb*` targets go straight to `run.sh` ...
|
||||
- run: ./ci/run.sh
|
||||
shell: bash
|
||||
if: matrix.target.os != 'ubuntu-latest' || startsWith(matrix.target.tuple, 'thumb')
|
||||
env:
|
||||
TARGET: ${{ matrix.target.tuple }}
|
||||
|
||||
# ... while Linux goes to `run-docker.sh`
|
||||
- run: ./ci/run-docker.sh ${{ matrix.target.tuple }}
|
||||
shell: bash
|
||||
if: matrix.target.os == 'ubuntu-latest' && !startsWith(matrix.target.tuple, 'thumb')
|
||||
env:
|
||||
TARGET: ${{ matrix.target.tuple }}
|
||||
|
||||
build-std-detect:
|
||||
needs: [style]
|
||||
name: Build std_detect
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly
|
||||
- run: ./ci/build-std-detect.sh
|
||||
|
||||
conclusion:
|
||||
needs:
|
||||
- docs
|
||||
- verify
|
||||
- test
|
||||
- build-std-detect
|
||||
runs-on: ubuntu-latest
|
||||
# We need to ensure this job does *not* get skipped if its dependencies fail,
|
||||
# because a skipped job is considered a success by GitHub. So we have to
|
||||
# overwrite `if:`. We use `!cancelled()` to ensure the job still does not get run
|
||||
# when the workflow is canceled manually.
|
||||
#
|
||||
# ALL THE PREVIOUS JOBS NEED TO BE ADDED TO THE `needs` SECTION OF THIS JOB!
|
||||
if: ${{ !cancelled() }} # make sure this is never "skipped"
|
||||
steps:
|
||||
- name: Conclusion
|
||||
run: |
|
||||
# Print the dependent jobs to see them in the CI log
|
||||
jq -C <<< '${{ toJson(needs) }}'
|
||||
# Check if all jobs that we depend on (in the needs array) were successful.
|
||||
jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
|
||||
9
library/stdarch/.gitignore
vendored
Normal file
9
library/stdarch/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
.*.swp
|
||||
target
|
||||
tags
|
||||
crates/stdarch-gen-arm/aarch64.rs
|
||||
crates/stdarch-gen-arm/arm.rs
|
||||
crates/stdarch-gen-loongarch/lasx.c
|
||||
crates/stdarch-gen-loongarch/lsx.c
|
||||
c_programs/*
|
||||
rust_programs/*
|
||||
0
library/stdarch/.gitmodules
vendored
Normal file
0
library/stdarch/.gitmodules
vendored
Normal file
93
library/stdarch/CONTRIBUTING.md
Normal file
93
library/stdarch/CONTRIBUTING.md
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Contributing to stdarch
|
||||
|
||||
The `stdarch` crate is more than willing to accept contributions! First you'll
|
||||
probably want to check out the repository and make sure that tests pass for you:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/rust-lang/stdarch
|
||||
$ cd stdarch
|
||||
$ TARGET="<your-target-arch>" ci/run.sh
|
||||
```
|
||||
|
||||
Where `<your-target-arch>` is the target triple as used by `rustup`, e.g. `x86_64-unknown-linux-gnu` (without any preceding `nightly-` or similar).
|
||||
Also remember that this repository requires the nightly channel of Rust!
|
||||
The above tests do in fact require nightly Rust to be the default on your system; to set that, use `rustup default nightly` (and `rustup default stable` to revert).
|
||||
|
||||
If any of the above steps don't work, [please let us know][new]!
|
||||
|
||||
Next up you can [find an issue][issues] to help out on; we've selected a few
|
||||
with the [`help wanted`][help] tag which could
|
||||
particularly use some help. You may be most interested in [#40][vendor],
|
||||
implementing all vendor intrinsics on x86. That issue's got some good pointers
|
||||
about where to get started!
|
||||
|
||||
If you've got general questions feel free to [join us on gitter][gitter] and ask
|
||||
around! Feel free to ping either @BurntSushi or @alexcrichton with questions.
|
||||
|
||||
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd
|
||||
|
||||
# How to write examples for stdarch intrinsics
|
||||
|
||||
There are a few features that must be enabled for the given intrinsic to work
|
||||
properly and the example must only be run by `cargo test --doc` when the feature
|
||||
is supported by the CPU. As a result, the default `fn main` that is generated by
|
||||
`rustdoc` will not work (in most cases). Consider using the following as a guide
|
||||
to ensure your example works as expected.
|
||||
|
||||
```rust
|
||||
/// # // We need cfg_target_feature to ensure the example is only
|
||||
/// # // run by `cargo test --doc` when the CPU supports the feature
|
||||
/// # #![feature(cfg_target_feature)]
|
||||
/// # // We need target_feature for the intrinsic to work
|
||||
/// # #![feature(target_feature)]
|
||||
/// #
|
||||
/// # // rustdoc by default uses `extern crate stdarch`, but we need the
|
||||
/// # // `#[macro_use]`
|
||||
/// # #[macro_use] extern crate stdarch;
|
||||
/// #
|
||||
/// # // The real main function
|
||||
/// # fn main() {
|
||||
/// # // Only run this if `<target feature>` is supported
|
||||
/// # if cfg_feature_enabled!("<target feature>") {
|
||||
/// # // Create a `worker` function that will only be run if the target feature
|
||||
/// # // is supported and ensure that `target_feature` is enabled for your worker
|
||||
/// # // function
|
||||
/// # #[target_feature(enable = "<target feature>")]
|
||||
/// # unsafe fn worker() {
|
||||
///
|
||||
/// // Write your example here. Feature specific intrinsics will work here! Go wild!
|
||||
///
|
||||
/// # }
|
||||
/// # unsafe { worker(); }
|
||||
/// # }
|
||||
/// # }
|
||||
```
|
||||
|
||||
If some of the above syntax does not look familiar, the [Documentation as tests] section
|
||||
of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free
|
||||
to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping
|
||||
to improve the documentation of `stdarch`!
|
||||
|
||||
# Alternative Testing Instructions
|
||||
|
||||
It is generally recommended that you use `ci/run-docker.sh` to run the tests.
|
||||
However this might not work for you, e.g. if you are on Windows.
|
||||
|
||||
In that case you can fall back to running `cargo +nightly test` and `cargo +nightly test --release -p core_arch` for testing the code generation.
|
||||
Note that these require the nightly toolchain to be installed and for `rustc` to know about your target triple and its CPU.
|
||||
In particular you need to set the `TARGET` environment variable as you would for `ci/run.sh`.
|
||||
In addition you need to set `RUSTCFLAGS` (need the `C`) to indicate target features, e.g. `RUSTCFLAGS="-C target-feature=+avx2"`.
|
||||
You can also set `-C target-cpu=native` if you're "just" developing against your current CPU.
|
||||
|
||||
Be warned that when you use these alternative instructions, [things may go less smoothly than they would with `ci/run-docker.sh`][ci-run-good]. For example, instruction generation tests may fail because the disassembler names instructions differently: it may emit `vaesenc` instead of `aesenc` even though the two behave the same.
|
||||
Also, these instructions execute fewer tests than would normally be run, so don't be surprised if, when you eventually open a pull request, some errors show up for tests not covered here.
|
||||
|
||||
|
||||
[new]: https://github.com/rust-lang/stdarch/issues/new
|
||||
[issues]: https://github.com/rust-lang/stdarch/issues
|
||||
[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
|
||||
[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period
|
||||
[vendor]: https://github.com/rust-lang/stdarch/issues/40
|
||||
[Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests
|
||||
[Rust Book]: https://doc.rust-lang.org/book/first-edition
|
||||
[ci-run-good]: https://github.com/rust-lang/stdarch/issues/931#issuecomment-711412126
|
||||
965
library/stdarch/Cargo.lock
Normal file
965
library/stdarch/Cargo.lock
Normal file
|
|
@ -0,0 +1,965 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.98"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
|
||||
|
||||
[[package]]
|
||||
name = "assert-instr-macro"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim 0.11.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "core_arch"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"std_detect",
|
||||
"stdarch-test",
|
||||
"syscalls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
|
||||
dependencies = [
|
||||
"csv-core",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.13.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"darling_macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.13.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim 0.10.0",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.13.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
|
||||
dependencies = [
|
||||
"humantime",
|
||||
"is-terminal",
|
||||
"log",
|
||||
"regex",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
|
||||
|
||||
[[package]]
|
||||
name = "ident_case"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.15.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "intrinsic-test"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"csv",
|
||||
"diff",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"pretty_env_logger",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.172"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
|
||||
dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_env_logger"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c"
|
||||
dependencies = [
|
||||
"env_logger 0.10.2",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ca7dd09b5f4a9029c35e323b086d0a68acdc673317b9c4d002c6f1d4a7278c6"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quickcheck"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
|
||||
dependencies = [
|
||||
"env_logger 0.8.4",
|
||||
"log",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-std-workspace-alloc"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9d441c3b2ebf55cebf796bfdc265d67fa09db17b7bb6bd4be75c509e1e8fec3"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-std-workspace-core"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa9c45b374136f52f2d6311062c7146bff20fec063c3f5d46a410bd937746955"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.140"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_with"
|
||||
version = "1.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_with_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_with_macros"
|
||||
version = "1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.8.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b"
|
||||
dependencies = [
|
||||
"indexmap 1.9.3",
|
||||
"ryu",
|
||||
"serde",
|
||||
"yaml-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "simd-test-macro"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "std_detect"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"rustc-std-workspace-alloc",
|
||||
"rustc-std-workspace-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdarch-gen-arm"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_with",
|
||||
"serde_yaml",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdarch-gen-loongarch"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdarch-test"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert-instr-macro",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"lazy_static",
|
||||
"rustc-demangle",
|
||||
"simd-test-macro",
|
||||
"wasmprinter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdarch-verify"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quick-xml",
|
||||
"quote",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdarch_examples"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"core_arch",
|
||||
"quickcheck",
|
||||
"rand",
|
||||
"std_detect",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syscalls"
|
||||
version = "0.6.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43d0e35dc7d73976a53c7e6d7d177ef804a0c0ee774ec77bcc520c2216fd7cbe"
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.1+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
||||
|
||||
[[package]]
|
||||
name = "wasmparser"
|
||||
version = "0.113.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
|
||||
dependencies = [
|
||||
"indexmap 2.9.0",
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasmprinter"
|
||||
version = "0.2.67"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"wasmparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
|
||||
dependencies = [
|
||||
"linked-hash-map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
]
|
||||
19
library/stdarch/Cargo.toml
Normal file
19
library/stdarch/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
[workspace]
|
||||
resolver = "1"
|
||||
members = [
|
||||
"crates/*",
|
||||
"examples",
|
||||
]
|
||||
exclude = [
|
||||
"crates/wasm-assert-instr-tests"
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
|
||||
[profile.bench]
|
||||
debug = 1
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
201
library/stdarch/LICENSE-APACHE
Normal file
201
library/stdarch/LICENSE-APACHE
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
25
library/stdarch/LICENSE-MIT
Normal file
25
library/stdarch/LICENSE-MIT
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2017 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
18
library/stdarch/README.md
Normal file
18
library/stdarch/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
stdarch - Rust's standard library SIMD components
|
||||
=======
|
||||
|
||||
[Actions Status](https://github.com/rust-lang/stdarch/actions)
|
||||
|
||||
|
||||
# Crates
|
||||
|
||||
This repository contains two main crates:
|
||||
|
||||
* [`core_arch`](crates/core_arch/README.md) implements `core::arch` - Rust's
|
||||
core library architecture-specific intrinsics, and
|
||||
|
||||
* [`std_detect`](crates/std_detect/README.md) implements `std::detect` - Rust's
|
||||
standard library run-time CPU feature detection.
|
||||
|
||||
The `std::simd` component now lives in the
|
||||
[`packed_simd_2`](https://github.com/rust-lang/packed_simd) crate.
|
||||
46
library/stdarch/ci/build-std-detect.sh
Executable file
46
library/stdarch/ci/build-std-detect.sh
Executable file
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Build std_detect on non-Linux & non-x86 targets.
|
||||
#
|
||||
# In std_detect, non-x86 targets have OS-specific implementations,
|
||||
# but we can test only Linux in CI. This script builds targets supported
|
||||
# by std_detect but that cannot be tested in CI.
|
||||
|
||||
set -ex
|
||||
cd "$(dirname "$0")"/..
|
||||
|
||||
targets=(
|
||||
# Linux
|
||||
aarch64-unknown-linux-musl
|
||||
armv5te-unknown-linux-musleabi
|
||||
aarch64-unknown-linux-ohos
|
||||
armv7-unknown-linux-ohos
|
||||
|
||||
# Android
|
||||
aarch64-linux-android
|
||||
arm-linux-androideabi
|
||||
|
||||
# FreeBSD
|
||||
aarch64-unknown-freebsd
|
||||
armv6-unknown-freebsd
|
||||
powerpc-unknown-freebsd
|
||||
powerpc64-unknown-freebsd
|
||||
|
||||
# OpenBSD
|
||||
aarch64-unknown-openbsd
|
||||
|
||||
# Windows
|
||||
aarch64-pc-windows-msvc
|
||||
)
|
||||
|
||||
rustup component add rust-src # for -Z build-std
|
||||
|
||||
cd crates/std_detect
|
||||
for target in "${targets[@]}"; do
|
||||
if rustup target add "${target}" &>/dev/null; then
|
||||
cargo build --target "${target}"
|
||||
else
|
||||
# Tier 3 targets require -Z build-std.
|
||||
cargo build -Z build-std="core,alloc" --target "${target}"
|
||||
fi
|
||||
done
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-aarch64-linux-gnu \
|
||||
g++-aarch64-linux-gnu \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
lld
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
|
||||
OBJDUMP=aarch64-linux-gnu-objdump \
|
||||
STDARCH_TEST_SKIP_FEATURE=tme
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
curl \
|
||||
xz-utils \
|
||||
lld
|
||||
|
||||
ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
|
||||
|
||||
# Download the aarch64_be gcc toolchain
|
||||
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
|
||||
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
|
||||
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
|
||||
|
||||
ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
|
||||
ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
|
||||
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
|
||||
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
|
||||
ENV STDARCH_TEST_SKIP_FEATURE=tme
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
FROM ubuntu:24.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
g++-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
lld
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
|
||||
|
||||
|
||||
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
|
||||
CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-loongarch64-static -cpu max -L /usr/loongarch64-linux-gnu" \
|
||||
OBJDUMP=loongarch64-linux-gnu-objdump \
|
||||
STDARCH_TEST_SKIP_FEATURE=frecipe
|
||||
13
library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
Normal file
13
library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips-linux-gnu libc6-dev-mips-cross \
|
||||
qemu-system-mips \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
|
||||
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
|
||||
OBJDUMP=mips-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
|
||||
qemu-system-mips64 qemu-user
|
||||
|
||||
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
|
||||
OBJDUMP=mips64-linux-gnuabi64-objdump
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
|
||||
qemu-system-mips64el
|
||||
|
||||
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
|
||||
OBJDUMP=mips64el-linux-gnuabi64-objdump
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
make \
|
||||
qemu-user \
|
||||
qemu-system-mips \
|
||||
bzip2 \
|
||||
curl \
|
||||
file
|
||||
|
||||
RUN mkdir /toolchain
|
||||
|
||||
# Note that this originally came from:
|
||||
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
|
||||
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
|
||||
tar xjf - -C /toolchain --strip-components=2
|
||||
|
||||
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
|
||||
CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
|
||||
5
library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
Normal file
5
library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
|
||||
qemu-system-ppc make file
|
||||
|
||||
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu mpc8610 -L /usr/powerpc-linux-gnu" \
|
||||
CC=powerpc-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc-linux-gnu-objdump \
|
||||
STDARCH_TEST_SKIP_FEATURE=vsx
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
|
||||
file make
|
||||
|
||||
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -cpu power11 -L /usr/powerpc64-linux-gnu" \
|
||||
CC=powerpc64-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64-linux-gnu-objdump \
|
||||
STDARCH_TEST_SKIP_FEATURE=vsx \
|
||||
# These 2 tests have erratic behaviour with qemu, see https://gitlab.com/qemu-project/qemu/-/issues/1623#note_2449012173
|
||||
STDARCH_TEST_SKIP_FUNCTION=vec_lde_u16,vec_lde_u32
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
|
||||
file make
|
||||
|
||||
# Work around qemu triggering a sigill on vec_subs if the cpu target is not defined.
|
||||
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -cpu power11 -L /usr/powerpc64le-linux-gnu" \
|
||||
CC=powerpc64le-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64le-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
wget xz-utils make file llvm
|
||||
|
||||
ENV VERSION=2025.01.20
|
||||
|
||||
RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
|
||||
-O riscv-toolchain.tar.xz
|
||||
RUN tar -xJf riscv-toolchain.tar.xz
|
||||
|
||||
ENV CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_LINKER=/riscv/bin/riscv32-unknown-linux-gnu-gcc \
|
||||
CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv32 -cpu max -L /riscv/sysroot" \
|
||||
OBJDUMP=llvm-objdump
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
|
||||
llvm
|
||||
|
||||
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \
|
||||
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -cpu max -L /usr/riscv64-linux-gnu" \
|
||||
OBJDUMP=llvm-objdump
|
||||
14
library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
Normal file
14
library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl ca-certificates \
|
||||
gcc libc6-dev \
|
||||
gcc-s390x-linux-gnu libc6-dev-s390x-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
clang \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
|
||||
CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -cpu max -L /usr/s390x-linux-gnu" \
|
||||
OBJDUMP=s390x-linux-gnu-objdump
|
||||
13
library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
Normal file
13
library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:25.04
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
xz-utils \
|
||||
clang
|
||||
|
||||
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz | tar xJf -
|
||||
ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux
|
||||
|
||||
ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
FROM ubuntu:25.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates \
|
||||
wget \
|
||||
xz-utils
|
||||
|
||||
# Download the Intel SDE archive over HTTPS (ca-certificates is installed above),
# matching how the other images fetch from the same mirror.
RUN wget https://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
|
||||
RUN mkdir intel-sde
|
||||
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
|
||||
-cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
|
||||
-rtm-mode full -tsx --"
|
||||
# These tests fail with SDE as it doesn't support saving register data
|
||||
ENV STDARCH_TEST_SKIP_FUNCTION="xsave,xsaveopt,xsave64,xsaveopt64"
|
||||
71
library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
Normal file
71
library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright (C) 2024-2024 Intel Corporation.
|
||||
#
|
||||
# This software and the related documents are Intel copyrighted materials, and your
|
||||
# use of them is governed by the express license under which they were provided to
|
||||
# you ("License"). Unless the License provides otherwise, you may not use, modify,
|
||||
# copy, publish, distribute, disclose or transmit this software or the related
|
||||
# documents without Intel's prior written permission.
|
||||
#
|
||||
# This software and the related documents are provided as is, with no express or
|
||||
# implied warranties, other than those that are expressly stated in the License.
|
||||
#
|
||||
# The CPUID information in this file is for software enabling purposes only and
|
||||
# it is not a full and accurate representation of the CPU under development which
|
||||
# it represents.
|
||||
# The CPUID information in this file is not a guarantee of the availability of
|
||||
# features or characteristics in the final released CPU.
|
||||
#
|
||||
# CPUID_VERSION = 1.0
|
||||
# Input => Output
|
||||
# EAX ECX => EAX EBX ECX EDX
|
||||
00000000 ******** => 00000024 68747541 444d4163 69746e65
|
||||
00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
|
||||
00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
|
||||
00000003 ******** => 00000000 00000000 00000000 00000000
|
||||
00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
|
||||
00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
|
||||
00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
|
||||
00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
|
||||
00000004 00000004 => 00000000 00000000 00000000 00000000
|
||||
00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
|
||||
00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
|
||||
00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
|
||||
00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
|
||||
00000008 ******** => 00000000 00000000 00000000 00000000
|
||||
00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
|
||||
0000000a ******** => 07300403 00000000 00000000 00000603
|
||||
0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
|
||||
0000000b 00000001 => 00000004 00000002 00000201 0000001e
|
||||
0000000c ******** => 00000000 00000000 00000000 00000000
|
||||
0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
|
||||
0000000d 00000001 => 0000001f 00000240 00000100 00000000
|
||||
0000000d 00000002 => 00000100 00000240 00000000 00000000
|
||||
0000000d 00000005 => 00000040 00000440 00000000 00000000 #zmasks
|
||||
0000000d 00000006 => 00000200 00000480 00000000 00000000 #zmmh
|
||||
0000000d 00000007 => 00000400 00000680 00000000 00000000 #zmm
|
||||
0000000d 00000011 => 00000040 00000ac0 00000002 00000000 #tileconfig
|
||||
0000000d 00000012 => 00002000 00000b00 00000006 00000000 #tiles
|
||||
0000000d 00000013 => 00000080 000003c0 00000000 00000000 #APX
|
||||
00000014 00000000 => 00000000 00000010 00000000 00000000 #ptwrite
|
||||
00000019 ******** => 00000000 00000005 00000000 00000000 #Key Locker
|
||||
0000001d 00000000 => 00000001 00000000 00000000 00000000 #AMX Tile
|
||||
0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
|
||||
0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
|
||||
0000001e 00000001 => 000001ff 00000000 00000000 00000000
|
||||
0000001f 00000000 => 00000001 00000002 00000100 0000001e
|
||||
0000001f 00000001 => 00000007 00000070 00000201 0000001e
|
||||
0000001f 00000002 => 00000000 00000000 00000002 0000001e
|
||||
00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
|
||||
80000000 ******** => 80000008 00000000 00000000 00000000
|
||||
80000001 ******** => 00000000 00000000 00200961 2c100000
|
||||
80000002 ******** => 00000000 00000000 00000000 00000000
|
||||
80000003 ******** => 00000000 00000000 00000000 00000000
|
||||
80000004 ******** => 00000000 00000000 00000000 00000000
|
||||
80000005 ******** => 00000000 00000000 00000000 00000000
|
||||
80000006 ******** => 00000000 00000000 01006040 00000000
|
||||
80000007 ******** => 00000000 00000000 00000000 00000100
|
||||
80000008 ******** => 00003028 00000200 00000200 00000000
|
||||
|
||||
# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
|
||||
# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
|
||||
# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.
|
||||
41
library/stdarch/ci/dox.sh
Executable file
41
library/stdarch/ci/dox.sh
Executable file
|
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Builds and documents `core_arch` and `std_detect` for the target triple
|
||||
# given as the first argument. Without an argument, a fixed list of target
|
||||
# triples is built and documented instead.
|
||||
|
||||
set -ex
|
||||
|
||||
export RUSTDOCFLAGS="-D warnings"
|
||||
|
||||
# Build and document `core_arch` and `std_detect` for a single target.
#
# $1 - target triple to build and document
dox() {
    local target="${1}"
    local subcommand crate

    # When CI is set, try to install the target first; a failure to install
    # is deliberately ignored.
    if [ -n "${CI}" ]; then
        rustup target add "${target}" || true
    fi

    # Start from a clean slate for this target.
    cargo clean --target "${target}"

    # Build both crates first, then document both, in that order.
    for subcommand in build doc; do
        for crate in core_arch std_detect; do
            cargo "${subcommand}" --verbose --target "${target}" --manifest-path "crates/${crate}/Cargo.toml"
        done
    done
}
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
dox i686-unknown-linux-gnu
|
||||
dox x86_64-unknown-linux-gnu
|
||||
dox armv7-unknown-linux-gnueabihf
|
||||
dox aarch64-unknown-linux-gnu
|
||||
dox powerpc-unknown-linux-gnu
|
||||
dox powerpc64le-unknown-linux-gnu
|
||||
dox loongarch64-unknown-linux-gnu
|
||||
# MIPS targets disabled since they are dropped to tier 3.
|
||||
# See https://github.com/rust-lang/compiler-team/issues/648
|
||||
#dox mips-unknown-linux-gnu
|
||||
#dox mips64-unknown-linux-gnuabi64
|
||||
dox wasm32-unknown-unknown
|
||||
dox nvptx64-nvidia-cuda
|
||||
else
|
||||
dox "${1}"
|
||||
fi
|
||||
60
library/stdarch/ci/run-docker.sh
Executable file
60
library/stdarch/ci/run-docker.sh
Executable file
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
# Small script to run tests for a target (or all targets) inside all the
|
||||
# respective docker images.
|
||||
|
||||
set -ex
|
||||
|
||||
if [ $# -lt 1 ]; then
|
||||
>&2 echo "Usage: $0 <TARGET>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
run() {
|
||||
# Set the linker that is used for the host (e.g. when compiling a build.rs)
|
||||
# This overrides any configuration in e.g. `.cargo/config.toml`, which will
|
||||
# probably not work within the docker container.
|
||||
HOST_LINKER="CARGO_TARGET_$(rustc --print host-tuple | tr '[:lower:]-' '[:upper:]_')_LINKER"
|
||||
|
||||
# Prevent `Read-only file system (os error 30)`.
|
||||
cargo generate-lockfile
|
||||
|
||||
echo "Building docker container for TARGET=${1}"
|
||||
docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
|
||||
mkdir -p target c_programs rust_programs
|
||||
echo "Running docker"
|
||||
# shellcheck disable=SC2016
|
||||
docker run \
|
||||
--rm \
|
||||
--user "$(id -u)":"$(id -g)" \
|
||||
--env CARGO_HOME=/cargo \
|
||||
--env CARGO_TARGET_DIR=/checkout/target \
|
||||
--env TARGET="${1}" \
|
||||
--env "${HOST_LINKER}"="cc" \
|
||||
--env STDARCH_TEST_EVERYTHING \
|
||||
--env STDARCH_DISABLE_ASSERT_INSTR \
|
||||
--env NOSTD \
|
||||
--env NORUN \
|
||||
--env RUSTFLAGS \
|
||||
--env CARGO_UNSTABLE_BUILD_STD \
|
||||
--env RUST_STD_DETECT_UNSTABLE \
|
||||
--volume "${HOME}/.cargo":/cargo \
|
||||
--volume "$(rustc --print sysroot)":/rust:ro \
|
||||
--volume "$(pwd)":/checkout:ro \
|
||||
--volume "$(pwd)"/target:/checkout/target \
|
||||
--volume "$(pwd)"/c_programs:/checkout/c_programs \
|
||||
--volume "$(pwd)"/rust_programs:/checkout/rust_programs \
|
||||
--init \
|
||||
--workdir /checkout \
|
||||
--privileged \
|
||||
stdarch \
|
||||
sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh ${1}"
|
||||
}
|
||||
|
||||
# Run the tests for the requested target, or for every target that has a
# docker directory when the first argument is an empty string (a missing
# argument is rejected by the usage check earlier in this script).
if [ -z "$1" ]; then
    for d in ci/docker/*; do
        # `run` expects a bare target name: it builds the Dockerfile path from
        # it and exports it as TARGET, so strip the leading `ci/docker/`.
        run "$(basename "${d}")"
    done
else
    run "${1}"
fi
|
||||
203
library/stdarch/ci/run.sh
Executable file
203
library/stdarch/ci/run.sh
Executable file
|
|
@ -0,0 +1,203 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
set -ex
|
||||
|
||||
: "${TARGET?The TARGET environment variable must be set.}"
|
||||
|
||||
# Tests are all super fast anyway, and they fault often enough on travis that
|
||||
# having only one thread increases debuggability to be worth it.
|
||||
#export RUST_BACKTRACE=full
|
||||
#export RUST_TEST_NOCAPTURE=1
|
||||
#export RUST_TEST_THREADS=1
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
|
||||
export HOST_RUSTFLAGS="${RUSTFLAGS}"
|
||||
export PROFILE="${PROFILE:="--profile=release"}"
|
||||
|
||||
case ${TARGET} in
|
||||
# On Windows the linker performs identical COMDAT folding (ICF) by default
|
||||
# in release mode which removes identical COMDAT sections. This interferes
|
||||
# with our instruction assertions just like LLVM's MergeFunctions pass so
|
||||
# we disable it.
|
||||
*-pc-windows-msvc)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Clink-args=/OPT:NOICF"
|
||||
;;
|
||||
# On 32-bit use a static relocation model which avoids some extra
|
||||
# instructions when dealing with static data, notably allowing some
|
||||
# instruction assertion checks to pass below the 20 instruction limit. If
|
||||
# this is the default, dynamic, then too many instructions are generated
|
||||
# when we assert the instruction for a function and it causes tests to fail.
|
||||
i686-* | i586-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static"
|
||||
;;
|
||||
# Some x86_64 targets enable by default more features beyond SSE2,
|
||||
# which cause some instruction assertion checks to fail.
|
||||
x86_64-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3"
|
||||
;;
|
||||
# Unoptimized builds use fast-isel, which breaks with MSA.
|
||||
mips-* | mipsel-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
|
||||
;;
|
||||
armv7-*eabihf | thumbv7-*eabihf)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
|
||||
;;
|
||||
# Build RISC-V targets with the `zk`, `zks`, `zbb` and `zbc` target features
|
||||
# enabled.
|
||||
riscv*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
echo "OBJDUMP=${OBJDUMP}"
|
||||
echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}"
|
||||
echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}"
|
||||
echo "STDARCH_TEST_SKIP_FEATURE=${STDARCH_TEST_SKIP_FEATURE}"
|
||||
echo "STDARCH_TEST_SKIP_FUNCTION=${STDARCH_TEST_SKIP_FUNCTION}"
|
||||
echo "PROFILE=${PROFILE}"
|
||||
|
||||
# Run `cargo test` for ${TARGET}, or `cargo build` when NORUN=1.
#
# $1 - extra cargo arguments (split into words when the command runs)
# $2 - extra arguments placed after `--`
cargo_test() {
    if [ "$NORUN" = "1" ]; then
        export subcmd="build"
    else
        subcmd="test"
    fi

    cmd="cargo ${subcmd} --target=$TARGET $1 -- $2"

    # wasm targets can't catch panics, so if a test fails make sure the test
    # harness isn't trying to capture output; otherwise we won't get any
    # useful output.
    case ${TARGET} in
        wasm32*) cmd="$cmd --nocapture" ;;
    esac

    # Left unquoted on purpose: the string is split into the actual arguments.
    $cmd
}
|
||||
|
||||
CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
|
||||
STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
|
||||
STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
|
||||
INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
|
||||
|
||||
cargo_test "${CORE_ARCH} ${PROFILE}"
|
||||
|
||||
if [ "$NOSTD" != "1" ]; then
|
||||
cargo_test "${STD_DETECT} ${PROFILE}"
|
||||
|
||||
cargo_test "${STD_DETECT} --no-default-features"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io"
|
||||
|
||||
cargo_test "${STDARCH_EXAMPLES} ${PROFILE}"
|
||||
fi
|
||||
|
||||
|
||||
# Test targets compiled with extra features.
|
||||
case ${TARGET} in
|
||||
x86_64-unknown-linux-gnu)
|
||||
export STDARCH_DISABLE_ASSERT_INSTR=1
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
|
||||
cargo_test "${PROFILE}"
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx512f"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
x86_64* | i686*)
|
||||
export STDARCH_DISABLE_ASSERT_INSTR=1
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
# FIXME: these targets don't build anymore
|
||||
#mips-*gnu* | mipsel-*gnu*)
|
||||
# export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
|
||||
# cargo_test "${PROFILE}"
|
||||
# ;;
|
||||
mips64*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
s390x*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
powerpc64*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
|
||||
cargo_test "${PROFILE}"
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vsx"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
powerpc*)
|
||||
# qemu has a bug in PPC32 which leads to a crash when compiled with `vsx`
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
|
||||
cargo_test "${PROFILE}"
|
||||
;;
|
||||
|
||||
# Set up aarch64 & armv7 specific variables and the runner, along with some
|
||||
# tests to skip
|
||||
aarch64-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
;;
|
||||
|
||||
armv7-unknown-linux-gnueabihf*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# Arm specific
|
||||
case "${TARGET}" in
|
||||
aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}" \
|
||||
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
|
||||
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
|
||||
# Test examples
|
||||
(
|
||||
cd examples
|
||||
cargo test --target "$TARGET" "${PROFILE}"
|
||||
echo test | cargo run --target "$TARGET" "${PROFILE}" hex
|
||||
)
|
||||
fi
|
||||
22
library/stdarch/ci/style.sh
Executable file
22
library/stdarch/ci/style.sh
Executable file
|
|
@ -0,0 +1,22 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
set -ex
|
||||
|
||||
if rustup component add rustfmt-preview ; then
|
||||
command -v rustfmt
|
||||
rustfmt -V
|
||||
cargo fmt --all -- --check
|
||||
fi
|
||||
|
||||
# if rustup component add clippy-preview ; then
|
||||
# cargo clippy -V
|
||||
# cargo clippy --all -- -D clippy::pedantic
|
||||
# fi
|
||||
|
||||
if shellcheck --version ; then
|
||||
shellcheck -e SC2103 ci/*.sh
|
||||
else
|
||||
echo "shellcheck not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
17
library/stdarch/crates/assert-instr-macro/Cargo.toml
Normal file
17
library/stdarch/crates/assert-instr-macro/Cargo.toml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
[package]
|
||||
name = "assert-instr-macro"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
test = false
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0"
|
||||
quote = "1.0"
|
||||
syn = { version = "2.0", features = ["full"] }
|
||||
|
||||
[lints.rust]
|
||||
unexpected_cfgs = {level = "warn", check-cfg = ['cfg(optimized)'] }
|
||||
12
library/stdarch/crates/assert-instr-macro/build.rs
Normal file
12
library/stdarch/crates/assert-instr-macro/build.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
use std::env;
|
||||
|
||||
/// Build script entry point: emits the `optimized` cfg when the crate is
/// being compiled either with the `release` profile or with a numeric
/// optimization level of 2 or higher.
fn main() {
    let is_release = env::var("PROFILE").is_ok_and(|profile| profile == "release");

    // A missing or non-numeric OPT_LEVEL is treated as level 0.
    let opt_level = match env::var("OPT_LEVEL") {
        Ok(raw) => raw.parse::<i32>().unwrap_or(0),
        Err(_) => 0,
    };

    if is_release || opt_level >= 2 {
        println!("cargo:rustc-cfg=optimized");
    }
}
|
||||
224
library/stdarch/crates/assert-instr-macro/src/lib.rs
Normal file
224
library/stdarch/crates/assert-instr-macro/src/lib.rs
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
//! Implementation of the `#[assert_instr]` macro
|
||||
//!
|
||||
//! This macro is used when testing the `stdarch` crate and is used to generate
|
||||
//! test cases to assert that functions do indeed contain the instructions that
|
||||
//! we're expecting them to contain.
|
||||
//!
|
||||
//! The procedural macro here is relatively simple, it simply appends a
|
||||
//! `#[test]` function to the original token stream which asserts that the
|
||||
//! function itself contains the relevant instruction.
|
||||
#![deny(rust_2018_idioms)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate quote;
|
||||
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::ToTokens;
|
||||
|
||||
#[proc_macro_attribute]
|
||||
pub fn assert_instr(
|
||||
attr: proc_macro::TokenStream,
|
||||
item: proc_macro::TokenStream,
|
||||
) -> proc_macro::TokenStream {
|
||||
let invoc = match syn::parse::<Invoc>(attr) {
|
||||
Ok(s) => s,
|
||||
Err(e) => return e.to_compile_error().into(),
|
||||
};
|
||||
let item = match syn::parse::<syn::Item>(item) {
|
||||
Ok(s) => s,
|
||||
Err(e) => return e.to_compile_error().into(),
|
||||
};
|
||||
let func = match item {
|
||||
syn::Item::Fn(ref f) => f,
|
||||
_ => panic!("must be attached to a function"),
|
||||
};
|
||||
|
||||
let instr = &invoc.instr;
|
||||
let name = &func.sig.ident;
|
||||
let maybe_allow_deprecated = if func
|
||||
.attrs
|
||||
.iter()
|
||||
.any(|attr| attr.path().is_ident("deprecated"))
|
||||
{
|
||||
quote! { #[allow(deprecated)] }
|
||||
} else {
|
||||
quote! {}
|
||||
};
|
||||
|
||||
// Disable assert_instr for x86 targets compiled with avx enabled, which
|
||||
// causes LLVM to generate different intrinsics than the ones we are
|
||||
// testing for.
|
||||
let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok();
|
||||
|
||||
// If instruction tests are disabled avoid emitting this shim at all, just
|
||||
// return the original item without our attribute.
|
||||
if !cfg!(optimized) || disable_assert_instr {
|
||||
return (quote! { #item }).into();
|
||||
}
|
||||
|
||||
let instr_str = instr
|
||||
.replace(['.', '/', ':'], "_")
|
||||
.replace(char::is_whitespace, "");
|
||||
let assert_name = syn::Ident::new(&format!("assert_{name}_{instr_str}"), name.span());
|
||||
// These name has to be unique enough for us to find it in the disassembly later on:
|
||||
let shim_name = syn::Ident::new(
|
||||
&format!("stdarch_test_shim_{name}_{instr_str}"),
|
||||
name.span(),
|
||||
);
|
||||
let mut inputs = Vec::new();
|
||||
let mut input_vals = Vec::new();
|
||||
let mut const_vals = Vec::new();
|
||||
let ret = &func.sig.output;
|
||||
for arg in func.sig.inputs.iter() {
|
||||
let capture = match *arg {
|
||||
syn::FnArg::Typed(ref c) => c,
|
||||
ref v => panic!(
|
||||
"arguments must not have patterns: `{:?}`",
|
||||
v.clone().into_token_stream()
|
||||
),
|
||||
};
|
||||
let ident = match *capture.pat {
|
||||
syn::Pat::Ident(ref i) => &i.ident,
|
||||
_ => panic!("must have bare arguments"),
|
||||
};
|
||||
if let Some((_, tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
|
||||
input_vals.push(quote! { #tokens });
|
||||
} else {
|
||||
inputs.push(capture);
|
||||
input_vals.push(quote! { #ident });
|
||||
}
|
||||
}
|
||||
for arg in func.sig.generics.params.iter() {
|
||||
let c = match *arg {
|
||||
syn::GenericParam::Const(ref c) => c,
|
||||
ref v => panic!(
|
||||
"only const generics are allowed: `{:?}`",
|
||||
v.clone().into_token_stream()
|
||||
),
|
||||
};
|
||||
if let Some((_, tokens)) = invoc.args.iter().find(|a| c.ident == a.0) {
|
||||
const_vals.push(quote! { #tokens });
|
||||
} else {
|
||||
panic!("const generics must have a value for tests");
|
||||
}
|
||||
}
|
||||
|
||||
let attrs = func
|
||||
.attrs
|
||||
.iter()
|
||||
.filter(|attr| {
|
||||
attr.path()
|
||||
.segments
|
||||
.first()
|
||||
.expect("attr.path.segments.first() failed")
|
||||
.ident
|
||||
.to_string()
|
||||
.starts_with("target")
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let attrs = Append(&attrs);
|
||||
|
||||
// Use an ABI on Windows that passes SIMD values in registers, like what
|
||||
// happens on Unix (I think?) by default.
|
||||
let abi = if cfg!(windows) {
|
||||
let target = std::env::var("TARGET").unwrap();
|
||||
if target.contains("x86_64") {
|
||||
syn::LitStr::new("sysv64", proc_macro2::Span::call_site())
|
||||
} else if target.contains("86") {
|
||||
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
|
||||
} else {
|
||||
syn::LitStr::new("C", proc_macro2::Span::call_site())
|
||||
}
|
||||
} else {
|
||||
syn::LitStr::new("C", proc_macro2::Span::call_site())
|
||||
};
|
||||
let to_test = quote! {
|
||||
#attrs
|
||||
#maybe_allow_deprecated
|
||||
#[unsafe(no_mangle)]
|
||||
#[inline(never)]
|
||||
pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
|
||||
#name::<#(#const_vals),*>(#(#input_vals),*)
|
||||
}
|
||||
};
|
||||
|
||||
let tokens: TokenStream = quote! {
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn #assert_name() {
|
||||
#to_test
|
||||
|
||||
::stdarch_test::assert(#shim_name as usize, stringify!(#shim_name), #instr);
|
||||
}
|
||||
};
|
||||
|
||||
let tokens: TokenStream = quote! {
|
||||
#item
|
||||
#tokens
|
||||
};
|
||||
tokens.into()
|
||||
}
|
||||
|
||||
struct Invoc {
|
||||
instr: String,
|
||||
args: Vec<(syn::Ident, syn::Expr)>,
|
||||
}
|
||||
|
||||
impl syn::parse::Parse for Invoc {
|
||||
fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
|
||||
use syn::{Token, ext::IdentExt};
|
||||
|
||||
let mut instr = String::new();
|
||||
while !input.is_empty() {
|
||||
if input.parse::<Token![,]>().is_ok() {
|
||||
break;
|
||||
}
|
||||
if let Ok(ident) = syn::Ident::parse_any(input) {
|
||||
instr.push_str(&ident.to_string());
|
||||
continue;
|
||||
}
|
||||
if input.parse::<Token![.]>().is_ok() {
|
||||
instr.push('.');
|
||||
continue;
|
||||
}
|
||||
if let Ok(s) = input.parse::<syn::LitStr>() {
|
||||
instr.push_str(&s.value());
|
||||
continue;
|
||||
}
|
||||
println!("{:?}", input.cursor().token_stream());
|
||||
return Err(input.error("expected an instruction"));
|
||||
}
|
||||
if instr.is_empty() {
|
||||
return Err(input.error("expected an instruction before comma"));
|
||||
}
|
||||
let mut args = Vec::new();
|
||||
while !input.is_empty() {
|
||||
let name = input.parse::<syn::Ident>()?;
|
||||
input.parse::<Token![=]>()?;
|
||||
let expr = input.parse::<syn::Expr>()?;
|
||||
args.push((name, expr));
|
||||
|
||||
if input.parse::<Token![,]>().is_err() {
|
||||
if !input.is_empty() {
|
||||
return Err(input.error("extra tokens at end"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(Self { instr, args })
|
||||
}
|
||||
}
|
||||
|
||||
struct Append<T>(T);
|
||||
|
||||
impl<T> quote::ToTokens for Append<T>
|
||||
where
|
||||
T: Clone + IntoIterator,
|
||||
T::Item: quote::ToTokens,
|
||||
{
|
||||
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
|
||||
for item in self.0.clone() {
|
||||
item.to_tokens(tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
33
library/stdarch/crates/core_arch/Cargo.toml
Normal file
33
library/stdarch/crates/core_arch/Cargo.toml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
[package]
|
||||
name = "core_arch"
|
||||
version = "0.1.5"
|
||||
authors = [
|
||||
"Alex Crichton <alex@alexcrichton.com>",
|
||||
"Andrew Gallant <jamslam@gmail.com>",
|
||||
"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
|
||||
]
|
||||
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
|
||||
homepage = "https://github.com/rust-lang/stdarch"
|
||||
repository = "https://github.com/rust-lang/stdarch"
|
||||
readme = "README.md"
|
||||
keywords = ["core", "simd", "arch", "intrinsics"]
|
||||
categories = ["hardware-support", "no-std"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
edition = "2024"
|
||||
|
||||
[badges]
|
||||
is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" }
|
||||
is-it-maintained-open-issues = { repository = "rust-lang/stdarch" }
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[dev-dependencies]
|
||||
stdarch-test = { version = "0.*", path = "../stdarch-test" }
|
||||
std_detect = { version = "0.*", path = "../std_detect" }
|
||||
|
||||
[target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dev-dependencies]
|
||||
syscalls = { version = "0.6.18", default-features = false }
|
||||
|
||||
[lints.clippy]
|
||||
too_long_first_doc_paragraph = "allow"
|
||||
missing_transmute_annotations = "allow"
|
||||
useless_transmute = "allow"
|
||||
201
library/stdarch/crates/core_arch/LICENSE-APACHE
Normal file
201
library/stdarch/crates/core_arch/LICENSE-APACHE
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
25
library/stdarch/crates/core_arch/LICENSE-MIT
Normal file
25
library/stdarch/crates/core_arch/LICENSE-MIT
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2017 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
116
library/stdarch/crates/core_arch/MISSING.md
Normal file
116
library/stdarch/crates/core_arch/MISSING.md
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
## The following neon instructions are currently not implemented in stdarch
|
||||
|
||||
### Not implemented on arm:
|
||||
|
||||
`vcadd_rot270_f32`
|
||||
|
||||
`vcadd_rot90_f32`
|
||||
|
||||
`vcaddq_rot270_f32`
|
||||
|
||||
`vcaddq_rot90_f32`
|
||||
|
||||
`vdot_s32`
|
||||
|
||||
`vdot_u32`
|
||||
|
||||
`vdotq_s32`
|
||||
|
||||
`vdotq_u32`
|
||||
|
||||
`vdot_lane_s32`
|
||||
|
||||
`vdot_lane_u32`
|
||||
|
||||
`vdotq_lane_s32`
|
||||
|
||||
`vdotq_lane_u32`
|
||||
|
||||
`vcmla_f32`
|
||||
|
||||
`vcmla_lane_f32`
|
||||
|
||||
`vcmla_laneq_f32`
|
||||
|
||||
`vcmla_rot180_f32`
|
||||
|
||||
`vcmla_rot180_lane_f32`
|
||||
|
||||
`vcmla_rot180_laneq_f32`
|
||||
|
||||
`vcmla_rot270_f32`
|
||||
|
||||
`vcmla_rot270_lane_f32`
|
||||
|
||||
`vcmla_rot270_laneq_f32`
|
||||
|
||||
`vcmla_rot90_f32`
|
||||
|
||||
`vcmla_rot90_lane_f32`
|
||||
|
||||
`vcmla_rot90_laneq_f32`
|
||||
|
||||
`vcmlaq_f32`
|
||||
|
||||
`vcmlaq_lane_f32`
|
||||
|
||||
`vcmlaq_laneq_f32`
|
||||
|
||||
`vcmlaq_rot180_f32`
|
||||
|
||||
`vcmlaq_rot180_lane_f32`
|
||||
|
||||
`vcmlaq_rot180_laneq_f32`
|
||||
|
||||
`vcmlaq_rot270_f32`
|
||||
|
||||
`vcmlaq_rot270_lane_f32`
|
||||
|
||||
`vcmlaq_rot270_laneq_f32`
|
||||
|
||||
`vcmlaq_rot90_f32`
|
||||
|
||||
`vcmlaq_rot90_lane_f32`
|
||||
|
||||
`vcmlaq_rot90_laneq_f32`
|
||||
|
||||
### Not implemented in LLVM:
|
||||
|
||||
`vrnd32x_f64`
|
||||
|
||||
`vrnd32xq_f64`
|
||||
|
||||
`vrnd32z_f64`
|
||||
|
||||
`vrnd32zq_f64`
|
||||
|
||||
`vrnd64x_f64`
|
||||
|
||||
`vrnd64xq_f64`
|
||||
|
||||
`vrnd64z_f64`
|
||||
|
||||
`vrnd64zq_f64`
|
||||
|
||||
### LLVM Select errors may occur:
|
||||
|
||||
`vsudot_lane_s32`
|
||||
|
||||
`vsudot_laneq_s32`
|
||||
|
||||
`vsudotq_lane_s32`
|
||||
|
||||
`vsudotq_laneq_s32`
|
||||
|
||||
`vusdot_lane_s32`
|
||||
|
||||
`vusdot_laneq_s32`
|
||||
|
||||
`vusdot_s32`
|
||||
|
||||
`vusdotq_lane_s32`
|
||||
|
||||
`vusdotq_laneq_s32`
|
||||
|
||||
`vusdotq_s32`
|
||||
|
||||
58
library/stdarch/crates/core_arch/README.md
Normal file
58
library/stdarch/crates/core_arch/README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
`core::arch` - Rust's core library architecture-specific intrinsics
|
||||
=======
|
||||
|
||||
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
|
||||
|
||||
# Usage
|
||||
|
||||
`core::arch` is available as part of `libcore` and it is re-exported by
|
||||
`libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate.
|
||||
|
||||
Using `core::arch` via this crate requires nightly Rust, and it can (and does)
|
||||
break often. The only cases in which you should consider using it via this crate
|
||||
are:
|
||||
|
||||
* if you need to re-compile `core::arch` yourself, e.g., with particular
|
||||
target-features enabled that are not enabled for `libcore`/`libstd`. Note: if
|
||||
you need to re-compile it for a non-standard target, please prefer using
|
||||
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
|
||||
this crate.
|
||||
|
||||
* using some features that might not be available even behind unstable Rust
|
||||
features. We try to keep these to a minimum. If you need to use some of these
|
||||
features, please open an issue so that we can expose them in nightly Rust and
|
||||
you can use them from there.
|
||||
|
||||
# Documentation
|
||||
|
||||
* [Documentation - i686][i686]
|
||||
* [Documentation - x86\_64][x86_64]
|
||||
* [Documentation - arm][arm]
|
||||
* [Documentation - aarch64][aarch64]
|
||||
* [Documentation - powerpc][powerpc]
|
||||
* [Documentation - powerpc64][powerpc64]
|
||||
* [How to get started][contrib]
|
||||
* [How to help implement intrinsics][help-implement]
|
||||
|
||||
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
|
||||
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
|
||||
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
|
||||
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
|
||||
[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/
|
||||
[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/
|
||||
[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/
|
||||
[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/
|
||||
|
||||
# License
|
||||
|
||||
`core_arch` is primarily distributed under the terms of both the MIT license and
|
||||
the Apache License (Version 2.0), with portions covered by various BSD-like
|
||||
licenses.
|
||||
|
||||
See LICENSE-APACHE and LICENSE-MIT for details.
|
||||
|
||||
# Contribution
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted
|
||||
for inclusion in `core_arch` by you, as defined in the Apache-2.0 license,
|
||||
shall be dual licensed as above, without any additional terms or conditions.
|
||||
258
library/stdarch/crates/core_arch/missing-x86.md
Normal file
258
library/stdarch/crates/core_arch/missing-x86.md
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
|
||||
<details><summary>["AMX-BF16"]</summary><p>
|
||||
|
||||
* [ ] [`__tile_dpbf16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbf16ps)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AMX-COMPLEX"]</summary><p>
|
||||
|
||||
* [ ] [`__tile_cmmimfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmimfp16ps)
|
||||
* [ ] [`__tile_cmmrlfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmrlfp16ps)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AMX-FP16"]</summary><p>
|
||||
|
||||
* [ ] [`__tile_dpfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpfp16ps)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AMX-INT8"]</summary><p>
|
||||
|
||||
* [ ] [`__tile_dpbssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbssd)
|
||||
* [ ] [`__tile_dpbsud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbsud)
|
||||
* [ ] [`__tile_dpbusd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbusd)
|
||||
* [ ] [`__tile_dpbuud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbuud)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AMX-TILE"]</summary><p>
|
||||
|
||||
* [ ] [`__tile_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_loadd)
|
||||
* [ ] [`__tile_stored`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stored)
|
||||
* [ ] [`__tile_stream_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stream_loadd)
|
||||
* [ ] [`__tile_zero`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_zero)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AVX512_FP16"]</summary><p>
|
||||
|
||||
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
|
||||
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
|
||||
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
|
||||
|
||||
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
|
||||
* [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
|
||||
|
||||
* [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
|
||||
* [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
|
||||
* [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
|
||||
* [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["CET_SS"]</summary><p>
|
||||
|
||||
* [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
|
||||
* [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
|
||||
* [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
|
||||
* [ ] [`_inc_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_inc_ssp)
|
||||
* [ ] [`_incsspd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspd)
|
||||
* [ ] [`_incsspq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspq)
|
||||
* [ ] [`_rdsspd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspd_i32)
|
||||
* [ ] [`_rdsspq_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspq_i64)
|
||||
* [ ] [`_rstorssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rstorssp)
|
||||
* [ ] [`_saveprevssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_saveprevssp)
|
||||
* [ ] [`_setssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_setssbsy)
|
||||
* [ ] [`_wrssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssd)
|
||||
* [ ] [`_wrssq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssq)
|
||||
* [ ] [`_wrussd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussd)
|
||||
* [ ] [`_wrussq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussq)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["CLDEMOTE"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_cldemote`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cldemote)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["CLFLUSHOPT"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["CLWB"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_clwb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clwb)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["CMPCCXADD"]</summary><p>
|
||||
|
||||
* [ ] [`_cmpccxadd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi32)
|
||||
* [ ] [`_cmpccxadd_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["ENQCMD"]</summary><p>
|
||||
|
||||
* [ ] [`_enqcmd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmd)
|
||||
* [ ] [`_enqcmds`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmds)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["FSGSBASE"]</summary><p>
|
||||
|
||||
* [ ] [`_readfsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u32)
|
||||
* [ ] [`_readfsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u64)
|
||||
* [ ] [`_readgsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u32)
|
||||
* [ ] [`_readgsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u64)
|
||||
* [ ] [`_writefsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u32)
|
||||
* [ ] [`_writefsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u64)
|
||||
* [ ] [`_writegsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u32)
|
||||
* [ ] [`_writegsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["HRESET"]</summary><p>
|
||||
|
||||
* [ ] [`_hreset`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_hreset)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["INVPCID"]</summary><p>
|
||||
|
||||
* [ ] [`_invpcid`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_invpcid)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["MONITOR"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_monitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_monitor)
|
||||
* [ ] [`_mm_mwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mwait)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["MOVBE"]</summary><p>
|
||||
|
||||
* [ ] [`_loadbe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i16)
|
||||
* [ ] [`_loadbe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i32)
|
||||
* [ ] [`_loadbe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i64)
|
||||
* [ ] [`_storebe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i16)
|
||||
* [ ] [`_storebe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i32)
|
||||
* [ ] [`_storebe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["MOVDIR64B"]</summary><p>
|
||||
|
||||
* [ ] [`_movdir64b`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_movdir64b)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["MOVDIRI"]</summary><p>
|
||||
|
||||
* [ ] [`_directstoreu_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u32)
|
||||
* [ ] [`_directstoreu_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["PCONFIG"]</summary><p>
|
||||
|
||||
* [ ] [`_pconfig_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pconfig_u32)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["POPCNT"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_popcnt_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32)
|
||||
* [ ] [`_mm_popcnt_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["PREFETCHI"]</summary><p>
|
||||
|
||||
* [ ] [`_m_prefetchit0`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit0)
|
||||
* [ ] [`_m_prefetchit1`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit1)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["RAO_INT"]</summary><p>
|
||||
|
||||
* [ ] [`_aadd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i32)
|
||||
* [ ] [`_aadd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i64)
|
||||
* [ ] [`_aand_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i32)
|
||||
* [ ] [`_aand_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i64)
|
||||
* [ ] [`_aor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i32)
|
||||
* [ ] [`_aor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i64)
|
||||
* [ ] [`_axor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i32)
|
||||
* [ ] [`_axor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i64)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["RDPID"]</summary><p>
|
||||
|
||||
* [ ] [`_rdpid_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdpid_u32)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["SERIALIZE"]</summary><p>
|
||||
|
||||
* [ ] [`_serialize`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_serialize)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["SSE"]</summary><p>
|
||||
|
||||
* [ ] [`_mm_free`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free)
|
||||
* [ ] [`_mm_malloc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_malloc)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["TSXLDTRK"]</summary><p>
|
||||
|
||||
* [ ] [`_xresldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xresldtrk)
|
||||
* [ ] [`_xsusldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsusldtrk)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["UINTR"]</summary><p>
|
||||
|
||||
* [ ] [`_clui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clui)
|
||||
* [ ] [`_senduipi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_senduipi)
|
||||
* [ ] [`_stui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_stui)
|
||||
* [ ] [`_testui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_testui)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["USER_MSR"]</summary><p>
|
||||
|
||||
* [ ] [`_urdmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_urdmsr)
|
||||
* [ ] [`_uwrmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_uwrmsr)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["WAITPKG"]</summary><p>
|
||||
|
||||
* [ ] [`_tpause`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tpause)
|
||||
* [ ] [`_umonitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umonitor)
|
||||
* [ ] [`_umwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umwait)
|
||||
</p></details>
|
||||
|
||||
|
||||
<details><summary>["WBNOINVD"]</summary><p>
|
||||
|
||||
* [ ] [`_wbnoinvd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wbnoinvd)
|
||||
</p></details>
|
||||
|
||||
3
library/stdarch/crates/core_arch/rustfmt.toml
Normal file
3
library/stdarch/crates/core_arch/rustfmt.toml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
ignore = [
|
||||
"src/simd.rs",
|
||||
]
|
||||
39
library/stdarch/crates/core_arch/src/aarch64/mod.rs
Normal file
39
library/stdarch/crates/core_arch/src/aarch64/mod.rs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
//! AArch64 intrinsics.
|
||||
//!
|
||||
//! The reference for NEON is [Arm's NEON Intrinsics Reference][arm_ref].
|
||||
//! [Arm's NEON Intrinsics Online Database][arm_dat] is also useful.
|
||||
//!
|
||||
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
|
||||
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
|
||||
|
||||
#![cfg_attr(
|
||||
all(target_arch = "aarch64", target_abi = "softfloat"),
|
||||
// Just allow the warning: anyone soundly using the intrinsics has to enable
|
||||
// the target feature, and that will generate a warning for them.
|
||||
allow(aarch64_softfloat_neon)
|
||||
)]
|
||||
|
||||
mod mte;
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub use self::mte::*;
|
||||
|
||||
mod neon;
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub use self::neon::*;
|
||||
|
||||
mod tme;
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub use self::tme::*;
|
||||
|
||||
mod prefetch;
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub use self::prefetch::*;
|
||||
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub use super::arm_shared::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_support;
|
||||
171
library/stdarch/crates/core_arch/src/aarch64/mte.rs
Normal file
171
library/stdarch/crates/core_arch/src/aarch64/mte.rs
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
//! AArch64 Memory tagging intrinsics
|
||||
//!
|
||||
//! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#markdown-toc-mte-intrinsics)
|
||||
|
||||
// Bindings to the LLVM MTE intrinsics. Each one is called by exactly one of
|
||||
// the public `__arm_mte_*` wrappers defined later in this file.
|
||||
unsafe extern "unadjusted" {
|
||||
// Called by `__arm_mte_create_random_tag`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.irg"
|
||||
)]
|
||||
fn irg_(ptr: *const (), exclude: i64) -> *const ();
|
||||
// Called by `__arm_mte_exclude_tag`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.gmi"
|
||||
)]
|
||||
fn gmi_(ptr: *const (), exclude: i64) -> i64;
|
||||
// Called by `__arm_mte_get_tag`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.ldg"
|
||||
)]
|
||||
fn ldg_(ptr: *const (), tag_ptr: *const ()) -> *const ();
|
||||
// Called by `__arm_mte_set_tag`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.stg"
|
||||
)]
|
||||
fn stg_(tagged_ptr: *const (), addr_to_tag: *const ());
|
||||
// Called by `__arm_mte_increment_tag`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.addg"
|
||||
)]
|
||||
fn addg_(ptr: *const (), value: i64) -> *const ();
|
||||
// Called by `__arm_mte_ptrdiff`.
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.subp"
|
||||
)]
|
||||
fn subp_(ptr_a: *const (), ptr_b: *const ()) -> i64;
|
||||
}
|
||||
|
||||
/// Return a pointer containing a randomly generated logical address tag.
|
||||
///
|
||||
/// `src`: A pointer containing an address.
|
||||
/// `mask`: A mask where each of the lower 16 bits specifies logical
|
||||
/// tags which must be excluded from consideration. Zero excludes no
|
||||
/// tags.
|
||||
///
|
||||
/// The returned pointer contains a copy of the `src` address, but with a
|
||||
/// randomly generated logical tag, excluding any specified by `mask`.
|
||||
///
|
||||
/// SAFETY: The pointer provided by this intrinsic will be invalid until the memory
|
||||
/// has been appropriately tagged with `__arm_mte_set_tag`. If using that intrinsic
|
||||
/// on the provided pointer is itself invalid, then it will be permanently invalid
|
||||
/// and Undefined Behavior to dereference it.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_create_random_tag<T>(src: *const T, mask: u64) -> *const T {
|
||||
// The pointee type is erased for the call and restored on the result;
|
||||
|
||||
// `mask` is cast to `i64` because that is the parameter type `irg_` declares.
|
||||
irg_(src as *const (), mask as i64) as *const T
|
||||
}
|
||||
|
||||
/// Return a pointer with the logical address tag offset by a value.
|
||||
///
|
||||
/// `src`: A pointer containing an address and a logical tag.
|
||||
/// `OFFSET`: A compile-time constant value in the range [0, 15]; values
|
||||
/// outside this range are rejected at compile time.
|
||||
///
|
||||
/// Adds `OFFSET` to the logical address tag in `src`, wrapping if the result is
|
||||
/// outside of the valid 16 tags.
|
||||
///
|
||||
/// SAFETY: See `__arm_mte_create_random_tag`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_increment_tag<const OFFSET: i64, T>(src: *const T) -> *const T {
|
||||
// Enforce the documented [0, 15] range (4 unsigned bits) at compile time,
|
||||
|
||||
// the same way `_prefetch` validates its const parameters.
|
||||
static_assert_uimm_bits!(OFFSET, 4);
|
||||
addg_(src as *const (), OFFSET) as *const T
|
||||
}
|
||||
|
||||
/// Add a logical tag to the set of excluded logical tags.
|
||||
///
|
||||
/// `src`: A pointer containing an address and a logical tag.
|
||||
/// `excluded`: A mask where the lower 16 bits each specify currently-excluded
|
||||
/// logical tags.
|
||||
///
|
||||
/// Adds the logical tag stored in `src` to the set in `excluded`, and returns
|
||||
/// the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_exclude_tag<T>(src: *const T, excluded: u64) -> u64 {
|
||||
// `gmi_` is declared with `i64` for both the mask and the result, so the
|
||||
|
||||
// public `u64` values are cast on the way in and out.
|
||||
gmi_(src as *const (), excluded as i64) as u64
|
||||
}
|
||||
|
||||
/// Store an allocation tag for the 16-byte granule of memory.
|
||||
///
|
||||
/// `tag_address`: A pointer containing an address and a logical tag, which
|
||||
/// must be 16-byte aligned.
|
||||
///
|
||||
/// SAFETY: `tag_address` must be 16-byte aligned. The tag will apply to the
|
||||
/// entire 16-byte memory granule.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_set_tag<T>(tag_address: *const T) {
|
||||
// The same pointer is passed as both the tagged pointer and the address to tag.
|
||||
stg_(tag_address as *const (), tag_address as *const ());
|
||||
}
|
||||
|
||||
/// Load an allocation tag from memory, returning a new pointer with the
|
||||
/// corresponding logical tag.
|
||||
///
|
||||
/// `address`: A pointer containing an address from which allocation tag memory
|
||||
/// is read. This does not need to be 16-byte aligned.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_get_tag<T>(address: *const T) -> *const T {
|
||||
// `address` is passed as both arguments of `ldg_` (`ptr` and `tag_ptr`).
|
||||
ldg_(address as *const (), address as *const ()) as *const T
|
||||
}
|
||||
|
||||
/// Calculate the difference between the address parts of two pointers, ignoring
|
||||
/// the tags, and sign-extending the result.
|
||||
///
|
||||
/// NOTE(review): the operand order (presumably `a - b`) is determined by
|
||||
/// `llvm.aarch64.subp` — confirm against ACLE.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mte")]
|
||||
#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
|
||||
pub unsafe fn __arm_mte_ptrdiff<T, U>(a: *const T, b: *const U) -> i64 {
|
||||
// The pointee types are erased; the pointers reach `subp_` in the order `(a, b)`.
|
||||
subp_(a as *const (), b as *const ())
|
||||
}
|
||||
|
||||
// Each wrapper below forwards to a single MTE intrinsic and carries an
|
||||
// `assert_instr` attribute naming the expected instruction. None of them is
|
||||
// called from this module, hence `#[allow(dead_code)]`.
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(irg))] // FIXME: MSVC `dumpbin` doesn't support MTE
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_create_random_tag(src: *const (), mask: u64) -> *const () {
|
||||
__arm_mte_create_random_tag(src, mask)
|
||||
}
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(addg))]
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_increment_tag(src: *const ()) -> *const () {
|
||||
__arm_mte_increment_tag::<1, _>(src)
|
||||
}
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(gmi))]
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_exclude_tag(src: *const (), excluded: u64) -> u64 {
|
||||
__arm_mte_exclude_tag(src, excluded)
|
||||
}
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stg))]
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_set_tag(src: *const ()) {
|
||||
__arm_mte_set_tag(src)
|
||||
}
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldg))]
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_get_tag(src: *const ()) -> *const () {
|
||||
__arm_mte_get_tag(src)
|
||||
}
|
||||
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(subp))]
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "mte")]
|
||||
unsafe fn test_arm_mte_ptrdiff(a: *const (), b: *const ()) -> i64 {
|
||||
__arm_mte_ptrdiff(a, b)
|
||||
}
|
||||
}
|
||||
29470
library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
Normal file
29470
library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
Normal file
File diff suppressed because it is too large
Load diff
1006
library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
Normal file
1006
library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
80
library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
Normal file
80
library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
// Binding to the `llvm.prefetch` intrinsic. `_prefetch` passes its `RW` and
|
||||
// `LOCALITY` const parameters as `rw` and `loc`, and the literal `1` as `ty`.
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.prefetch"]
|
||||
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
|
||||
}
|
||||
|
||||
/// `RW` value for [`_prefetch`](fn._prefetch.html): the prefetch is preparing for a read.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_READ: i32 = 0;
|
||||
|
||||
/// `RW` value for [`_prefetch`](fn._prefetch.html): the prefetch is preparing for a write.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_WRITE: i32 = 1;
|
||||
|
||||
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): streaming or non-temporal prefetch, for data that is used only once.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_LOCALITY0: i32 = 0;
|
||||
|
||||
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 3 cache.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_LOCALITY1: i32 = 1;
|
||||
|
||||
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 2 cache.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_LOCALITY2: i32 = 2;
|
||||
|
||||
/// `LOCALITY` value for [`_prefetch`](fn._prefetch.html): fetch into level 1 cache.
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub const _PREFETCH_LOCALITY3: i32 = 3;
|
||||
|
||||
/// Fetch the cache line that contains address `p` using the given `RW` and `LOCALITY`.
|
||||
///
|
||||
/// The `RW` must be one of:
|
||||
///
|
||||
/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
|
||||
/// for a read.
|
||||
///
|
||||
/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
|
||||
/// for a write.
|
||||
///
|
||||
/// The `LOCALITY` must be one of:
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
|
||||
/// non-temporal prefetch, for data that is used only once.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
|
||||
///
|
||||
/// The prefetch memory instructions signal to the memory system that memory accesses
|
||||
/// from a specified address are likely to occur in the near future. The memory system
|
||||
/// can respond by taking actions that are expected to speed up the memory accesses when
|
||||
/// they do occur, such as preloading the specified address into one or more caches.
|
||||
/// Because these signals are only hints, it is valid for a particular CPU to treat
|
||||
/// any or all prefetch instructions as a NOP.
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
|
||||
#[inline(always)]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl1strm", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY0))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl3keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY1))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl2keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY2))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl1keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY3))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl1strm", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY0))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl3keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY1))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl2keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY2))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl1keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY3))]
|
||||
#[rustc_legacy_const_generics(1, 2)]
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
// FIXME: Replace this with the standard ACLE __pld/__pldx/__pli/__plix intrinsics
|
||||
pub unsafe fn _prefetch<const RW: i32, const LOCALITY: i32>(p: *const i8) {
|
||||
// We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
|
||||
// Compile-time checks, presumably that `RW` fits in 1 bit and `LOCALITY` in
|
||||
|
||||
// 2 bits, which matches the value ranges of the `_PREFETCH_*` constants.
|
||||
static_assert_uimm_bits!(RW, 1);
|
||||
static_assert_uimm_bits!(LOCALITY, 2);
|
||||
prefetch(p, RW, LOCALITY, 1);
|
||||
}
|
||||
184
library/stdarch/crates/core_arch/src/aarch64/test_support.rs
Normal file
184
library/stdarch/crates/core_arch/src/aarch64/test_support.rs
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*};
|
||||
use std::{mem::transmute, vec::Vec};
|
||||
|
||||
// Expands to a `Vec<u64>` of fixed sample bit patterns: all zeros, several
|
||||
// repeated-byte patterns, and all ones.
|
||||
macro_rules! V_u64 {
|
||||
() => {
|
||||
vec![
|
||||
0x0000000000000000u64,
|
||||
0x0101010101010101u64,
|
||||
0x0202020202020202u64,
|
||||
0x0F0F0F0F0F0F0F0Fu64,
|
||||
0x8080808080808080u64,
|
||||
0xF0F0F0F0F0F0F0F0u64,
|
||||
0xFFFFFFFFFFFFFFFFu64,
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
// Expands to a `Vec<f64>` of sample inputs: a few ordinary values plus
|
||||
// `f64::MAX`, `f64::MIN`, both infinities and NaN.
|
||||
macro_rules! V_f64 {
|
||||
() => {
|
||||
vec![
|
||||
0.0f64,
|
||||
1.0f64,
|
||||
-1.0f64,
|
||||
1.2f64,
|
||||
2.4f64,
|
||||
f64::MAX,
|
||||
f64::MIN,
|
||||
f64::INFINITY,
|
||||
f64::NEG_INFINITY,
|
||||
f64::NAN,
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
// Expands to a closure that reinterprets a `$t` value as `u64` via `transmute`.
|
||||
// The expansion contains no `unsafe` of its own, so it must be used inside an
|
||||
// `unsafe` context (as `gen_test_fn!` does).
|
||||
macro_rules! to64 {
|
||||
($t : ident) => {
|
||||
|v: $t| -> u64 { transmute(v) }
|
||||
};
|
||||
}
|
||||
|
||||
// Expands to a closure that reinterprets a `$t` value as `u128` via `transmute`.
|
||||
// The expansion contains no `unsafe` of its own, so it must be used inside an
|
||||
// `unsafe` context (as `gen_test_fn!` does).
|
||||
macro_rules! to128 {
|
||||
($t : ident) => {
|
||||
|v: $t| -> u128 { transmute(v) }
|
||||
};
|
||||
}
|
||||
|
||||
/// Checks that `test_fun` agrees with the scalar reference `verify_fun`.
|
||||
///
|
||||
/// For every input pair, `fill1` converts both scalars to `V` and `test_fun`
|
||||
/// is applied to them; `verify_fun` is applied to the scalars directly and its
|
||||
/// result is converted with `fill2`. Both results are then passed through
|
||||
/// `cast` and compared with `assert_eq!`, which panics on the first mismatch.
|
||||
///
|
||||
/// NOTE(review): `vals` is zipped with itself, so only the pairs `(v, v)` are
|
||||
/// exercised, never two different values. Confirm whether every combination
|
||||
/// of values was intended.
|
||||
pub(crate) fn test<T, U, V, W, X>(
|
||||
vals: Vec<T>,
|
||||
fill1: fn(T) -> V,
|
||||
fill2: fn(U) -> W,
|
||||
cast: fn(W) -> X,
|
||||
test_fun: fn(V, V) -> W,
|
||||
verify_fun: fn(T, T) -> U,
|
||||
) where
|
||||
T: Copy + core::fmt::Debug,
|
||||
U: Copy + core::fmt::Debug + std::cmp::PartialEq,
|
||||
V: Copy + core::fmt::Debug,
|
||||
W: Copy + core::fmt::Debug,
|
||||
X: Copy + core::fmt::Debug + std::cmp::PartialEq,
|
||||
{
|
||||
let pairs = vals.iter().zip(vals.iter());
|
||||
|
||||
for (i, j) in pairs {
|
||||
let a: V = fill1(*i);
|
||||
let b: V = fill1(*j);
|
||||
|
||||
let actual_pre: W = test_fun(a, b);
|
||||
let expected_pre: W = fill2(verify_fun(*i, *j));
|
||||
|
||||
// Compare through `cast` so the check is made on the `X` representation.
|
||||
let actual: X = cast(actual_pre);
|
||||
let expected: X = cast(expected_pre);
|
||||
|
||||
assert_eq!(
|
||||
actual, expected,
|
||||
"[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
|
||||
*i, *j, &a, &b, actual_pre, &a, &b, expected_pre
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Defines `$n`, a `pub(crate)` driver that calls `test::<$t, $u, $v, $w, $x>`
|
||||
// with fixed sample values, fill functions and cast, so callers only supply
|
||||
// `test_fun` and `verify_fun`. The call sits in an `unsafe` block, which is
|
||||
// what lets a `to64!`/`to128!` closure passed as `$cast` call `transmute`.
|
||||
macro_rules! gen_test_fn {
|
||||
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
|
||||
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
|
||||
unsafe {
|
||||
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Defines `$id`, which builds `[val; $num_els]`, reinterprets it as `$cmp_t`
|
||||
// and then reinterprets that as `$out_t`. `$el_width` is only referenced by
|
||||
// the commented-out debug `println!` below.
|
||||
macro_rules! gen_fill_fn {
|
||||
($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
|
||||
pub(crate) fn $id(val: $in_t) -> $out_t {
|
||||
let initial: [$in_t; $num_els] = [val; $num_els];
|
||||
let result: $cmp_t = unsafe { transmute(initial) };
|
||||
let result_out: $out_t = unsafe { transmute(result) };
|
||||
|
||||
// println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
|
||||
|
||||
result_out
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
|
||||
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
|
||||
gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
|
||||
gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
|
||||
gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
|
||||
gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_f64,
|
||||
f64,
|
||||
f64,
|
||||
float64x1_t,
|
||||
float64x1_t,
|
||||
u64,
|
||||
V_f64!(),
|
||||
fill_f64,
|
||||
fill_f64,
|
||||
to64!(float64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_f64,
|
||||
f64,
|
||||
u64,
|
||||
float64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_f64!(),
|
||||
fill_f64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_f64,
|
||||
f64,
|
||||
f64,
|
||||
float64x2_t,
|
||||
float64x2_t,
|
||||
u128,
|
||||
V_f64!(),
|
||||
fillq_f64,
|
||||
fillq_f64,
|
||||
to128!(float64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_f64,
|
||||
f64,
|
||||
u64,
|
||||
float64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_f64!(),
|
||||
fillq_f64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_cmp_p64,
|
||||
u64,
|
||||
u64,
|
||||
poly64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_p64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_p64,
|
||||
u64,
|
||||
u64,
|
||||
poly64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_p64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
201
library/stdarch/crates/core_arch/src/aarch64/tme.rs
Normal file
201
library/stdarch/crates/core_arch/src/aarch64/tme.rs
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
//! ARM's Transactional Memory Extensions (TME).
|
||||
//!
|
||||
//! This CPU feature is available on Aarch64 - A architecture profile.
|
||||
//! This feature is in the non-neon feature set. TME-specific vendor documentation can
//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
|
||||
//!
|
||||
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
|
||||
//!
|
||||
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
|
||||
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
|
||||
//!
|
||||
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
|
||||
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
|
||||
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
|
||||
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.aarch64.tstart"]
|
||||
fn aarch64_tstart() -> u64;
|
||||
#[link_name = "llvm.aarch64.tcommit"]
|
||||
fn aarch64_tcommit();
|
||||
#[link_name = "llvm.aarch64.tcancel"]
|
||||
fn aarch64_tcancel(imm0: u64);
|
||||
#[link_name = "llvm.aarch64.ttest"]
|
||||
fn aarch64_ttest() -> u64;
|
||||
}
|
||||
|
||||
/// Transaction successfully started.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
|
||||
|
||||
/// Extraction mask for failure reason
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
|
||||
|
||||
/// Transaction retry is possible.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_RTRY: u64 = 1 << 15;
|
||||
|
||||
/// Transaction executed a TCANCEL instruction
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_CNCL: u64 = 1 << 16;
|
||||
|
||||
/// Transaction aborted because a conflict occurred
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_MEM: u64 = 1 << 17;
|
||||
|
||||
/// Fallback error type for any other reason
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_IMP: u64 = 1 << 18;
|
||||
|
||||
/// Transaction aborted because a non-permissible operation was attempted
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_ERR: u64 = 1 << 19;
|
||||
|
||||
/// Transaction aborted due to read or write set limit was exceeded
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_SIZE: u64 = 1 << 20;
|
||||
|
||||
/// Transaction aborted due to transactional nesting level was exceeded
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_NEST: u64 = 1 << 21;
|
||||
|
||||
/// Transaction aborted due to a debug trap.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_DBG: u64 = 1 << 22;
|
||||
|
||||
/// Transaction failed from interrupt
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_INT: u64 = 1 << 23;
|
||||
|
||||
/// Indicates a TRIVIAL version of TM is available
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
|
||||
|
||||
// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
|
||||
// understand these instructions.
|
||||
|
||||
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
|
||||
/// If the transaction fails, all state modifications are discarded and a cause of the failure
|
||||
/// is encoded in the return value.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tstart() -> u64 {
|
||||
aarch64_tstart()
|
||||
}
|
||||
|
||||
/// Commits the current transaction. For a nested transaction, the only effect is that the
|
||||
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
|
||||
/// performed transactionally are committed to the architectural state.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tcommit() {
|
||||
aarch64_tcommit()
|
||||
}
|
||||
|
||||
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(
|
||||
all(test, not(target_env = "msvc")),
|
||||
assert_instr(tcancel, IMM16 = 0x0)
|
||||
)]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tcancel<const IMM16: u64>() {
|
||||
static_assert!(IMM16 <= 65535);
|
||||
aarch64_tcancel(IMM16);
|
||||
}
|
||||
|
||||
/// Tests if executing inside a transaction. If no transaction is currently executing,
|
||||
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __ttest() -> u64 {
|
||||
aarch64_ttest()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::aarch64::*;

    /// Cancellation reason used by the tests, restricted to the bits selected by
    /// `_TMFAILURE_REASON`.
    const CANCEL_CODE: u64 = 0x123 & _TMFAILURE_REASON;

    #[simd_test(enable = "tme")]
    unsafe fn test_tstart() {
        let mut started = 0;
        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                started += 1;
                assert_eq!(started, attempt + 1);
                break;
            }
            // A failed start must not leave any visible modification behind.
            assert_eq!(started, 0);
        }
    }

    #[simd_test(enable = "tme")]
    unsafe fn test_tcommit() {
        let mut committed = 0;
        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                committed += 1;
                assert_eq!(committed, attempt + 1);
                tme::__tcommit();
            }
            assert_eq!(committed, attempt + 1);
        }
    }

    #[simd_test(enable = "tme")]
    unsafe fn test_tcancel() {
        let mut counter = 0;

        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                counter += 1;
                assert_eq!(counter, attempt + 1);
                tme::__tcancel::<CANCEL_CODE>();
                break;
            }
        }

        // The increment happened inside a transaction that was cancelled (or
        // never started), so it must not be observable here.
        assert_eq!(counter, 0);
    }

    #[simd_test(enable = "tme")]
    unsafe fn test_ttest() {
        for _ in 0..10 {
            // `__ttest` is only queried when the start succeeded (short-circuit `&&`).
            if tme::__tstart() == _TMSTART_SUCCESS && tme::__ttest() == 2 {
                tme::__tcancel::<CANCEL_CODE>();
                break;
            }
        }
    }
}
|
||||
390
library/stdarch/crates/core_arch/src/arm/dsp.rs
Normal file
390
library/stdarch/crates/core_arch/src/arm/dsp.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
//! # References:
|
||||
//!
|
||||
//! - Section 8.3 "16-bit multiplications"
|
||||
//!
|
||||
//! Intrinsics that could live here:
|
||||
//!
|
||||
//! - \[x\] __smulbb
|
||||
//! - \[x\] __smulbt
|
||||
//! - \[x\] __smultb
|
||||
//! - \[x\] __smultt
|
||||
//! - \[x\] __smulwb
|
||||
//! - \[x\] __smulwt
|
||||
//! - \[x\] __qadd
|
||||
//! - \[x\] __qsub
|
||||
//! - \[x\] __qdbl
|
||||
//! - \[x\] __smlabb
|
||||
//! - \[x\] __smlabt
|
||||
//! - \[x\] __smlatb
|
||||
//! - \[x\] __smlatt
|
||||
//! - \[x\] __smlawb
|
||||
//! - \[x\] __smlawt
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.arm.smulbb"]
|
||||
fn arm_smulbb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulbt"]
|
||||
fn arm_smulbt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smultb"]
|
||||
fn arm_smultb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smultt"]
|
||||
fn arm_smultt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulwb"]
|
||||
fn arm_smulwb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulwt"]
|
||||
fn arm_smulwt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qadd"]
|
||||
fn arm_qadd(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub"]
|
||||
fn arm_qsub(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlabb"]
|
||||
fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlabt"]
|
||||
fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlatb"]
|
||||
fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlatt"]
|
||||
fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlawb"]
|
||||
fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlawt"]
|
||||
fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
|
||||
}
|
||||
|
||||
/// Insert a SMULBB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[0\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulbb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smulbb(a: i32, b: i32) -> i32 {
|
||||
arm_smulbb(a, b)
|
||||
}
|
||||
|
||||
/// Insert a SMULTB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[1\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smultb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smultb(a: i32, b: i32) -> i32 {
|
||||
arm_smultb(a, b)
|
||||
}
|
||||
|
||||
/// Insert a SMULTB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[0\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulbt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smulbt(a: i32, b: i32) -> i32 {
|
||||
arm_smulbt(a, b)
|
||||
}
|
||||
|
||||
/// Insert a SMULTT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[1\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smultt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smultt(a: i32, b: i32) -> i32 {
|
||||
arm_smultt(a, b)
|
||||
}
|
||||
|
||||
/// Insert a SMULWB instruction
|
||||
///
|
||||
/// Multiplies the 32-bit signed first operand with the low halfword
|
||||
/// (as a 16-bit signed integer) of the second operand.
|
||||
/// Return the top 32 bits of the 48-bit product
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulwb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smulwb(a: i32, b: i32) -> i32 {
|
||||
arm_smulwb(a, b)
|
||||
}
|
||||
|
||||
/// Insert a SMULWT instruction
|
||||
///
|
||||
/// Multiplies the 32-bit signed first operand with the high halfword
|
||||
/// (as a 16-bit signed integer) of the second operand.
|
||||
/// Return the top 32 bits of the 48-bit product
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulwt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smulwt(a: i32, b: i32) -> i32 {
|
||||
arm_smulwt(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating addition
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a + b.
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
|
||||
arm_qadd(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating subtraction
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a - b.
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
|
||||
arm_qsub(a, b)
|
||||
}
|
||||
|
||||
/// Insert a QADD instruction
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a + a
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qdbl(a: i32) -> i32 {
|
||||
arm_qadd(a, a)
|
||||
}
|
||||
|
||||
/// Insert a SMLABB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[0\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlabb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlabb(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlabb(a, b, c)
|
||||
}
|
||||
|
||||
/// Insert a SMLABT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[1\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlabt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlabt(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlabt(a, b, c)
|
||||
}
|
||||
|
||||
/// Insert a SMLATB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[0\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlatb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlatb(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlatb(a, b, c)
|
||||
}
|
||||
|
||||
/// Insert a SMLATT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[1\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlatt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlatt(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlatt(a, b, c)
|
||||
}
|
||||
|
||||
/// Insert a SMLAWB instruction
|
||||
///
|
||||
/// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlawb))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlawb(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlawb(a, b, c)
|
||||
}
|
||||
|
||||
/// Insert a SMLAWT instruction
|
||||
///
|
||||
/// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlawt))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlawt(a: i32, b: i32, c: i32) -> i32 {
|
||||
arm_smlawt(a, b, c)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::{
        arm::*,
        simd::{i8x4, i16x2, u8x4},
    };
    use std::mem::transmute;
    use stdarch_test::simd_test;

    // Throughout these tests an `i16x2::new(lo, hi)` is transmuted into the
    // packed `i32` operand: the first lane is halfword \[0\] (bottom) and the
    // second lane is halfword \[1\] (top).

    #[test]
    fn smulbb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
        }
    }

    #[test]
    fn smulbt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
        }
    }

    #[test]
    fn smultb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
        }
    }

    #[test]
    fn smultt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
        }
    }

    #[test]
    fn smulwb() {
        unsafe {
            // As a 32-bit value `a` is (20 << 16) + 10, so the top 32 bits of
            // the 48-bit product with 30 are 20 * 30.
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
        }
    }

    #[test]
    fn smulwt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
        }
    }

    #[test]
    fn qadd() {
        unsafe {
            assert_eq!(super::__qadd(-10, 60), 50);
            assert_eq!(super::__qadd(i32::MAX, 10), i32::MAX);
            assert_eq!(super::__qadd(i32::MIN, -10), i32::MIN);
        }
    }

    #[test]
    fn qsub() {
        unsafe {
            assert_eq!(super::__qsub(10, 60), -50);
            assert_eq!(super::__qsub(i32::MAX, -10), i32::MAX);
            assert_eq!(super::__qsub(i32::MIN, 10), i32::MIN);
        }
    }

    // The tests below were previously missing `#[test]` and therefore never ran.

    #[test]
    fn qdbl() {
        unsafe {
            assert_eq!(super::__qdbl(10), 20);
            assert_eq!(super::__qdbl(i32::MAX), i32::MAX);
        }
    }

    #[test]
    fn smlabb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 30) + c;
            assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlabt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 40) + c;
            assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 30) + c;
            // Exercises `__smlatb` (top of `a`, bottom of `b`); the previous
            // version called `__smlabt` by mistake.
            assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 40) + c;
            assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlawb() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 30) + (c << 16)) >> 16;
            assert_eq!(super::__smlawb(a, transmute(b), c), r);
        }
    }

    #[test]
    fn smlawt() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 40) + (c << 16)) >> 16;
            assert_eq!(super::__smlawt(a, transmute(b), c), r);
        }
    }
}
|
||||
66
library/stdarch/crates/core_arch/src/arm/mod.rs
Normal file
66
library/stdarch/crates/core_arch/src/arm/mod.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
//! ARM intrinsics.
|
||||
//!
|
||||
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
|
||||
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
|
||||
//!
|
||||
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
|
||||
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
|
||||
|
||||
// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT)
|
||||
#[cfg(any(target_feature = "v6", doc))]
|
||||
mod sat;
|
||||
|
||||
#[cfg(any(target_feature = "v6", doc))]
|
||||
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
|
||||
pub use self::sat::*;
|
||||
|
||||
// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD)
|
||||
// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see
|
||||
// section 5.4.7)
|
||||
// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on
|
||||
// '+v5te' rather than on '+dsp'
|
||||
#[cfg(any(
|
||||
// >= v5TE but excludes v7-M
|
||||
all(target_feature = "v5te", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
doc,
|
||||
))]
|
||||
mod dsp;
|
||||
|
||||
#[cfg(any(
|
||||
// >= v5TE but excludes v7-M
|
||||
all(target_feature = "v5te", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
doc,
|
||||
))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub use self::dsp::*;
|
||||
|
||||
// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says
|
||||
// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated
|
||||
#[cfg(any(
|
||||
// v7-A, v7-R
|
||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
doc,
|
||||
))]
|
||||
mod simd32;
|
||||
|
||||
#[cfg(any(
|
||||
// v7-A, v7-R
|
||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
doc,
|
||||
))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub use self::simd32::*;
|
||||
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
pub use crate::core_arch::arm_shared::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
136
library/stdarch/crates/core_arch/src/arm/neon.rs
Normal file
136
library/stdarch/crates/core_arch/src/arm/neon.rs
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
use crate::core_arch::arm_shared::neon::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.arm.neon.vbsl.v8i8"]
|
||||
fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
|
||||
#[link_name = "llvm.arm.neon.vbsl.v16i8"]
|
||||
fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
|
||||
}
|
||||
|
||||
/// Shift Left and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
    static_assert!(0 <= N && N <= 63);
    // The shift count is handed to the intrinsic as a splatted vector.
    let shift = int64x1_t::splat(N as i64);
    transmute(vshiftins_v1i64(transmute(a), transmute(b), shift))
}
|
||||
|
||||
/// Shift Left and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_endian = "little")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
    static_assert!(0 <= N && N <= 63);
    // The shift count is handed to the intrinsic as a splatted vector.
    let shift = int64x2_t::splat(N as i64);
    transmute(vshiftins_v2i64(transmute(a), transmute(b), shift))
}
|
||||
|
||||
/// Shift Left and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_endian = "big")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
    static_assert!(0 <= N && N <= 63);
    // NOTE(review): the `[0, 1]` indices appear to keep both lanes in place, so
    // these big-endian shuffles look like no-ops — confirm this is intended.
    let lhs: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
    let rhs: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
    let shift = int64x2_t::splat(N as i64);
    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(transmute(lhs), transmute(rhs), shift));
    simd_shuffle!(ret_val, ret_val, [0, 1])
}
|
||||
|
||||
/// Shift Right and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
    static_assert!(1 <= N && N <= 64);
    // The right shift is passed to the shared shift-insert intrinsic as a
    // negated count, splatted across the vector.
    let shift = int64x1_t::splat(-N as i64);
    transmute(vshiftins_v1i64(transmute(a), transmute(b), shift))
}
|
||||
|
||||
/// Shift Right and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_endian = "little")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
    static_assert!(1 <= N && N <= 64);
    // The right shift is passed to the shared shift-insert intrinsic as a
    // negated count, splatted across the vector.
    let shift = int64x2_t::splat(-N as i64);
    transmute(vshiftins_v2i64(transmute(a), transmute(b), shift))
}
|
||||
|
||||
/// Shift Right and Insert (immediate)
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)
/// ## Safety
///  * Neon intrinsic unsafe
#[inline]
#[cfg(target_endian = "big")]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7,aes")]
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
    static_assert!(1 <= N && N <= 64);
    // NOTE(review): the `[0, 1]` indices appear to keep both lanes in place, so
    // these big-endian shuffles look like no-ops — confirm this is intended.
    let lhs: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
    let rhs: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
    // The right shift is passed as a negated count, splatted across the vector.
    let shift = int64x2_t::splat(-N as i64);
    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(transmute(lhs), transmute(rhs), shift));
    simd_shuffle!(ret_val, ret_val, [0, 1])
}
|
||||
62
library/stdarch/crates/core_arch/src/arm/sat.rs
Normal file
62
library/stdarch/crates/core_arch/src/arm/sat.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
//! # References:
|
||||
//!
|
||||
//! - Section 8.4 "Saturating intrinsics"
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Saturates a 32-bit signed integer to a signed integer with a given
|
||||
/// bit width.
|
||||
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("ssat", WIDTH = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn __ssat<const WIDTH: u32>(x: i32) -> i32 {
|
||||
static_assert!(matches!(WIDTH, 1..=32));
|
||||
arm_ssat(x, WIDTH as i32)
|
||||
}
|
||||
|
||||
/// Saturates a 32-bit signed integer to an unsigned integer with a given
|
||||
/// bit width.
|
||||
#[unstable(feature = "stdarch_arm_sat", issue = "none")]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("usat", WIDTH = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn __usat<const WIDTH: u32>(x: i32) -> u32 {
|
||||
static_assert!(matches!(WIDTH, 1..=32));
|
||||
arm_usat(x, WIDTH as i32)
|
||||
}
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.arm.ssat"]
|
||||
fn arm_ssat(x: i32, y: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.usat"]
|
||||
fn arm_usat(x: i32, y: i32) -> u32;
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use stdarch_test::simd_test;

    #[test]
    fn test_ssat() {
        // (input, expected) pairs for an 8-bit signed saturation.
        let cases: [(i32, i32); 4] = [(1, 1), (1000, 127), (-1, -1), (-1000, -128)];
        for (input, expected) in cases {
            unsafe {
                assert_eq!(__ssat::<8>(input), expected);
            }
        }
    }

    #[test]
    fn test_usat() {
        // (input, expected) pairs for an 8-bit unsigned saturation.
        let cases: [(i32, u32); 4] = [(1, 1), (1000, 255), (-1, 0), (-1000, 0)];
        for (input, expected) in cases {
            unsafe {
                assert_eq!(__usat::<8>(input), expected);
            }
        }
    }
}
|
||||
765
library/stdarch/crates/core_arch/src/arm/simd32.rs
Normal file
765
library/stdarch/crates/core_arch/src/arm/simd32.rs
Normal file
|
|
@ -0,0 +1,765 @@
|
|||
//! # References
|
||||
//!
|
||||
//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE
|
||||
//!
|
||||
//! Intrinsics that could live here
|
||||
//!
|
||||
//! - \[x\] __sel
|
||||
//! - \[ \] __ssat16
|
||||
//! - \[ \] __usat16
|
||||
//! - \[ \] __sxtab16
|
||||
//! - \[ \] __sxtb16
|
||||
//! - \[ \] __uxtab16
|
||||
//! - \[ \] __uxtb16
|
||||
//! - \[x\] __qadd8
|
||||
//! - \[x\] __qsub8
|
||||
//! - \[x\] __sadd8
|
||||
//! - \[x\] __shadd8
|
||||
//! - \[x\] __shsub8
|
||||
//! - \[x\] __ssub8
|
||||
//! - \[ \] __uadd8
|
||||
//! - \[ \] __uhadd8
|
||||
//! - \[ \] __uhsub8
|
||||
//! - \[ \] __uqadd8
|
||||
//! - \[ \] __uqsub8
|
||||
//! - \[x\] __usub8
|
||||
//! - \[x\] __usad8
|
||||
//! - \[x\] __usada8
|
||||
//! - \[x\] __qadd16
|
||||
//! - \[x\] __qasx
|
||||
//! - \[x\] __qsax
|
||||
//! - \[x\] __qsub16
|
||||
//! - \[x\] __sadd16
|
||||
//! - \[x\] __sasx
|
||||
//! - \[x\] __shadd16
|
||||
//! - \[ \] __shasx
|
||||
//! - \[ \] __shsax
|
||||
//! - \[x\] __shsub16
|
||||
//! - \[ \] __ssax
|
||||
//! - \[ \] __ssub16
|
||||
//! - \[ \] __uadd16
|
||||
//! - \[ \] __uasx
|
||||
//! - \[ \] __uhadd16
|
||||
//! - \[ \] __uhasx
|
||||
//! - \[ \] __uhsax
|
||||
//! - \[ \] __uhsub16
|
||||
//! - \[ \] __uqadd16
|
||||
//! - \[ \] __uqasx
|
||||
//! - \[x\] __uqsax
|
||||
//! - \[ \] __uqsub16
|
||||
//! - \[ \] __usax
|
||||
//! - \[ \] __usub16
|
||||
//! - \[x\] __smlad
|
||||
//! - \[ \] __smladx
|
||||
//! - \[ \] __smlald
|
||||
//! - \[ \] __smlaldx
|
||||
//! - \[x\] __smlsd
|
||||
//! - \[ \] __smlsdx
|
||||
//! - \[ \] __smlsld
|
||||
//! - \[ \] __smlsldx
|
||||
//! - \[x\] __smuad
|
||||
//! - \[x\] __smuadx
|
||||
//! - \[x\] __smusd
|
||||
//! - \[x\] __smusdx
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::mem::transmute;
|
||||
|
||||
/// ARM-specific vector of four `i8` lanes packed into a 32-bit integer.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub type int8x4_t = i32;
|
||||
|
||||
/// ARM-specific vector of four `u8` lanes packed into a 32-bit integer.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub type uint8x4_t = u32;
|
||||
|
||||
/// ARM-specific vector of two `i16` lanes packed into a 32-bit integer.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub type int16x2_t = i32;
|
||||
|
||||
/// ARM-specific vector of two `u16` lanes packed into a 32-bit integer.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub type uint16x2_t = u32;
|
||||
|
||||
macro_rules! dsp_call {
|
||||
($name:expr, $a:expr, $b:expr) => {
|
||||
transmute($name(transmute($a), transmute($b)))
|
||||
};
|
||||
}
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.arm.qadd8"]
|
||||
fn arm_qadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub8"]
|
||||
fn arm_qsub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub16"]
|
||||
fn arm_qsub16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qadd16"]
|
||||
fn arm_qadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qasx"]
|
||||
fn arm_qasx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsax"]
|
||||
fn arm_qsax(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sadd16"]
|
||||
fn arm_sadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sadd8"]
|
||||
fn arm_sadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlad"]
|
||||
fn arm_smlad(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlsd"]
|
||||
fn arm_smlsd(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sasx"]
|
||||
fn arm_sasx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sel"]
|
||||
fn arm_sel(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shadd8"]
|
||||
fn arm_shadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shadd16"]
|
||||
fn arm_shadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shsub8"]
|
||||
fn arm_shsub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.ssub8"]
|
||||
fn arm_ssub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.usub8"]
|
||||
fn arm_usub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shsub16"]
|
||||
fn arm_shsub16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smuad"]
|
||||
fn arm_smuad(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smuadx"]
|
||||
fn arm_smuadx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smusd"]
|
||||
fn arm_smusd(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smusdx"]
|
||||
fn arm_smusdx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.usad8"]
|
||||
fn arm_usad8(a: i32, b: i32) -> u32;
|
||||
}
|
||||
|
||||
/// Saturating four 8-bit integer additions
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
/// res\[2\] = a\[2\] + b\[2\]
|
||||
/// res\[3\] = a\[3\] + b\[3\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_qadd8, a, b)
|
||||
}
|
||||
|
||||
/// Saturating four 8-bit integer subtractions
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
/// res\[2\] = a\[2\] - b\[2\]
|
||||
/// res\[3\] = a\[3\] - b\[3\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_qsub8, a, b)
|
||||
}
|
||||
|
||||
/// Saturating two 16-bit integer subtractions
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub16))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qsub16, a, b)
|
||||
}
|
||||
|
||||
/// Saturating two 16-bit integer additions
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd16))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qadd16, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qasx))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qasx, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[1\]
|
||||
/// res\[1\] = a\[1\] - b\[0\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsax))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qsax, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sadd16))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_sadd16, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
/// res\[2\] = a\[2\] + b\[2\]
|
||||
/// res\[3\] = a\[3\] + b\[3\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sadd8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_sadd8, a, b)
|
||||
}
|
||||
|
||||
/// Dual 16-bit Signed Multiply with Addition of products
|
||||
/// and 32-bit accumulation.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlad))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlad(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Dual 16-bit Signed Multiply with Subtraction of products
|
||||
/// and 32-bit accumulation and overflow detection.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlsd))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlsd(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sasx))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_sasx, a, b)
|
||||
}
|
||||
|
||||
/// Select bytes from each operand according to APSR GE flags
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\]
|
||||
/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\]
|
||||
/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\]
|
||||
/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\]
|
||||
///
|
||||
/// where GE are bits of APSR
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sel))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_sel, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel byte-wise addition.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] + b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] + b\[1\]) / 2
|
||||
/// res\[2\] = (a\[2\] + b\[2\]) / 2
|
||||
/// res\[3\] = (a\[3\] + b\[3\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shadd8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_shadd8, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel halfword-wise addition.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] + b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] + b\[1\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shadd16))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_shadd16, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel byte-wise subtraction.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] - b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] - b\[1\]) / 2
|
||||
/// res\[2\] = (a\[2\] - b\[2\]) / 2
|
||||
/// res\[3\] = (a\[3\] - b\[3\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shsub8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_shsub8, a, b)
|
||||
}
|
||||
|
||||
/// Inserts a `USUB8` instruction.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
/// res\[2\] = a\[2\] - b\[2\]
|
||||
/// res\[3\] = a\[3\] - b\[3\]
|
||||
///
|
||||
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
|
||||
/// The GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usub8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
|
||||
dsp_call!(arm_usub8, a, b)
|
||||
}
|
||||
|
||||
/// Inserts a `SSUB8` instruction.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
/// res\[2\] = a\[2\] - b\[2\]
|
||||
/// res\[3\] = a\[3\] - b\[3\]
|
||||
///
|
||||
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
|
||||
/// The GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(ssub8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_ssub8, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel halfword-wise subtraction.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] - b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] - b\[1\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shsub16))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_shsub16, a, b)
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Add.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smuad))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smuad(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Add Reversed.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smuadx))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smuadx(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Subtract.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smusd))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smusd(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Subtract Reversed.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smusdx))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smusdx(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Sum of 8-bit absolute differences.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
|
||||
/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\])
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usad8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 {
|
||||
arm_usad8(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Sum of 8-bit absolute differences and constant.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
|
||||
/// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usad8))]
|
||||
#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
|
||||
pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
|
||||
__usad8(a, b) + c
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::simd::{i8x4, i16x2, u8x4};
|
||||
use std::mem::transmute;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[test]
|
||||
fn qadd8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, i8::MAX);
|
||||
let b = i8x4::new(2, -1, 0, 1);
|
||||
let c = i8x4::new(3, 1, 3, i8::MAX);
|
||||
let r: i8x4 = dsp_call!(super::__qadd8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qsub8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, i8::MIN);
|
||||
let b = i8x4::new(2, -1, 0, 1);
|
||||
let c = i8x4::new(-1, 3, 3, i8::MIN);
|
||||
let r: i8x4 = dsp_call!(super::__qsub8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qadd16() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(2, -1);
|
||||
let c = i16x2::new(3, 1);
|
||||
let r: i16x2 = dsp_call!(super::__qadd16, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qsub16() {
|
||||
unsafe {
|
||||
let a = i16x2::new(10, 20);
|
||||
let b = i16x2::new(20, -10);
|
||||
let c = i16x2::new(-10, 30);
|
||||
let r: i16x2 = dsp_call!(super::__qsub16, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qasx() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, i16::MAX);
|
||||
let b = i16x2::new(2, 2);
|
||||
let c = i16x2::new(-1, i16::MAX);
|
||||
let r: i16x2 = dsp_call!(super::__qasx, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qsax() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, i16::MAX);
|
||||
let b = i16x2::new(2, 2);
|
||||
let c = i16x2::new(3, i16::MAX - 2);
|
||||
let r: i16x2 = dsp_call!(super::__qsax, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sadd16() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, i16::MAX);
|
||||
let b = i16x2::new(2, 2);
|
||||
let c = i16x2::new(3, -i16::MAX);
|
||||
let r: i16x2 = dsp_call!(super::__sadd16, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sadd8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, i8::MAX);
|
||||
let b = i8x4::new(4, 3, 2, 2);
|
||||
let c = i8x4::new(5, 5, 5, -i8::MAX);
|
||||
let r: i8x4 = dsp_call!(super::__sadd8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sasx() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(2, 1);
|
||||
let c = i16x2::new(0, 4);
|
||||
let r: i16x2 = dsp_call!(super::__sasx, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smlad() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(3, 4);
|
||||
let r = super::__smlad(transmute(a), transmute(b), 10);
|
||||
assert_eq!(r, (1 * 3) + (2 * 4) + 10);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smlsd() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(3, 4);
|
||||
let r = super::__smlsd(transmute(a), transmute(b), 10);
|
||||
assert_eq!(r, ((1 * 3) - (2 * 4)) + 10);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sel() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, i8::MAX);
|
||||
let b = i8x4::new(4, 3, 2, 2);
|
||||
// call sadd8() to set GE bits
|
||||
super::__sadd8(transmute(a), transmute(b));
|
||||
let c = i8x4::new(1, 2, 3, i8::MAX);
|
||||
let r: i8x4 = dsp_call!(super::__sel, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shadd8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, 4);
|
||||
let b = i8x4::new(5, 4, 3, 2);
|
||||
let c = i8x4::new(3, 3, 3, 3);
|
||||
let r: i8x4 = dsp_call!(super::__shadd8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shadd16() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let c = i16x2::new(3, 3);
|
||||
let r: i16x2 = dsp_call!(super::__shadd16, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shsub8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, 4);
|
||||
let b = i8x4::new(5, 4, 3, 2);
|
||||
let c = i8x4::new(-2, -1, 0, 1);
|
||||
let r: i8x4 = dsp_call!(super::__shsub8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ssub8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, 4);
|
||||
let b = i8x4::new(5, 4, 3, 2);
|
||||
let c = i8x4::new(-4, -2, 0, 2);
|
||||
let r: i8x4 = dsp_call!(super::__ssub8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn usub8() {
|
||||
unsafe {
|
||||
let a = u8x4::new(1, 2, 3, 4);
|
||||
let b = u8x4::new(5, 4, 3, 2);
|
||||
let c = u8x4::new(252, 254, 0, 2);
|
||||
let r: u8x4 = dsp_call!(super::__usub8, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shsub16() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let c = i16x2::new(-2, -1);
|
||||
let r: i16x2 = dsp_call!(super::__shsub16, a, b);
|
||||
assert_eq!(r, c);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smuad() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let r = super::__smuad(transmute(a), transmute(b));
|
||||
assert_eq!(r, 13);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smuadx() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let r = super::__smuadx(transmute(a), transmute(b));
|
||||
assert_eq!(r, 14);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smusd() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let r = super::__smusd(transmute(a), transmute(b));
|
||||
assert_eq!(r, -3);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn smusdx() {
|
||||
unsafe {
|
||||
let a = i16x2::new(1, 2);
|
||||
let b = i16x2::new(5, 4);
|
||||
let r = super::__smusdx(transmute(a), transmute(b));
|
||||
assert_eq!(r, -6);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn usad8() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, 4);
|
||||
let b = i8x4::new(4, 3, 2, 1);
|
||||
let r = super::__usad8(transmute(a), transmute(b));
|
||||
assert_eq!(r, 8);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn usad8a() {
|
||||
unsafe {
|
||||
let a = i8x4::new(1, 2, 3, 4);
|
||||
let b = i8x4::new(4, 3, 2, 1);
|
||||
let c = 10;
|
||||
let r = super::__usada8(transmute(a), transmute(b), c);
|
||||
assert_eq!(r, 8 + c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
//! Access types available on all architectures
|
||||
|
||||
/// Full system is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct SY;
|
||||
|
||||
dmb_dsb!(SY);
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Isb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __isb(&self) {
|
||||
super::isb(super::arg::SY)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary
|
||||
// of CP15 instructions"
|
||||
|
||||
use crate::arch::asm;
|
||||
|
||||
/// Full system is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct SY;
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Dmb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __dmb(&self) {
|
||||
asm!(
|
||||
"mcr p15, 0, {}, c7, c10, 5",
|
||||
in(reg) 0_u32,
|
||||
options(preserves_flags, nostack)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Dsb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __dsb(&self) {
|
||||
asm!(
|
||||
"mcr p15, 0, {}, c7, c10, 4",
|
||||
in(reg) 0_u32,
|
||||
options(preserves_flags, nostack)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Isb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __isb(&self) {
|
||||
asm!(
|
||||
"mcr p15, 0, {}, c7, c5, 4",
|
||||
in(reg) 0_u32,
|
||||
options(preserves_flags, nostack)
|
||||
)
|
||||
}
|
||||
}
|
||||
185
library/stdarch/crates/core_arch/src/arm_shared/barrier/mod.rs
Normal file
185
library/stdarch/crates/core_arch/src/arm_shared/barrier/mod.rs
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
// Reference: Section 7.4 "Hints" of ACLE
|
||||
|
||||
// CP15 instruction
|
||||
#[cfg(not(any(
|
||||
// v8
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
// v7
|
||||
target_feature = "v7",
|
||||
// v6-M
|
||||
target_feature = "mclass"
|
||||
)))]
|
||||
mod cp15;
|
||||
|
||||
#[cfg(not(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
)))]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub use self::cp15::*;
|
||||
|
||||
// Dedicated instructions
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
))]
|
||||
macro_rules! dmb_dsb {
|
||||
($A:ident) => {
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Dmb for $A {
|
||||
#[inline(always)]
|
||||
unsafe fn __dmb(&self) {
|
||||
super::dmb(super::arg::$A)
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
impl super::super::sealed::Dsb for $A {
|
||||
#[inline(always)]
|
||||
unsafe fn __dsb(&self) {
|
||||
super::dsb(super::arg::$A)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
))]
|
||||
mod common;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
))]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub use self::common::*;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
))]
|
||||
mod not_mclass;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
))]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub use self::not_mclass::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
mod v8;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub use self::v8::*;
|
||||
|
||||
/// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction.
|
||||
///
|
||||
/// DMB ensures the observed ordering of memory accesses. Memory accesses of the specified type
|
||||
/// issued before the DMB are guaranteed to be observed (in the specified scope) before memory
|
||||
/// accesses issued after the DMB.
|
||||
///
|
||||
/// For example, DMB should be used between storing data, and updating a flag variable that makes
|
||||
/// that data available to another core.
|
||||
///
|
||||
/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type.
|
||||
#[inline(always)]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub unsafe fn __dmb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Dmb,
|
||||
{
|
||||
arg.__dmb()
|
||||
}
|
||||
|
||||
/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction.
|
||||
///
|
||||
/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has
|
||||
/// additional properties. After a DSB instruction completes, all memory accesses of the specified
|
||||
/// type issued before the DSB are guaranteed to have completed.
|
||||
///
|
||||
/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type.
|
||||
#[inline(always)]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub unsafe fn __dsb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Dsb,
|
||||
{
|
||||
arg.__dsb()
|
||||
}
|
||||
|
||||
/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15
|
||||
/// instruction.
|
||||
///
|
||||
/// This instruction flushes the processor pipeline fetch buffers, so that following instructions
|
||||
/// are fetched from cache or memory.
|
||||
///
|
||||
/// An ISB is needed after some system maintenance operations. An ISB is also needed before
|
||||
/// transferring control to code that has been loaded or modified in memory, for example by an
|
||||
/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are
|
||||
/// separate, privileged cache maintenance operations would be needed in order to unify the caches.)
|
||||
///
|
||||
/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full
|
||||
/// system) scope of the ISB instruction.
|
||||
#[inline(always)]
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub unsafe fn __isb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Isb,
|
||||
{
|
||||
arg.__isb()
|
||||
}
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.dmb"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
|
||||
fn dmb(_: i32);
|
||||
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.dsb"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")]
|
||||
fn dsb(_: i32);
|
||||
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.isb"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")]
|
||||
fn isb(_: i32);
|
||||
}
|
||||
|
||||
// we put these in a module to prevent weirdness with glob re-exports
|
||||
mod arg {
|
||||
// See Section 7.3 Memory barriers of ACLE
|
||||
pub const SY: i32 = 15;
|
||||
pub const ST: i32 = 14;
|
||||
pub const LD: i32 = 13;
|
||||
pub const ISH: i32 = 11;
|
||||
pub const ISHST: i32 = 10;
|
||||
pub const ISHLD: i32 = 9;
|
||||
pub const NSH: i32 = 7;
|
||||
pub const NSHST: i32 = 6;
|
||||
pub const NSHLD: i32 = 5;
|
||||
pub const OSH: i32 = 3;
|
||||
pub const OSHST: i32 = 2;
|
||||
pub const OSHLD: i32 = 1;
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
//! Access types available on v7 and v8 but not on v7(E)-M or v8-M
|
||||
|
||||
/// Full system is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct ST;
|
||||
|
||||
dmb_dsb!(ST);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, reads and writes are
|
||||
/// the required access types
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct ISH;
|
||||
|
||||
dmb_dsb!(ISH);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct ISHST;
|
||||
|
||||
dmb_dsb!(ISHST);
|
||||
|
||||
/// Non-shareable is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct NSH;
|
||||
|
||||
dmb_dsb!(NSH);
|
||||
|
||||
/// Non-shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct NSHST;
|
||||
|
||||
dmb_dsb!(NSHST);
|
||||
|
||||
/// Outer Shareable is the required shareability domain, reads and writes are
|
||||
/// the required access types
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct OSH;
|
||||
|
||||
dmb_dsb!(OSH);
|
||||
|
||||
/// Outer Shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct OSHST;
|
||||
|
||||
dmb_dsb!(OSHST);
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
/// Full system is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct LD;
|
||||
|
||||
dmb_dsb!(LD);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct ISHLD;
|
||||
|
||||
dmb_dsb!(ISHLD);
|
||||
|
||||
/// Non-shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct NSHLD;
|
||||
|
||||
dmb_dsb!(NSHLD);
|
||||
|
||||
/// Outer Shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub struct OSHLD;
|
||||
|
||||
dmb_dsb!(OSHLD);
|
||||
125
library/stdarch/crates/core_arch/src/arm_shared/hints.rs
Normal file
125
library/stdarch/crates/core_arch/src/arm_shared/hints.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
// # References
|
||||
//
|
||||
// - Section 7.4 "Hints" of ACLE
|
||||
// - Section 7.7 "NOP" of ACLE
|
||||
|
||||
/// Emits a WFI (wait for interrupt) hint instruction, or nothing at all.
///
/// WFI permits, but does not oblige, the processor to drop into a low-power
/// state until one of a number of asynchronous events occurs.
// ACLE Section 10.1 lists the supporting architectures as 8, 6K and 6-M;
// LLVM reports "instruction requires: armv6k".
#[cfg(any(
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_feature = "v6",
    doc
))]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
#[inline(always)]
pub unsafe fn __wfi() {
    hint(HINT_WFI)
}
|
||||
|
||||
/// Emits a WFE (wait for event) hint instruction, or nothing at all.
///
/// WFE permits, but does not oblige, the processor to drop into a low-power
/// state until some event occurs, for example a SEV issued by another
/// processor.
// ACLE Section 10.1 lists the supporting architectures as 8, 6K and 6-M;
// LLVM reports "instruction requires: armv6k".
#[cfg(any(
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_feature = "v6",
    doc
))]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
#[inline(always)]
pub unsafe fn __wfe() {
    hint(HINT_WFE)
}
|
||||
|
||||
/// Emits a SEV (send a global event) hint instruction.
///
/// The event is signaled to every processor in a multiprocessor system; on a
/// uniprocessor system the instruction is a NOP.
// ACLE Section 10.1 lists the supporting architectures as 8, 6K, 6-M and 7-M;
// LLVM reports "instruction requires: armv6k".
#[cfg(any(
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_feature = "v6",
    doc
))]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
#[inline(always)]
pub unsafe fn __sev() {
    hint(HINT_SEV)
}
|
||||
|
||||
/// Emits a SEVL (send a local event) hint instruction.
///
/// The event is signaled only to the processor executing the instruction. In
/// a multiprocessor system it is not required to affect the other processors.
// LLVM reports "instruction requires: armv8".
#[cfg(any(
    target_arch = "aarch64", // AArch64
    target_arch = "arm64ec", // Arm64EC
    target_feature = "v8",   // 32-bit ARMv8
    doc,
))]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
#[inline(always)]
pub unsafe fn __sevl() {
    hint(HINT_SEVL)
}
|
||||
|
||||
/// Emits a YIELD hint instruction.
///
/// Multithreading software uses it to tell the hardware that the current task
/// (a spin-lock, for example) could be swapped out to improve overall system
/// performance.
// ACLE Section 10.1 lists the supporting architectures as 8, 6K and 6-M;
// LLVM reports "instruction requires: armv6k".
#[cfg(any(
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_feature = "v6",
    doc
))]
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
#[inline(always)]
pub unsafe fn __yield() {
    hint(HINT_YIELD)
}
|
||||
|
||||
/// Generates an unspecified no-op instruction.
|
||||
///
|
||||
/// Note that not all architectures provide a distinguished NOP instruction. On
|
||||
/// those that do, it is unspecified whether this intrinsic generates it or
|
||||
/// another instruction. It is not guaranteed that inserting this instruction
|
||||
/// will increase execution time.
|
||||
#[inline(always)]
|
||||
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
|
||||
pub unsafe fn __nop() {
|
||||
crate::arch::asm!("nop", options(nomem, nostack, preserves_flags));
|
||||
}
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.hint"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
|
||||
fn hint(_: i32);
|
||||
}
|
||||
|
||||
// Hint operand encodings, taken from LLVM 7.0.1's
// lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td

// Not passed to `hint` by any wrapper visible in this file; `__nop` uses inline asm.
const HINT_NOP: i32 = 0;
// Operand used by `__yield`.
const HINT_YIELD: i32 = 1;
// Operand used by `__wfe`.
const HINT_WFE: i32 = 2;
// Operand used by `__wfi`.
const HINT_WFI: i32 = 3;
// Operand used by `__sev`.
const HINT_SEV: i32 = 4;
// Operand used by `__sevl`.
const HINT_SEVL: i32 = 5;
|
||||
117
library/stdarch/crates/core_arch/src/arm_shared/mod.rs
Normal file
117
library/stdarch/crates/core_arch/src/arm_shared/mod.rs
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
//! ARM C Language Extensions (ACLE)
|
||||
//!
|
||||
//! # Developer notes
|
||||
//!
|
||||
//! Below is a list of built-in targets that are representative of the different ARM
|
||||
//! architectures; the list includes the `target_feature`s they possess.
|
||||
//!
|
||||
//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t`
|
||||
//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te`
|
||||
//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6`
|
||||
//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass`
|
||||
//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass`
|
||||
//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass`
|
||||
//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
|
||||
//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass`
|
||||
//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
|
||||
//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass`
|
||||
//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon`
|
||||
//!
|
||||
//! Section 10.1 of ACLE says:
|
||||
//!
|
||||
//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
|
||||
//! its predecessor instruction set."
|
||||
//!
|
||||
//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
|
||||
//! its predecessor instruction set."
|
||||
//!
|
||||
//! From that info and from looking at how LLVM features work (using custom targets) we can identify
|
||||
//! features that are subsets of others:
|
||||
//!
|
||||
//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is
|
||||
//! enabled as well.
|
||||
//!
|
||||
//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8`
|
||||
//! - `v6 < v8m < v6t2`
|
||||
//! - `v7 < v8m.main`
|
||||
//!
|
||||
//! *NOTE*: Section 5.4.7 of ACLE says:
|
||||
//!
|
||||
//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
|
||||
//! intrinsics defined in Saturating intrinsics are available."
|
||||
//!
|
||||
//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
|
||||
//! targets so we have to work around this difference.
|
||||
//!
|
||||
//! # References
|
||||
//!
|
||||
//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest)
|
||||
|
||||
#![cfg_attr(
|
||||
all(target_arch = "aarch64", target_abi = "softfloat"),
|
||||
// Just allow the warning: anyone soundly using the intrinsics has to enable
|
||||
// the target feature, and that will generate a warning for them.
|
||||
allow(aarch64_softfloat_neon)
|
||||
)]
|
||||
// Only for 'neon' submodule
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported
|
||||
// via CP15 instructions. See Section 10.1 of ACLE
|
||||
mod barrier;
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub use self::barrier::*;
|
||||
|
||||
mod hints;
|
||||
#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
|
||||
pub use self::hints::*;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
doc
|
||||
))]
|
||||
pub(crate) mod neon;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
doc
|
||||
))]
|
||||
#[cfg_attr(
|
||||
not(target_arch = "arm"),
|
||||
stable(feature = "neon_intrinsics", since = "1.59.0")
|
||||
)]
|
||||
#[cfg_attr(
|
||||
target_arch = "arm",
|
||||
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
|
||||
)]
|
||||
pub use self::neon::*;
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
target_feature = "v7",
|
||||
doc
|
||||
))]
|
||||
pub(crate) mod test_support;
|
||||
|
||||
mod sealed {
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub trait Dmb {
|
||||
unsafe fn __dmb(&self);
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub trait Dsb {
|
||||
unsafe fn __dsb(&self);
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
|
||||
pub trait Isb {
|
||||
unsafe fn __isb(&self);
|
||||
}
|
||||
}
|
||||
75440
library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
Normal file
75440
library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,206 @@
|
|||
//! Tests for ARM+v7+neon load (vld1) intrinsics.
|
||||
//!
|
||||
//! These are included in `{arm, aarch64}::neon`.
|
||||
|
||||
use super::*;
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
use crate::core_arch::arm::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use std::mem;
|
||||
use stdarch_test::simd_test;
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s8() {
|
||||
let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s8() {
|
||||
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s16() {
|
||||
let a: [i16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i16x4::new(1, 2, 3, 4);
|
||||
let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s16() {
|
||||
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s32() {
|
||||
let a: [i32; 3] = [0, 1, 2];
|
||||
let e = i32x2::new(1, 2);
|
||||
let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s32() {
|
||||
let a: [i32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i32x4::new(1, 2, 3, 4);
|
||||
let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s64() {
|
||||
let a: [i64; 2] = [0, 1];
|
||||
let e = i64x1::new(1);
|
||||
let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s64() {
|
||||
let a: [i64; 3] = [0, 1, 2];
|
||||
let e = i64x2::new(1, 2);
|
||||
let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u8() {
|
||||
let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u8() {
|
||||
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u16() {
|
||||
let a: [u16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u16() {
|
||||
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u32() {
|
||||
let a: [u32; 3] = [0, 1, 2];
|
||||
let e = u32x2::new(1, 2);
|
||||
let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u32() {
|
||||
let a: [u32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u32x4::new(1, 2, 3, 4);
|
||||
let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u64() {
|
||||
let a: [u64; 2] = [0, 1];
|
||||
let e = u64x1::new(1);
|
||||
let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u64() {
|
||||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e = u64x2::new(1, 2);
|
||||
let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p8() {
|
||||
let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p8() {
|
||||
let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p16() {
|
||||
let a: [p16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p16() {
|
||||
let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vld1_p64() {
|
||||
let a: [p64; 2] = [0, 1];
|
||||
let e = u64x1::new(1);
|
||||
let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vld1q_p64() {
|
||||
let a: [p64; 3] = [0, 1, 2];
|
||||
let e = u64x2::new(1, 2);
|
||||
let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_f32() {
|
||||
let a: [f32; 3] = [0., 1., 2.];
|
||||
let e = f32x2::new(1., 2.);
|
||||
let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_f32() {
|
||||
let a: [f32; 5] = [0., 1., 2., 3., 4.];
|
||||
let e = f32x4::new(1., 2., 3., 4.);
|
||||
let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
5531
library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
Normal file
5531
library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,93 @@
|
|||
//! Tests for ARM+v7+neon shift and insert (vsli[q]_n, vsri[q]_n) intrinsics.
|
||||
//!
|
||||
//! These are included in `{arm, aarch64}::neon`.
|
||||
|
||||
use super::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
use crate::core_arch::arm::*;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use std::mem::transmute;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
// Generates one `#[simd_test]` function for a shift-left-and-insert intrinsic.
//
// Invocation shape:
//     test_vsli!(test_name, lane_ty => intrinsic([a lanes], [b lanes], n));
//
// For every lane the expected value keeps the low `n` bits of `a` and ORs in
// `b << n`.
macro_rules! test_vsli {
    ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
        #[simd_test(enable = "neon")]
        #[allow(unused_assignments)]
        unsafe fn $test_id() {
            let lhs = [$($a as $t),*];
            let rhs = [$($b as $t),*];
            // Ones in the low `$n` bit positions.
            let low_mask: $t = (1 << $n) - 1;
            let expected = [$(($a as $t & low_mask) | (($b as $t) << $n)),*];
            let raw = $fn_id::<$n>(transmute(lhs), transmute(rhs));
            // Seeding `actual` with `expected` fixes the array type that
            // `transmute` has to produce on the next line.
            let mut actual = expected;
            actual = transmute(raw);
            assert_eq!(actual, expected);
        }
    }
}
|
||||
test_vsli!(test_vsli_n_s8, i8 => vsli_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
|
||||
test_vsli!(test_vsliq_n_s8, i8 => vsliq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
|
||||
test_vsli!(test_vsli_n_s16, i16 => vsli_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
|
||||
test_vsli!(test_vsliq_n_s16, i16 => vsliq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
|
||||
test_vsli!(test_vsli_n_s32, i32 => vsli_n_s32([125683, -78901], [-128, -112944], 23));
|
||||
test_vsli!(test_vsliq_n_s32, i32 => vsliq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
|
||||
test_vsli!(test_vsli_n_s64, i64 => vsli_n_s64([-333333], [1028], 45));
|
||||
test_vsli!(test_vsliq_n_s64, i64 => vsliq_n_s64([-333333, -52023], [1028, -99814], 33));
|
||||
test_vsli!(test_vsli_n_u8, u8 => vsli_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
|
||||
test_vsli!(test_vsliq_n_u8, u8 => vsliq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
|
||||
test_vsli!(test_vsli_n_u16, u16 => vsli_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
|
||||
test_vsli!(test_vsliq_n_u16, u16 => vsliq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
|
||||
test_vsli!(test_vsli_n_u32, u32 => vsli_n_u32([125683, 78901], [128, 112944], 23));
|
||||
test_vsli!(test_vsliq_n_u32, u32 => vsliq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
|
||||
test_vsli!(test_vsli_n_u64, u64 => vsli_n_u64([333333], [1028], 45));
|
||||
test_vsli!(test_vsliq_n_u64, u64 => vsliq_n_u64([333333, 52023], [1028, 99814], 33));
|
||||
test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
|
||||
test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
|
||||
test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
|
||||
test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
|
||||
//test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45));
|
||||
//test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33));
|
||||
|
||||
// Generates one `#[simd_test]` function for a shift-right-and-insert intrinsic.
//
// Invocation shape:
//     test_vsri!(test_name, lane_ty => intrinsic([a lanes], [b lanes], n));
//
// For every lane the expected value keeps the top `n` bits of `a` and fills
// the remaining bits from `b >> n`.
macro_rules! test_vsri {
    ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
        #[simd_test(enable = "neon")]
        #[allow(unused_assignments)]
        unsafe fn $test_id() {
            let lhs = [$($a as $t),*];
            let rhs = [$($b as $t),*];
            // `$n` low ones rotated right by `$n`, i.e. ones in the top `$n` bits.
            let high_mask = (((1 as $t) << $n) - 1).rotate_right($n);
            let expected = [$(($a as $t & high_mask) | (($b as $t >> $n) & !high_mask)),*];
            let raw = $fn_id::<$n>(transmute(lhs), transmute(rhs));
            // Seeding `actual` with `expected` fixes the array type that
            // `transmute` has to produce on the next line.
            let mut actual = expected;
            actual = transmute(raw);
            assert_eq!(actual, expected);
        }
    }
}
|
||||
test_vsri!(test_vsri_n_s8, i8 => vsri_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
|
||||
test_vsri!(test_vsriq_n_s8, i8 => vsriq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
|
||||
test_vsri!(test_vsri_n_s16, i16 => vsri_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
|
||||
test_vsri!(test_vsriq_n_s16, i16 => vsriq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
|
||||
test_vsri!(test_vsri_n_s32, i32 => vsri_n_s32([125683, -78901], [-128, -112944], 23));
|
||||
test_vsri!(test_vsriq_n_s32, i32 => vsriq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
|
||||
test_vsri!(test_vsri_n_s64, i64 => vsri_n_s64([-333333], [1028], 45));
|
||||
test_vsri!(test_vsriq_n_s64, i64 => vsriq_n_s64([-333333, -52023], [1028, -99814], 33));
|
||||
test_vsri!(test_vsri_n_u8, u8 => vsri_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
|
||||
test_vsri!(test_vsriq_n_u8, u8 => vsriq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
|
||||
test_vsri!(test_vsri_n_u16, u16 => vsri_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
|
||||
test_vsri!(test_vsriq_n_u16, u16 => vsriq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
|
||||
test_vsri!(test_vsri_n_u32, u32 => vsri_n_u32([125683, 78901], [128, 112944], 23));
|
||||
test_vsri!(test_vsriq_n_u32, u32 => vsriq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
|
||||
test_vsri!(test_vsri_n_u64, u64 => vsri_n_u64([333333], [1028], 45));
|
||||
test_vsri!(test_vsriq_n_u64, u64 => vsriq_n_u64([333333, 52023], [1028, 99814], 33));
|
||||
test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
|
||||
test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
|
||||
test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
|
||||
test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
|
||||
//test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45));
|
||||
//test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33));
|
||||
|
|
@ -0,0 +1,389 @@
|
|||
//! Tests for ARM+v7+neon store (vst1) intrinsics.
|
||||
//!
|
||||
//! These are included in `{arm, aarch64}::neon`.
|
||||
|
||||
use super::*;
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
use crate::core_arch::arm::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s8() {
|
||||
let mut vals = [0_i8; 9];
|
||||
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s8() {
|
||||
let mut vals = [0_i8; 17];
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
assert_eq!(vals[9], 9);
|
||||
assert_eq!(vals[10], 10);
|
||||
assert_eq!(vals[11], 11);
|
||||
assert_eq!(vals[12], 12);
|
||||
assert_eq!(vals[13], 13);
|
||||
assert_eq!(vals[14], 14);
|
||||
assert_eq!(vals[15], 15);
|
||||
assert_eq!(vals[16], 16);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s16() {
|
||||
let mut vals = [0_i16; 5];
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s16() {
|
||||
let mut vals = [0_i16; 9];
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s32() {
|
||||
let mut vals = [0_i32; 3];
|
||||
let a = i32x2::new(1, 2);
|
||||
|
||||
vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s32() {
|
||||
let mut vals = [0_i32; 5];
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s64() {
|
||||
let mut vals = [0_i64; 2];
|
||||
let a = i64x1::new(1);
|
||||
|
||||
vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s64() {
|
||||
let mut vals = [0_i64; 3];
|
||||
let a = i64x2::new(1, 2);
|
||||
|
||||
vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u8() {
|
||||
let mut vals = [0_u8; 9];
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u8() {
|
||||
let mut vals = [0_u8; 17];
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
assert_eq!(vals[9], 9);
|
||||
assert_eq!(vals[10], 10);
|
||||
assert_eq!(vals[11], 11);
|
||||
assert_eq!(vals[12], 12);
|
||||
assert_eq!(vals[13], 13);
|
||||
assert_eq!(vals[14], 14);
|
||||
assert_eq!(vals[15], 15);
|
||||
assert_eq!(vals[16], 16);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u16() {
|
||||
let mut vals = [0_u16; 5];
|
||||
let a = u16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u16() {
|
||||
let mut vals = [0_u16; 9];
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u32() {
|
||||
let mut vals = [0_u32; 3];
|
||||
let a = u32x2::new(1, 2);
|
||||
|
||||
vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u32() {
|
||||
let mut vals = [0_u32; 5];
|
||||
let a = u32x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p8() {
|
||||
let mut vals = [0_u8; 9];
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p8() {
|
||||
let mut vals = [0_u8; 17];
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
assert_eq!(vals[9], 9);
|
||||
assert_eq!(vals[10], 10);
|
||||
assert_eq!(vals[11], 11);
|
||||
assert_eq!(vals[12], 12);
|
||||
assert_eq!(vals[13], 13);
|
||||
assert_eq!(vals[14], 14);
|
||||
assert_eq!(vals[15], 15);
|
||||
assert_eq!(vals[16], 16);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p16() {
|
||||
let mut vals = [0_u16; 5];
|
||||
let a = u16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p16() {
|
||||
let mut vals = [0_u16; 9];
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
assert_eq!(vals[3], 3);
|
||||
assert_eq!(vals[4], 4);
|
||||
assert_eq!(vals[5], 5);
|
||||
assert_eq!(vals[6], 6);
|
||||
assert_eq!(vals[7], 7);
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1_p64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1q_p64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_f32() {
|
||||
let mut vals = [0_f32; 3];
|
||||
let a = f32x2::new(1., 2.);
|
||||
|
||||
vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0.);
|
||||
assert_eq!(vals[1], 1.);
|
||||
assert_eq!(vals[2], 2.);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_f32() {
|
||||
let mut vals = [0_f32; 5];
|
||||
let a = f32x4::new(1., 2., 3., 4.);
|
||||
|
||||
vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0.);
|
||||
assert_eq!(vals[1], 1.);
|
||||
assert_eq!(vals[2], 2.);
|
||||
assert_eq!(vals[3], 3.);
|
||||
assert_eq!(vals[4], 4.);
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
836
library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
Normal file
836
library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
Normal file
|
|
@ -0,0 +1,836 @@
|
|||
#[cfg(target_arch = "arm")]
|
||||
use crate::core_arch::arm::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use std::{mem::transmute, vec::Vec};
|
||||
|
||||
/// Expands to a `Vec<u8>` of sample bit patterns used as test inputs.
macro_rules! V_u8 {
    () => {
        vec![0x00u8, 0x01, 0x02, 0x0F, 0x80, 0xF0, 0xFF]
    };
}
|
||||
/// Expands to a `Vec<u16>` of sample bit patterns used as test inputs.
macro_rules! V_u16 {
    () => {
        vec![0x0000u16, 0x0101, 0x0202, 0x0F0F, 0x8000, 0xF0F0, 0xFFFF]
    };
}
|
||||
/// Expands to a `Vec<u32>` of sample bit patterns used as test inputs.
macro_rules! V_u32 {
    () => {
        vec![
            0x0000_0000u32,
            0x0101_0101,
            0x0202_0202,
            0x0F0F_0F0F,
            0x8000_0000,
            0xF0F0_F0F0,
            0xFFFF_FFFF,
        ]
    };
}
|
||||
/// Expands to a `Vec<u64>` of sample bit patterns used as test inputs.
macro_rules! V_u64 {
    () => {
        vec![
            0x0000_0000_0000_0000u64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            0x8080_8080_8080_8080,
            0xF0F0_F0F0_F0F0_F0F0,
            0xFFFF_FFFF_FFFF_FFFF,
        ]
    };
}
|
||||
|
||||
/// Expands to a `Vec<i8>` of sample values used as test inputs.
///
/// The trailing comments give the two's-complement bit pattern of each negative entry.
macro_rules! V_i8 {
    () => {
        vec![
            0x00i8,
            0x01,
            0x02,
            0x0F,
            i8::MIN, // 0x80
            -16,     // 0xF0
            -1,      // 0xFF
        ]
    };
}
|
||||
/// Expands to a `Vec<i16>` of sample values used as test inputs.
///
/// The trailing comments give the two's-complement bit pattern of each negative entry.
macro_rules! V_i16 {
    () => {
        vec![
            0x0000i16,
            0x0101,
            0x0202,
            0x0F0F,
            i16::MIN, // 0x8000
            -3856,    // 0xF0F0
            -1,       // 0xFFFF
        ]
    };
}
|
||||
/// Expands to a `Vec<i32>` of sample values used as test inputs.
///
/// The trailing comments give the two's-complement bit pattern of each negative entry.
macro_rules! V_i32 {
    () => {
        vec![
            0x0000_0000i32,
            0x0101_0101,
            0x0202_0202,
            0x0F0F_0F0F,
            // NOTE(review): this is 0x8080_8080, not `i32::MIN` (0x8000_0000) as the previous
            // comment claimed; the value is kept as-is so existing tests see the same inputs.
            -2139062144, // 0x8080_8080
            -252645136,  // 0xF0F0_F0F0
            -1,          // 0xFFFF_FFFF
        ]
    };
}
|
||||
|
||||
/// Expands to a `Vec<i64>` of sample values used as test inputs.
///
/// The trailing comments give the two's-complement bit pattern of each negative entry.
macro_rules! V_i64 {
    () => {
        vec![
            0x0000_0000_0000_0000i64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            i64::MIN,             // 0x8000_0000_0000_0000
            -1152921504606846976, // 0xF000_0000_0000_0000
            -1,                   // 0xFFFF_FFFF_FFFF_FFFF
        ]
    };
}
|
||||
|
||||
/// Expands to a `Vec<f32>` of sample values used as test inputs: zero, ordinary finite values,
/// the finite extremes, both infinities and NaN.
macro_rules! V_f32 {
    () => {
        vec![
            0.0f32,
            1.0,
            -1.0,
            1.2,
            2.4,
            f32::MAX,
            f32::MIN,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::NAN,
        ]
    };
}
|
||||
|
||||
/// Expands to a closure that reinterprets a value of type `$t` as a `u64`.
///
/// The closure body calls `transmute`, so the expansion has to appear inside an `unsafe`
/// context and `$t` has to be exactly 64 bits wide for it to compile.
macro_rules! to64 {
    ($t:ident) => {
        |bits: $t| -> u64 { transmute(bits) }
    };
}
|
||||
|
||||
/// Expands to a closure that reinterprets a value of type `$t` as a `u128`.
///
/// The closure body calls `transmute`, so the expansion has to appear inside an `unsafe`
/// context and `$t` has to be exactly 128 bits wide for it to compile.
macro_rules! to128 {
    ($t:ident) => {
        |bits: $t| -> u128 { transmute(bits) }
    };
}
|
||||
|
||||
/// Checks a two-operand operation against a scalar reference implementation.
///
/// For every element `x` of `vals`, both operands are built with `fill1(x)`, `test_fun` is
/// applied to them, and the result is compared (after `cast`) with
/// `fill2(verify_fun(x, x))`.
///
/// NOTE(review): `vals` is only ever paired with itself element-wise, so `test_fun` is never
/// called with two different operands. Confirm whether a full cross product was intended.
///
/// # Panics
///
/// Panics on the first element for which the two cast results differ.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + core::fmt::Debug + std::cmp::PartialEq,
    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
    V: Copy + core::fmt::Debug,
    W: Copy + core::fmt::Debug,
    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
    for &scalar in &vals {
        // Both operands come from the same input element.
        let (lhs, rhs) = (fill1(scalar), fill1(scalar));

        let got_raw = test_fun(lhs, rhs);
        let want_raw = fill2(verify_fun(scalar, scalar));

        // Compare through `cast` so that `W` itself does not need `PartialEq`.
        assert_eq!(
            cast(got_raw),
            cast(want_raw),
            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
            scalar,
            scalar,
            lhs,
            rhs,
            got_raw,
            lhs,
            rhs,
            want_raw
        );
    }
}
|
||||
|
||||
/// Generates a `pub(crate)` helper named `$name` that forwards to `test` with a fixed set of
/// input values, fill functions and cast, leaving only the function under test and its scalar
/// reference implementation for the caller to supply.
///
/// The type arguments are, in order: scalar input, scalar result, vector input, vector result
/// and the integer type the vector result is cast to for comparison.
macro_rules! gen_test_fn {
    (
        $name:ident,
        $scalar_in:ident,
        $scalar_out:ident,
        $vec_in:ident,
        $vec_out:ident,
        $bits:ident,
        $vals:expr,
        $fill1:expr,
        $fill2:expr,
        $cast:expr
    ) => {
        pub(crate) fn $name(
            test_fun: fn($vec_in, $vec_in) -> $vec_out,
            verify_fun: fn($scalar_in, $scalar_in) -> $scalar_out,
        ) {
            // The `unsafe` block is what lets `$cast` expand to a closure that calls an
            // unsafe function such as `transmute` (as `to64!` / `to128!` do).
            unsafe {
                test::<$scalar_in, $scalar_out, $vec_in, $vec_out, $bits>(
                    $vals, $fill1, $fill2, $cast, test_fun, verify_fun,
                )
            };
        }
    };
}
|
||||
|
||||
/// Generates a `pub(crate)` function `$id` that copies `val` into every one of `$num_els`
/// lanes and returns the lanes reinterpreted as `$out_t`.
///
/// The reinterpretation goes through the integer type `$cmp_t`. `$el_width` is accepted but
/// not used by the generated code.
macro_rules! gen_fill_fn {
    (
        $id:ident,
        $el_width:expr,
        $num_els:expr,
        $in_t:ident,
        $out_t:ident,
        $cmp_t:ident
    ) => {
        pub(crate) fn $id(val: $in_t) -> $out_t {
            let lanes: [$in_t; $num_els] = [val; $num_els];

            // SAFETY: `transmute` only compiles when source and destination have the same
            // size, so the lane array, `$cmp_t` and `$out_t` are all equally wide.
            // NOTE(review): this presumes every bit pattern is a valid `$out_t` — confirm
            // for each instantiation.
            unsafe {
                let bits: $cmp_t = transmute(lanes);
                transmute::<$cmp_t, $out_t>(bits)
            }
        }
    };
}
|
||||
|
||||
gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
|
||||
gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
|
||||
gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
|
||||
gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
|
||||
gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
|
||||
gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
|
||||
gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
|
||||
gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
|
||||
gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
|
||||
gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
|
||||
gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
|
||||
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
|
||||
gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
|
||||
gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x8_t,
|
||||
int8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_s8,
|
||||
to64!(int8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x8_t,
|
||||
int8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_s8,
|
||||
to64!(int8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s8,
|
||||
i8,
|
||||
u8,
|
||||
int8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x16_t,
|
||||
int8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_s8,
|
||||
to128!(int8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x16_t,
|
||||
int8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_s8,
|
||||
to128!(int8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s8,
|
||||
i8,
|
||||
u8,
|
||||
int8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x4_t,
|
||||
int16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_s16,
|
||||
to64!(int16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x4_t,
|
||||
int16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_s16,
|
||||
to64!(int16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s16,
|
||||
i16,
|
||||
u16,
|
||||
int16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x8_t,
|
||||
int16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_s16,
|
||||
to128!(int16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x8_t,
|
||||
int16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_s16,
|
||||
to128!(int16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s16,
|
||||
i16,
|
||||
u16,
|
||||
int16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x2_t,
|
||||
int32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_s32,
|
||||
to64!(int32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x2_t,
|
||||
int32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_s32,
|
||||
to64!(int32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s32,
|
||||
i32,
|
||||
u32,
|
||||
int32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x4_t,
|
||||
int32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_s32,
|
||||
to128!(int32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x4_t,
|
||||
int32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_s32,
|
||||
to128!(int32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s32,
|
||||
i32,
|
||||
u32,
|
||||
int32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x1_t,
|
||||
int64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_s64,
|
||||
to64!(int64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x1_t,
|
||||
int64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_s64,
|
||||
to64!(int64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s64,
|
||||
i64,
|
||||
u64,
|
||||
int64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x2_t,
|
||||
int64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_s64,
|
||||
to128!(int64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x2_t,
|
||||
int64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_s64,
|
||||
to128!(int64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s64,
|
||||
i64,
|
||||
u64,
|
||||
int64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_f32,
|
||||
f32,
|
||||
f32,
|
||||
float32x2_t,
|
||||
float32x2_t,
|
||||
u64,
|
||||
V_f32!(),
|
||||
fill_f32,
|
||||
fill_f32,
|
||||
to64!(float32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_f32,
|
||||
f32,
|
||||
u32,
|
||||
float32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_f32!(),
|
||||
fill_f32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_f32,
|
||||
f32,
|
||||
f32,
|
||||
float32x4_t,
|
||||
float32x4_t,
|
||||
u128,
|
||||
V_f32!(),
|
||||
fillq_f32,
|
||||
fillq_f32,
|
||||
to128!(float32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_f32,
|
||||
f32,
|
||||
u32,
|
||||
float32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_f32!(),
|
||||
fillq_f32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
350
library/stdarch/crates/core_arch/src/core_arch_docs.md
Normal file
350
library/stdarch/crates/core_arch/src/core_arch_docs.md
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
SIMD and vendor intrinsics module.
|
||||
|
||||
This module is intended to be the gateway to architecture-specific
|
||||
intrinsic functions, typically related to SIMD (but not always!). Each
|
||||
architecture that Rust compiles to may contain a submodule here, which
|
||||
means that this is not a portable module! If you're writing a portable
|
||||
library take care when using these APIs!
|
||||
|
||||
Under this module you'll find an architecture-named module, such as
|
||||
`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a
|
||||
module entry here, only present on that particular target. For example the
|
||||
`i686-pc-windows-msvc` target will have an `x86` module here, whereas
|
||||
`x86_64-pc-windows-msvc` has `x86_64`.
|
||||
|
||||
[rfc]: https://github.com/rust-lang/rfcs/pull/2325
|
||||
[tracked]: https://github.com/rust-lang/rust/issues/48556
|
||||
|
||||
# Overview
|
||||
|
||||
This module exposes vendor-specific intrinsics that typically correspond to
|
||||
a single machine instruction. These intrinsics are not portable: their
|
||||
availability is architecture-dependent, and not all machines of that
|
||||
architecture might provide the intrinsic.
|
||||
|
||||
The `arch` module is intended to be a low-level implementation detail for
|
||||
higher-level APIs. Using it correctly can be quite tricky as you need to
|
||||
ensure at least a few guarantees are upheld:
|
||||
|
||||
* The correct architecture's module is used. For example the `arm` module
|
||||
isn't available on the `x86_64-unknown-linux-gnu` target. This is
|
||||
typically done by ensuring that `#[cfg]` is used appropriately when using
|
||||
this module.
|
||||
* The CPU the program is currently running on supports the function being
|
||||
called. For example it is unsafe to call an AVX2 function on a CPU that
|
||||
doesn't actually support AVX2.
|
||||
|
||||
As a result of the latter of these guarantees all intrinsics in this module
|
||||
are `unsafe` and extra care needs to be taken when calling them!
|
||||
|
||||
# CPU Feature Detection
|
||||
|
||||
In order to call these APIs in a safe fashion there are a number of
|
||||
mechanisms available to ensure that the correct CPU feature is available
|
||||
to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64`
|
||||
intrinsic on the `x86` and `x86_64` architectures. This function requires
|
||||
the AVX2 feature as [documented by Intel][intel-dox] so to correctly call
|
||||
this function we need to (a) guarantee we only call it on `x86`/`x86_64`
|
||||
and (b) ensure that the CPU feature is available.
|
||||
|
||||
[intel-dox]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64&expand=100
|
||||
|
||||
## Static CPU Feature Detection
|
||||
|
||||
The first option available to us is to conditionally compile code via the
|
||||
`#[cfg]` attribute. CPU features correspond to the `target_feature` cfg
|
||||
available, and can be used like so:
|
||||
|
||||
```ignore
|
||||
#[cfg(
|
||||
all(
|
||||
any(target_arch = "x86", target_arch = "x86_64"),
|
||||
target_feature = "avx2"
|
||||
)
|
||||
)]
|
||||
fn foo() {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::_mm256_add_epi64;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::_mm256_add_epi64;
|
||||
|
||||
unsafe {
|
||||
_mm256_add_epi64(...);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile
|
||||
this function into our module. This means that if the `avx2` feature is
|
||||
*enabled statically* then we'll use the `_mm256_add_epi64` function at
|
||||
runtime. The `unsafe` block here can be justified through the usage of
|
||||
`#[cfg]` to only compile the code in situations where the safety guarantees
|
||||
are upheld.
|
||||
|
||||
Statically enabling a feature is typically done with the `-C
|
||||
target-feature` or `-C target-cpu` flags to the compiler. For example if
|
||||
your local CPU supports AVX2 then you can compile the above function with:
|
||||
|
||||
```sh
|
||||
$ RUSTFLAGS='-C target-cpu=native' cargo build
|
||||
```
|
||||
|
||||
Or otherwise you can specifically enable just the AVX2 feature:
|
||||
|
||||
```sh
|
||||
$ RUSTFLAGS='-C target-feature=+avx2' cargo build
|
||||
```
|
||||
|
||||
Note that when you compile a binary with a particular feature enabled it's
|
||||
important to ensure that you only run the binary on systems which satisfy
|
||||
the required feature set.
|
||||
|
||||
## Dynamic CPU Feature Detection
|
||||
|
||||
Sometimes statically dispatching isn't quite what you want. Instead you
|
||||
might want to build a portable binary that runs across a variety of CPUs,
|
||||
but at runtime it selects the most optimized implementation available. This
|
||||
allows you to build a "least common denominator" binary which has certain
|
||||
sections more optimized for different CPUs.
|
||||
|
||||
Taking our previous example, we're going to compile our binary
|
||||
*without* AVX2 support, but we'd like to enable it for just one function.
|
||||
We can do that in a manner like:
|
||||
|
||||
```ignore
|
||||
fn foo() {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return unsafe { foo_avx2() };
|
||||
}
|
||||
}
|
||||
|
||||
// fallback implementation without using AVX2
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn foo_avx2() {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::_mm256_add_epi64;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::_mm256_add_epi64;
|
||||
|
||||
unsafe { _mm256_add_epi64(...); }
|
||||
}
|
||||
```
|
||||
|
||||
There are a couple of components in play here, so let's go through them in
|
||||
detail!
|
||||
|
||||
* First up we notice the `is_x86_feature_detected!` macro. Provided by
|
||||
the standard library, this macro will perform necessary runtime detection
|
||||
to determine whether the CPU the program is running on supports the
|
||||
specified feature. In this case the macro will expand to a boolean
|
||||
expression evaluating to whether the local CPU has the AVX2 feature or
|
||||
not.
|
||||
|
||||
Note that this macro, like the `arch` module, is platform-specific. For
|
||||
example calling `is_x86_feature_detected!("avx2")` on ARM will be a
|
||||
compile time error. To ensure we don't hit this error a statement level
|
||||
`#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`.
|
||||
|
||||
* Next up we see our AVX2-enabled function, `foo_avx2`. This function is
|
||||
decorated with the `#[target_feature]` attribute which enables a CPU
|
||||
feature for just this one function. Using a compiler flag like `-C
|
||||
target-feature=+avx2` will enable AVX2 for the entire program, but using
|
||||
an attribute will only enable it for the one function. Usage of the
|
||||
`#[target_feature]` attribute currently requires the function to also be
|
||||
`unsafe`, as we see here. This is because the function can only be
|
||||
correctly called on systems which have the AVX2 feature (like the intrinsics
|
||||
themselves).
|
||||
|
||||
And with all that we should have a working program! This program will run
|
||||
across all machines and it'll use the optimized AVX2 implementation on
|
||||
machines where support is detected.
|
||||
|
||||
# Ergonomics
|
||||
|
||||
It's important to note that using the `arch` module is not the easiest
|
||||
thing in the world, so if you're curious to try it out you may want to
|
||||
brace yourself for some wordiness!
|
||||
|
||||
The primary purpose of this module is to enable stable crates on crates.io
|
||||
to build up much more ergonomic abstractions which end up using SIMD under
|
||||
the hood. Over time these abstractions may also move into the standard
|
||||
library itself, but for now this module is tasked with providing the bare
|
||||
minimum necessary to use vendor intrinsics on stable Rust.
|
||||
|
||||
# Other architectures
|
||||
|
||||
This documentation is only for one particular architecture; you can find
|
||||
others at:
|
||||
|
||||
* [`x86`]
|
||||
* [`x86_64`]
|
||||
* [`arm`]
|
||||
* [`aarch64`]
|
||||
* [`riscv32`]
|
||||
* [`riscv64`]
|
||||
* [`mips`]
|
||||
* [`mips64`]
|
||||
* [`powerpc`]
|
||||
* [`powerpc64`]
|
||||
* [`nvptx`]
|
||||
* [`wasm32`]
|
||||
* [`loongarch64`]
|
||||
* [`s390x`]
|
||||
|
||||
[`x86`]: ../../core/arch/x86/index.html
|
||||
[`x86_64`]: ../../core/arch/x86_64/index.html
|
||||
[`arm`]: ../../core/arch/arm/index.html
|
||||
[`aarch64`]: ../../core/arch/aarch64/index.html
|
||||
[`riscv32`]: ../../core/arch/riscv32/index.html
|
||||
[`riscv64`]: ../../core/arch/riscv64/index.html
|
||||
[`mips`]: ../../core/arch/mips/index.html
|
||||
[`mips64`]: ../../core/arch/mips64/index.html
|
||||
[`powerpc`]: ../../core/arch/powerpc/index.html
|
||||
[`powerpc64`]: ../../core/arch/powerpc64/index.html
|
||||
[`nvptx`]: ../../core/arch/nvptx/index.html
|
||||
[`wasm32`]: ../../core/arch/wasm32/index.html
|
||||
[`loongarch64`]: ../../core/arch/loongarch64/index.html
|
||||
[`s390x`]: ../../core/arch/s390x/index.html
|
||||
|
||||
# Examples
|
||||
|
||||
First let's take a look at not actually using any intrinsics but instead
|
||||
using LLVM's auto-vectorization to produce optimized vectorized code for
|
||||
AVX2 and also for the default platform.
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let mut dst = [0];
|
||||
add_quickly(&[1], &[2], &mut dst);
|
||||
assert_eq!(dst[0], 3);
|
||||
}
|
||||
|
||||
fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
// Note that this `unsafe` block is safe because we're testing
|
||||
// that the `avx2` feature is indeed available on our CPU.
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return unsafe { add_quickly_avx2(a, b, c) };
|
||||
}
|
||||
}
|
||||
|
||||
add_quickly_fallback(a, b, c)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
add_quickly_fallback(a, b, c) // the function below is inlined here
|
||||
}
|
||||
|
||||
fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
for ((a, b), c) in a.iter().zip(b).zip(c) {
|
||||
*c = *a + *b;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Next up let's take a look at an example of manually using intrinsics. Here
|
||||
we'll be using SSE4.1 features to implement hex encoding.
|
||||
|
||||
```
|
||||
fn main() {
|
||||
let mut dst = [0; 32];
|
||||
hex_encode(b"\x01\x02\x03", &mut dst);
|
||||
assert_eq!(&dst[..6], b"010203");
|
||||
|
||||
let mut src = [0; 16];
|
||||
for i in 0..16 {
|
||||
src[i] = (i + 1) as u8;
|
||||
}
|
||||
hex_encode(&src, &mut dst);
|
||||
assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10");
|
||||
}
|
||||
|
||||
pub fn hex_encode(src: &[u8], dst: &mut [u8]) {
|
||||
let len = src.len().checked_mul(2).unwrap();
|
||||
assert!(dst.len() >= len);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if is_x86_feature_detected!("sse4.1") {
|
||||
return unsafe { hex_encode_sse41(src, dst) };
|
||||
}
|
||||
}
|
||||
|
||||
hex_encode_fallback(src, dst)
|
||||
}
|
||||
|
||||
// translated from
|
||||
// <https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp>
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
unsafe {
|
||||
let ascii_zero = _mm_set1_epi8(b'0' as i8);
|
||||
let nines = _mm_set1_epi8(9);
|
||||
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
|
||||
let and4bits = _mm_set1_epi8(0xf);
|
||||
|
||||
let mut i = 0_isize;
|
||||
while src.len() >= 16 {
|
||||
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
|
||||
|
||||
let masked1 = _mm_and_si128(invec, and4bits);
|
||||
let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
|
||||
|
||||
// return 0xff corresponding to the elements > 9, or 0x00 otherwise
|
||||
let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
|
||||
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
|
||||
|
||||
// add '0' or the offset depending on the masks
|
||||
let masked1 = _mm_add_epi8(
|
||||
masked1,
|
||||
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
|
||||
);
|
||||
let masked2 = _mm_add_epi8(
|
||||
masked2,
|
||||
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
|
||||
);
|
||||
|
||||
// interleave masked1 and masked2 bytes
|
||||
let res1 = _mm_unpacklo_epi8(masked2, masked1);
|
||||
let res2 = _mm_unpackhi_epi8(masked2, masked1);
|
||||
|
||||
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
|
||||
_mm_storeu_si128(
|
||||
dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
|
||||
res2,
|
||||
);
|
||||
src = &src[16..];
|
||||
i += 16;
|
||||
}
|
||||
|
||||
let i = i as usize;
|
||||
hex_encode_fallback(src, &mut dst[i * 2..]);
|
||||
}
|
||||
}
|
||||
|
||||
fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
|
||||
fn hex(byte: u8) -> u8 {
|
||||
static TABLE: &[u8] = b"0123456789abcdef";
|
||||
TABLE[byte as usize]
|
||||
}
|
||||
|
||||
for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
|
||||
slots[0] = hex((*byte >> 4) & 0xf);
|
||||
slots[1] = hex(*byte & 0xf);
|
||||
}
|
||||
}
|
||||
```
|
||||
94
library/stdarch/crates/core_arch/src/lib.rs
Normal file
94
library/stdarch/crates/core_arch/src/lib.rs
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
#![doc = include_str!("core_arch_docs.md")]
|
||||
#![allow(improper_ctypes_definitions)]
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_features)]
|
||||
#![allow(internal_features)]
|
||||
#![allow(unsafe_op_in_unsafe_fn)]
|
||||
#![deny(rust_2018_idioms)]
|
||||
#![feature(
|
||||
custom_inner_attributes,
|
||||
link_llvm_intrinsics,
|
||||
repr_simd,
|
||||
simd_ffi,
|
||||
proc_macro_hygiene,
|
||||
stmt_expr_attributes,
|
||||
core_intrinsics,
|
||||
no_core,
|
||||
fmt_helpers_for_derive,
|
||||
rustc_attrs,
|
||||
staged_api,
|
||||
doc_cfg,
|
||||
tbm_target_feature,
|
||||
sse4a_target_feature,
|
||||
riscv_target_feature,
|
||||
arm_target_feature,
|
||||
mips_target_feature,
|
||||
powerpc_target_feature,
|
||||
s390x_target_feature,
|
||||
loongarch_target_feature,
|
||||
wasm_target_feature,
|
||||
abi_unadjusted,
|
||||
rtm_target_feature,
|
||||
allow_internal_unstable,
|
||||
decl_macro,
|
||||
asm_experimental_arch,
|
||||
x86_amx_intrinsics,
|
||||
f16,
|
||||
aarch64_unstable_target_feature,
|
||||
bigint_helper_methods
|
||||
)]
|
||||
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
|
||||
#![deny(clippy::missing_inline_in_public_items)]
|
||||
#![allow(
|
||||
clippy::identity_op,
|
||||
clippy::inline_always,
|
||||
clippy::too_many_arguments,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cognitive_complexity,
|
||||
clippy::many_single_char_names,
|
||||
clippy::missing_safety_doc,
|
||||
clippy::shadow_reuse,
|
||||
clippy::similar_names,
|
||||
clippy::unusual_byte_groupings,
|
||||
clippy::wrong_self_convention
|
||||
)]
|
||||
#![cfg_attr(test, allow(unused_imports))]
|
||||
#![no_std]
|
||||
#![stable(feature = "stdsimd", since = "1.27.0")]
|
||||
#![doc(
|
||||
test(attr(deny(warnings))),
|
||||
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
|
||||
)]
|
||||
#![cfg_attr(
|
||||
test,
|
||||
feature(
|
||||
stdarch_arm_feature_detection,
|
||||
stdarch_powerpc_feature_detection,
|
||||
stdarch_s390x_feature_detection
|
||||
)
|
||||
)]
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std_detect;
|
||||
#[path = "mod.rs"]
|
||||
mod core_arch;
|
||||
|
||||
#[stable(feature = "stdsimd", since = "1.27.0")]
|
||||
pub mod arch {
|
||||
#[stable(feature = "stdsimd", since = "1.27.0")]
|
||||
#[allow(unused_imports)]
|
||||
pub use crate::core_arch::arch::*;
|
||||
#[stable(feature = "stdsimd", since = "1.27.0")]
|
||||
pub use core::arch::asm;
|
||||
}
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
|
||||
7063
library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
Normal file
7063
library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
Normal file
File diff suppressed because it is too large
Load diff
21
library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs
Normal file
21
library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
//! LoongArch64 LASX intrinsics
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
#[rustfmt::skip]
|
||||
mod types;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::types::*;
|
||||
|
||||
#[rustfmt::skip]
|
||||
mod generated;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::generated::*;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
14758
library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
Normal file
14758
library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,33 @@
|
|||
types! {
|
||||
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 32 packed `i8`.
|
||||
pub struct v32i8(32 x pub(crate) i8);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 16 packed `i16`.
|
||||
pub struct v16i16(16 x pub(crate) i16);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 8 packed `i32`.
|
||||
pub struct v8i32(8 x pub(crate) i32);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 4 packed `i64`.
|
||||
pub struct v4i64(4 x pub(crate) i64);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 32 packed `u8`.
|
||||
pub struct v32u8(32 x pub(crate) u8);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 16 packed `u16`.
|
||||
pub struct v16u16(16 x pub(crate) u16);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 8 packed `u32`.
|
||||
pub struct v8u32(8 x pub(crate) u32);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 4 packed `u64`.
|
||||
pub struct v4u64(4 x pub(crate) u64);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 8 packed `f32`.
|
||||
pub struct v8f32(8 x pub(crate) f32);
|
||||
|
||||
/// LOONGARCH-specific 256-bit wide vector of 4 packed `f64`.
|
||||
pub struct v4f64(4 x pub(crate) f64);
|
||||
}
|
||||
6879
library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
Normal file
6879
library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
Normal file
File diff suppressed because it is too large
Load diff
21
library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs
Normal file
21
library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
//! LoongArch64 LSX intrinsics
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
#[rustfmt::skip]
|
||||
mod types;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::types::*;
|
||||
|
||||
#[rustfmt::skip]
|
||||
mod generated;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::generated::*;
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
7164
library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
Normal file
7164
library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,33 @@
|
|||
types! {
|
||||
#![unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 16 packed `i8`.
|
||||
pub struct v16i8(16 x pub(crate) i8);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 8 packed `i16`.
|
||||
pub struct v8i16(8 x pub(crate) i16);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 4 packed `i32`.
|
||||
pub struct v4i32(4 x pub(crate) i32);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 2 packed `i64`.
|
||||
pub struct v2i64(2 x pub(crate) i64);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 16 packed `u8`.
|
||||
pub struct v16u8(16 x pub(crate) u8);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 8 packed `u16`.
|
||||
pub struct v8u16(8 x pub(crate) u16);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 4 packed `u32`.
|
||||
pub struct v4u32(4 x pub(crate) u32);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 2 packed `u64`.
|
||||
pub struct v2u64(2 x pub(crate) u64);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 4 packed `f32`.
|
||||
pub struct v4f32(4 x pub(crate) f32);
|
||||
|
||||
/// LOONGARCH-specific 128-bit wide vector of 2 packed `f64`.
|
||||
pub struct v2f64(2 x pub(crate) f64);
|
||||
}
|
||||
376
library/stdarch/crates/core_arch/src/loongarch64/mod.rs
Normal file
376
library/stdarch/crates/core_arch/src/loongarch64/mod.rs
Normal file
|
|
@ -0,0 +1,376 @@
|
|||
//! `LoongArch` intrinsics
|
||||
|
||||
mod lasx;
|
||||
mod lsx;
|
||||
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::lasx::*;
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub use self::lsx::*;
|
||||
|
||||
use crate::arch::asm;
|
||||
|
||||
/// Reads the 64-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtime_d() -> (i64, isize) {
|
||||
let val: i64;
|
||||
let tid: isize;
|
||||
asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
/// Reads the lower 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtimel_w() -> (i32, isize) {
|
||||
let val: i32;
|
||||
let tid: isize;
|
||||
asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
/// Reads the upper 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtimeh_w() -> (i32, isize) {
|
||||
let val: i32;
|
||||
let tid: isize;
|
||||
asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.loongarch.crc.w.b.w"]
|
||||
fn __crc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.h.w"]
|
||||
fn __crc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.w.w"]
|
||||
fn __crc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.d.w"]
|
||||
fn __crc_w_d_w(a: i64, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.b.w"]
|
||||
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.h.w"]
|
||||
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.w.w"]
|
||||
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.d.w"]
|
||||
fn __crcc_w_d_w(a: i64, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.cacop.d"]
|
||||
fn __cacop(a: i64, b: i64, c: i64);
|
||||
#[link_name = "llvm.loongarch.dbar"]
|
||||
fn __dbar(a: i32);
|
||||
#[link_name = "llvm.loongarch.ibar"]
|
||||
fn __ibar(a: i32);
|
||||
#[link_name = "llvm.loongarch.movgr2fcsr"]
|
||||
fn __movgr2fcsr(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.movfcsr2gr"]
|
||||
fn __movfcsr2gr(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.csrrd.d"]
|
||||
fn __csrrd(a: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.csrwr.d"]
|
||||
fn __csrwr(a: i64, b: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.csrxchg.d"]
|
||||
fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.b"]
|
||||
fn __iocsrrd_b(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.h"]
|
||||
fn __iocsrrd_h(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.w"]
|
||||
fn __iocsrrd_w(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.d"]
|
||||
fn __iocsrrd_d(a: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.iocsrwr.b"]
|
||||
fn __iocsrwr_b(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.h"]
|
||||
fn __iocsrwr_h(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.w"]
|
||||
fn __iocsrwr_w(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.d"]
|
||||
fn __iocsrwr_d(a: i64, b: i32);
|
||||
#[link_name = "llvm.loongarch.break"]
|
||||
fn __break(a: i32);
|
||||
#[link_name = "llvm.loongarch.cpucfg"]
|
||||
fn __cpucfg(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.syscall"]
|
||||
fn __syscall(a: i32);
|
||||
#[link_name = "llvm.loongarch.asrtle.d"]
|
||||
fn __asrtle(a: i64, b: i64);
|
||||
#[link_name = "llvm.loongarch.asrtgt.d"]
|
||||
fn __asrtgt(a: i64, b: i64);
|
||||
#[link_name = "llvm.loongarch.lddir.d"]
|
||||
fn __lddir(a: i64, b: i64) -> i64;
|
||||
#[link_name = "llvm.loongarch.ldpte.d"]
|
||||
fn __ldpte(a: i64, b: i64);
|
||||
#[link_name = "llvm.loongarch.frecipe.s"]
|
||||
fn __frecipe_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frecipe.d"]
|
||||
fn __frecipe_d(a: f64) -> f64;
|
||||
#[link_name = "llvm.loongarch.frsqrte.s"]
|
||||
fn __frsqrte_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frsqrte.d"]
|
||||
fn __frsqrte_d(a: f64) -> f64;
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_b_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_h_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_w_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
__crc_w_d_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_b_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_h_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_w_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
__crcc_w_d_w(a, b)
|
||||
}
|
||||
|
||||
/// Generates the cache operation instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
|
||||
static_assert_simm_bits!(IMM12, 12);
|
||||
__cacop(a, b, IMM12);
|
||||
}
|
||||
|
||||
/// Generates the memory barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn dbar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__dbar(IMM15);
|
||||
}
|
||||
|
||||
/// Generates the instruction-fetch barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn ibar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__ibar(IMM15);
|
||||
}
|
||||
|
||||
/// Moves data from a GPR to the FCSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
__movgr2fcsr(IMM5, a);
|
||||
}
|
||||
|
||||
/// Moves data from the FCSR to a GPR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
__movfcsr2gr(IMM5)
|
||||
}
|
||||
|
||||
/// Reads the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrrd<const IMM14: i32>() -> i64 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrrd(IMM14)
|
||||
}
|
||||
|
||||
/// Writes the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrwr<const IMM14: i32>(a: i64) -> i64 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrwr(a, IMM14)
|
||||
}
|
||||
|
||||
/// Exchanges the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrxchg(a, b, IMM14)
|
||||
}
|
||||
|
||||
/// Reads the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
|
||||
__iocsrrd_b(a)
|
||||
}
|
||||
|
||||
/// Reads the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
|
||||
__iocsrrd_h(a)
|
||||
}
|
||||
|
||||
/// Reads the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
|
||||
__iocsrrd_w(a)
|
||||
}
|
||||
|
||||
/// Reads the 64-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_d(a: i32) -> i64 {
|
||||
__iocsrrd_d(a)
|
||||
}
|
||||
|
||||
/// Writes the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
|
||||
__iocsrwr_b(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
|
||||
__iocsrwr_h(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
|
||||
__iocsrwr_w(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 64-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_d(a: i64, b: i32) {
|
||||
__iocsrwr_d(a, b)
|
||||
}
|
||||
|
||||
/// Generates the breakpoint instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn brk<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__break(IMM15);
|
||||
}
|
||||
|
||||
/// Reads the CPU configuration register
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn cpucfg(a: i32) -> i32 {
|
||||
__cpucfg(a)
|
||||
}
|
||||
|
||||
/// Generates the syscall instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn syscall<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__syscall(IMM15);
|
||||
}
|
||||
|
||||
/// Generates the less-than-or-equal assertion instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn asrtle(a: i64, b: i64) {
|
||||
__asrtle(a, b);
|
||||
}
|
||||
|
||||
/// Generates the greater-than assertion instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn asrtgt(a: i64, b: i64) {
|
||||
__asrtgt(a, b);
|
||||
}
|
||||
|
||||
/// Loads the page table directory entry
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
|
||||
__lddir(a, B)
|
||||
}
|
||||
|
||||
/// Loads the page table entry
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn ldpte<const B: i64>(a: i64) {
|
||||
__ldpte(a, B)
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of 1.0 divided by `a`
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frecipe_s(a: f32) -> f32 {
|
||||
__frecipe_s(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of 1.0 divided by `a`
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frecipe_d(a: f64) -> f64 {
|
||||
__frecipe_d(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of dividing 1.0 by the square root of `a`
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frsqrte_s(a: f32) -> f32 {
|
||||
__frsqrte_s(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of dividing 1.0 by the square root of `a`
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frsqrte_d(a: f64) -> f64 {
|
||||
__frsqrte_d(a)
|
||||
}
|
||||
165
library/stdarch/crates/core_arch/src/macros.rs
Normal file
165
library/stdarch/crates/core_arch/src/macros.rs
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
//! Utility macros.
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! static_assert {
|
||||
($e:expr) => {
|
||||
const {
|
||||
assert!($e);
|
||||
}
|
||||
};
|
||||
($e:expr, $msg:expr) => {
|
||||
const {
|
||||
assert!($e, $msg);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[allow(unused_macros)]
|
||||
macro_rules! static_assert_uimm_bits {
|
||||
($imm:ident, $bits:expr) => {
|
||||
// `0 <= $imm` produces a warning if the immediate has an unsigned type
|
||||
#[allow(unused_comparisons)]
|
||||
{
|
||||
static_assert!(
|
||||
0 <= $imm && $imm < (1 << $bits),
|
||||
concat!(
|
||||
stringify!($imm),
|
||||
" doesn't fit in ",
|
||||
stringify!($bits),
|
||||
" bits",
|
||||
)
|
||||
)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Statically asserts that the constant `$imm` fits in a signed immediate
/// that is `$bits` bits wide.
///
/// A signed `$bits`-bit immediate covers the inclusive range
/// `-2^($bits - 1) ..= 2^($bits - 1) - 1`; any value outside that range
/// fails compilation with a "`$imm` doesn't fit in `$bits` bits" message.
///
/// Relies on the `static_assert!` macro being in scope at the call site.
#[allow(unused_macros)]
macro_rules! static_assert_simm_bits {
    ($imm:ident, $bits:expr) => {
        static_assert!(
            // `-1 << ($bits - 1)` is already the smallest representable value
            // (`-2^($bits - 1)`); subtracting one more would wrongly accept a
            // value that needs `$bits + 1` bits.
            (-1 << ($bits - 1)) <= $imm && $imm < (1 << ($bits - 1)),
            concat!(
                stringify!($imm),
                " doesn't fit in ",
                stringify!($bits),
                " bits",
            )
        )
    };
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! types {
|
||||
(
|
||||
#![$stability_first:meta]
|
||||
$(
|
||||
#![$stability_more:meta]
|
||||
)*
|
||||
|
||||
$(
|
||||
$(#[$doc:meta])*
|
||||
$(stability: [$stability_already: meta])*
|
||||
pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
|
||||
)*
|
||||
) => (types! {
|
||||
$(
|
||||
#![$stability_more]
|
||||
)*
|
||||
|
||||
$(
|
||||
$(#[$doc])*
|
||||
$(stability: [$stability_already])*
|
||||
stability: [$stability_first]
|
||||
pub struct $name($len x $v $elem_type);
|
||||
)*
|
||||
});
|
||||
|
||||
(
|
||||
$(
|
||||
$(#[$doc:meta])*
|
||||
$(stability: [$stability: meta])+
|
||||
pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
|
||||
)*
|
||||
) => ($(
|
||||
$(#[$doc])*
|
||||
$(#[$stability])+
|
||||
#[derive(Copy, Clone)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(simd)]
|
||||
#[allow(clippy::missing_inline_in_public_items)]
|
||||
pub struct $name($v [$elem_type; $len]);
|
||||
|
||||
impl $name {
|
||||
/// Using `my_simd([x; N])` seemingly fails tests,
|
||||
/// so use this internal helper for it instead.
|
||||
#[inline(always)]
|
||||
$v fn splat(value: $elem_type) -> $name {
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(simd)]
|
||||
struct JustOne([$elem_type; 1]);
|
||||
let one = JustOne([value]);
|
||||
// SAFETY: 0 is always in-bounds because we're shuffling
|
||||
// a simd type with exactly one element.
|
||||
unsafe { simd_shuffle!(one, one, [0; $len]) }
|
||||
}
|
||||
|
||||
/// Returns an array reference containing the entire SIMD vector.
|
||||
$v const fn as_array(&self) -> &[$elem_type; $len] {
|
||||
// SAFETY: this type is just an overaligned `[T; N]` with
|
||||
// potential padding at the end, so pointer casting to a
|
||||
// `&[T; N]` is safe.
|
||||
//
|
||||
// NOTE: This deliberately doesn't just use `&self.0` because it may soon be banned
|
||||
// see https://github.com/rust-lang/compiler-team/issues/838
|
||||
unsafe { &*(self as *const Self as *const [$elem_type; $len]) }
|
||||
|
||||
}
|
||||
|
||||
/// Returns a mutable array reference containing the entire SIMD vector.
|
||||
#[inline]
|
||||
$v fn as_mut_array(&mut self) -> &mut [$elem_type; $len] {
|
||||
// SAFETY: this type is just an overaligned `[T; N]` with
|
||||
// potential padding at the end, so pointer casting to a
|
||||
// `&mut [T; N]` is safe.
|
||||
//
|
||||
// NOTE: This deliberately doesn't just use `&mut self.0` because it may soon be banned
|
||||
// see https://github.com/rust-lang/compiler-team/issues/838
|
||||
unsafe { &mut *(self as *mut Self as *mut [$elem_type; $len]) }
|
||||
}
|
||||
}
|
||||
|
||||
$(#[$stability])+
|
||||
impl crate::fmt::Debug for $name {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
|
||||
crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
|
||||
}
|
||||
}
|
||||
)*);
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[repr(simd)]
|
||||
pub(crate) struct SimdShuffleIdx<const LEN: usize>(pub(crate) [u32; LEN]);
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! simd_shuffle {
|
||||
($x:expr, $y:expr, $idx:expr $(,)?) => {{
|
||||
$crate::intrinsics::simd::simd_shuffle(
|
||||
$x,
|
||||
$y,
|
||||
const { $crate::core_arch::macros::SimdShuffleIdx($idx) },
|
||||
)
|
||||
}};
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! simd_insert {
|
||||
($x:expr, $idx:expr, $val:expr $(,)?) => {{ $crate::intrinsics::simd::simd_insert($x, const { $idx }, $val) }};
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! simd_extract {
|
||||
($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
|
||||
($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
|
||||
}
|
||||
20
library/stdarch/crates/core_arch/src/mips/mod.rs
Normal file
20
library/stdarch/crates/core_arch/src/mips/mod.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
//! MIPS
|
||||
|
||||
// Building this module (even if unused) for non-fp64 targets fails with an LLVM
|
||||
// error.
|
||||
#[cfg(target_feature = "fp64")]
|
||||
mod msa;
|
||||
#[cfg(target_feature = "fp64")]
|
||||
#[unstable(feature = "stdarch_mips", issue = "111198")]
|
||||
pub use self::msa::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `BREAK`
|
||||
#[cfg_attr(test, assert_instr(break))]
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_mips", issue = "111198")]
|
||||
pub unsafe fn break_() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
18398
library/stdarch/crates/core_arch/src/mips/msa.rs
Normal file
18398
library/stdarch/crates/core_arch/src/mips/msa.rs
Normal file
File diff suppressed because it is too large
Load diff
343
library/stdarch/crates/core_arch/src/mod.rs
Normal file
343
library/stdarch/crates/core_arch/src/mod.rs
Normal file
|
|
@ -0,0 +1,343 @@
|
|||
//! `core_arch`
|
||||
|
||||
#![allow(unknown_lints, unnecessary_transmutes)]
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
|
||||
mod riscv_shared;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "arm",
|
||||
target_arch = "aarch64",
|
||||
target_arch = "arm64ec",
|
||||
doc
|
||||
))]
|
||||
mod arm_shared;
|
||||
|
||||
mod simd;
|
||||
|
||||
#[doc = include_str!("core_arch_docs.md")]
|
||||
#[stable(feature = "simd_arch", since = "1.27.0")]
|
||||
pub mod arch {
|
||||
/// Platform-specific intrinsics for the `x86` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86", doc))]
|
||||
#[doc(cfg(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `x86_64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86_64 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86::*;
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86_64::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `arm` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "arm", doc))]
|
||||
#[doc(cfg(target_arch = "arm"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
pub mod arm {
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
pub use crate::core_arch::arm::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `aarch64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
|
||||
#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub mod aarch64 {
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub use crate::core_arch::aarch64::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `riscv32` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "riscv32", doc))]
|
||||
#[doc(cfg(any(target_arch = "riscv32")))]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub mod riscv32 {
|
||||
pub use crate::core_arch::riscv_shared::*;
|
||||
pub use crate::core_arch::riscv32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `riscv64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "riscv64", doc))]
|
||||
#[doc(cfg(any(target_arch = "riscv64")))]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub mod riscv64 {
|
||||
pub use crate::core_arch::riscv64::*;
|
||||
// RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05).
|
||||
// Module `riscv_shared` includes instructions available under all RISC-V platforms,
|
||||
// i.e. RISC-V RV32 instructions.
|
||||
pub use crate::core_arch::riscv_shared::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `wasm32` platform.
|
||||
///
|
||||
/// This module provides intrinsics specific to the WebAssembly
|
||||
/// architecture. Here you'll find intrinsics specific to WebAssembly that
|
||||
/// aren't otherwise surfaced somewhere in a cross-platform abstraction of
|
||||
/// `std`, and you'll also find functions for leveraging WebAssembly
|
||||
/// proposals such as [atomics] and [simd].
|
||||
///
|
||||
/// Intrinsics in the `wasm32` module are modeled after the WebAssembly
|
||||
/// instructions that they represent. Most functions are named after the
|
||||
/// instruction they intend to correspond to, and the arguments/results
|
||||
/// correspond to the type signature of the instruction itself. Stable
|
||||
/// WebAssembly instructions are [documented online][instrdoc].
|
||||
///
|
||||
/// [instrdoc]: https://webassembly.github.io/spec/core/valid/instructions.html
|
||||
///
|
||||
/// If a proposal is not yet stable in WebAssembly itself then the functions
|
||||
/// within this module may be unstable and require the nightly channel of
|
||||
/// Rust to use. As the proposal itself stabilizes the intrinsics in this
|
||||
/// module should stabilize as well.
|
||||
///
|
||||
/// [atomics]: https://github.com/webassembly/threads
|
||||
/// [simd]: https://github.com/webassembly/simd
|
||||
///
|
||||
/// See the [module documentation](../index.html) for general information
|
||||
/// about the `arch` module and platform intrinsics.
|
||||
///
|
||||
/// ## Atomics
|
||||
///
|
||||
/// The [threads proposal][atomics] for WebAssembly adds a number of
|
||||
/// instructions for dealing with multithreaded programs. Most instructions
|
||||
/// added in the [atomics] proposal are exposed in Rust through the
|
||||
/// `std::sync::atomic` module. Some instructions, however, don't have
|
||||
/// direct equivalents in Rust so they're exposed here instead.
|
||||
///
|
||||
/// Note that the instructions added in the [atomics] proposal can work in
|
||||
/// a context either with or without a shared wasm memory. These intrinsics
|
||||
/// are always available in the standard library, but you likely won't be
|
||||
/// able to use them too productively unless you recompile the standard
|
||||
/// library (and all your code) with `-Ctarget-feature=+atomics`.
|
||||
///
|
||||
/// It's also worth pointing out that multi-threaded WebAssembly and its
|
||||
/// story in Rust is still in a somewhat "early days" phase as of the time
|
||||
/// of this writing. Pieces should mostly work but it generally requires a
|
||||
/// good deal of manual setup. At this time it's not as simple as "just call
|
||||
/// `std::thread::spawn`", but it will hopefully get there one day!
|
||||
///
|
||||
/// ## SIMD
|
||||
///
|
||||
/// The [simd proposal][simd] for WebAssembly added a new `v128` type for a
|
||||
/// 128-bit SIMD register. It also added a large array of instructions to
|
||||
/// operate on the `v128` type to perform data processing. Using SIMD on
|
||||
/// wasm is intended to be similar to how you would use it on `x86_64`, for example.
|
||||
/// You'd write a function such as:
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// #[cfg(target_arch = "wasm32")]
|
||||
/// #[target_feature(enable = "simd128")]
|
||||
/// unsafe fn uses_simd() {
|
||||
/// use std::arch::wasm32::*;
|
||||
/// // ...
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Unlike `x86_64`, however, WebAssembly does not currently have dynamic
|
||||
/// detection at runtime as to whether SIMD is supported (this is one of the
|
||||
/// motivators for the [conditional sections][condsections] and [feature
|
||||
/// detection] proposals, but that is still pretty early days). This means
|
||||
/// that your binary will either have SIMD and can only run on engines
|
||||
/// which support SIMD, or it will not have SIMD at all. For compatibility
|
||||
/// the standard library itself does not use any SIMD internally.
|
||||
/// Determining how best to ship your WebAssembly binary with SIMD is
|
||||
/// largely left up to you as it can be pretty nuanced depending on
|
||||
/// your situation.
|
||||
///
|
||||
/// [condsections]: https://github.com/webassembly/conditional-sections
|
||||
/// [feature detection]: https://github.com/WebAssembly/feature-detection
|
||||
///
|
||||
/// To enable SIMD support at compile time you need to do one of two things:
|
||||
///
|
||||
/// * First you can annotate functions with `#[target_feature(enable =
|
||||
/// "simd128")]`. This causes just that one function to have SIMD support
|
||||
/// available to it, and intrinsics will get inlined as usual in this
|
||||
/// situation.
|
||||
///
|
||||
/// * Second you can compile your program with `-Ctarget-feature=+simd128`.
|
||||
/// This compilation flag blanket enables SIMD support for your entire
|
||||
/// compilation. Note that this does not include the standard library
|
||||
/// unless you [recompile the standard library][buildstd].
|
||||
///
|
||||
/// [buildstd]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#build-std
|
||||
///
|
||||
/// If you enable SIMD via either of these routes then you'll have a
|
||||
/// WebAssembly binary that uses SIMD instructions, and you'll need to ship
|
||||
/// that accordingly. Also note that if you call SIMD intrinsics but don't
|
||||
/// enable SIMD via either of these mechanisms, you'll still have SIMD
|
||||
/// generated in your program. This means to generate a binary without SIMD
|
||||
/// you'll need to avoid both options above plus calling into any intrinsics
|
||||
/// in this module.
|
||||
#[cfg(any(target_arch = "wasm32", doc))]
|
||||
#[doc(cfg(target_arch = "wasm32"))]
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub mod wasm32 {
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub use crate::core_arch::wasm32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `wasm64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "wasm64", doc))]
|
||||
#[doc(cfg(target_arch = "wasm64"))]
|
||||
#[unstable(feature = "simd_wasm64", issue = "90599")]
|
||||
pub mod wasm64 {
|
||||
#[unstable(feature = "simd_wasm64", issue = "90599")]
|
||||
pub use crate::core_arch::wasm32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `wasm` target family.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_family = "wasm", doc))]
|
||||
#[doc(cfg(target_family = "wasm"))]
|
||||
#[unstable(feature = "simd_wasm64", issue = "90599")]
|
||||
pub mod wasm {
|
||||
#[unstable(feature = "simd_wasm64", issue = "90599")]
|
||||
pub use crate::core_arch::wasm32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips", doc))]
|
||||
#[doc(cfg(target_arch = "mips"))]
|
||||
#[unstable(feature = "stdarch_mips", issue = "111198")]
|
||||
pub mod mips {
|
||||
pub use crate::core_arch::mips::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips64", doc))]
|
||||
#[doc(cfg(target_arch = "mips64"))]
|
||||
#[unstable(feature = "stdarch_mips", issue = "111198")]
|
||||
pub mod mips64 {
|
||||
pub use crate::core_arch::mips::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `PowerPC` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "powerpc", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc"))]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub mod powerpc {
|
||||
pub use crate::core_arch::powerpc::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `PowerPC64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc64"))]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub mod powerpc64 {
|
||||
pub use crate::core_arch::powerpc64::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `NVPTX` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "nvptx64", doc))]
|
||||
#[doc(cfg(target_arch = "nvptx64"))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub mod nvptx {
|
||||
pub use crate::core_arch::nvptx::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `loongarch` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "loongarch64", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch64"))]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub mod loongarch64 {
|
||||
pub use crate::core_arch::loongarch64::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `s390x` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "s390x", doc))]
|
||||
#[doc(cfg(target_arch = "s390x"))]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub mod s390x {
|
||||
pub use crate::core_arch::s390x::*;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
mod x86;
|
||||
#[cfg(any(target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
mod x86_64;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
|
||||
#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
|
||||
mod aarch64;
|
||||
#[cfg(any(target_arch = "arm", doc))]
|
||||
#[doc(cfg(any(target_arch = "arm")))]
|
||||
mod arm;
|
||||
|
||||
#[cfg(any(target_arch = "riscv32", doc))]
|
||||
#[doc(cfg(any(target_arch = "riscv32")))]
|
||||
mod riscv32;
|
||||
|
||||
#[cfg(any(target_arch = "riscv64", doc))]
|
||||
#[doc(cfg(any(target_arch = "riscv64")))]
|
||||
mod riscv64;
|
||||
|
||||
#[cfg(any(target_family = "wasm", doc))]
|
||||
#[doc(cfg(target_family = "wasm"))]
|
||||
mod wasm32;
|
||||
|
||||
#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))]
|
||||
#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))]
|
||||
mod mips;
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
mod powerpc;
|
||||
|
||||
#[cfg(any(target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc64"))]
|
||||
mod powerpc64;
|
||||
|
||||
#[cfg(any(target_arch = "nvptx64", doc))]
|
||||
#[doc(cfg(target_arch = "nvptx64"))]
|
||||
mod nvptx;
|
||||
|
||||
#[cfg(any(target_arch = "loongarch64", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch64"))]
|
||||
mod loongarch64;
|
||||
|
||||
#[cfg(any(target_arch = "s390x", doc))]
|
||||
#[doc(cfg(target_arch = "s390x"))]
|
||||
mod s390x;
|
||||
236
library/stdarch/crates/core_arch/src/nvptx/mod.rs
Normal file
236
library/stdarch/crates/core_arch/src/nvptx/mod.rs
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
//! NVPTX intrinsics (experimental)
|
||||
//!
|
||||
//! These intrinsics form the foundation of the CUDA
|
||||
//! programming model.
|
||||
//!
|
||||
//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also
|
||||
//! the [LLVM NVPTX Backend documentation][llvm_docs].
|
||||
//!
|
||||
//! [cuda_c]:
|
||||
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
|
||||
//! [llvm_docs]:
|
||||
//! https://llvm.org/docs/NVPTXUsage.html
|
||||
|
||||
use crate::ffi::c_void;
|
||||
|
||||
mod packed;
|
||||
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub use packed::*;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.nvvm.barrier0"]
|
||||
fn syncthreads() -> ();
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
|
||||
fn block_dim_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
|
||||
fn block_dim_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
|
||||
fn block_dim_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
|
||||
fn block_idx_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
|
||||
fn block_idx_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
|
||||
fn block_idx_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
|
||||
fn grid_dim_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
|
||||
fn grid_dim_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
|
||||
fn grid_dim_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
|
||||
fn thread_idx_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
|
||||
fn thread_idx_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
|
||||
fn thread_idx_z() -> i32;
|
||||
}
|
||||
|
||||
/// Synchronizes all threads in the block.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _syncthreads() -> () {
|
||||
syncthreads()
|
||||
}
|
||||
|
||||
/// x-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_x() -> i32 {
|
||||
block_dim_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_y() -> i32 {
|
||||
block_dim_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_z() -> i32 {
|
||||
block_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_x() -> i32 {
|
||||
block_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_y() -> i32 {
|
||||
block_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_z() -> i32 {
|
||||
block_idx_z()
|
||||
}
|
||||
|
||||
/// x-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_x() -> i32 {
|
||||
grid_dim_x()
|
||||
}
|
||||
|
||||
/// y-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_y() -> i32 {
|
||||
grid_dim_y()
|
||||
}
|
||||
|
||||
/// z-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_z() -> i32 {
|
||||
grid_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_x() -> i32 {
|
||||
thread_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_y() -> i32 {
|
||||
thread_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_z() -> i32 {
|
||||
thread_idx_z()
|
||||
}
|
||||
|
||||
/// Generates the trap instruction `TRAP`
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn trap() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
|
||||
// Basic CUDA syscall declarations.
|
||||
unsafe extern "C" {
|
||||
/// Print formatted output from a kernel to a host-side output stream.
|
||||
///
|
||||
/// Syscall arguments:
|
||||
/// * `status`: The status value that is returned by `vprintf`.
|
||||
/// * `format`: A pointer to the format specifier input (uses common `printf` format).
|
||||
/// * `valist`: A pointer to the valist input.
|
||||
///
|
||||
/// ```
|
||||
/// #[repr(C)]
|
||||
/// struct PrintArgs(f32, f32, f32, i32);
|
||||
///
|
||||
/// vprintf(
|
||||
/// "int(%f + %f) = int(%f) = %d\n".as_ptr(),
|
||||
/// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
|
||||
|
||||
/// Allocate memory dynamically from a fixed-size heap in global memory.
|
||||
///
|
||||
/// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
|
||||
/// from the device heap and returns a pointer to the allocated memory
|
||||
/// or `NULL` if insufficient memory exists to fulfill the request.
|
||||
///
|
||||
/// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
|
||||
///
|
||||
/// The memory allocated by a given CUDA thread via `malloc()` remains allocated
|
||||
/// for the lifetime of the CUDA context, or until it is explicitly released
|
||||
/// by a call to `free()`. It can be used by any other CUDA threads
|
||||
/// even from subsequent kernel launches.
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
// FIXME(denzp): assign `malloc` and `nothrow` attributes.
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub fn malloc(size: usize) -> *mut c_void;
|
||||
|
||||
/// Free previously dynamically allocated memory.
|
||||
///
|
||||
/// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
|
||||
/// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
|
||||
/// the call to `free()` is ignored.
|
||||
///
|
||||
/// Any CUDA thread may free memory allocated by another thread, but care should be taken
|
||||
/// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
|
||||
/// with the same `ptr` has undefined behavior.
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
// FIXME(denzp): assign `nothrow` attribute.
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub fn free(ptr: *mut c_void);
|
||||
|
||||
// Internal declaration of the syscall. Exported variant has
|
||||
// the `char_size` parameter set to `1` (single char size in bytes).
|
||||
fn __assertfail(
|
||||
message: *const u8,
|
||||
file: *const u8,
|
||||
line: u32,
|
||||
function: *const u8,
|
||||
char_size: usize,
|
||||
);
|
||||
}
|
||||
|
||||
/// Syscall to be used whenever the *assert expression produces a `false` value*.
|
||||
///
|
||||
/// Syscall arguments:
|
||||
/// * `message`: The pointer to the string that should be output.
|
||||
/// * `file`: The pointer to the file name string associated with the assert.
|
||||
/// * `line`: The line number associated with the assert.
|
||||
/// * `function`: The pointer to the function name string associated with the assert.
|
||||
///
|
||||
/// Source:
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
|
||||
__assertfail(message, file, line, function, 1)
|
||||
}
|
||||
139
library/stdarch/crates/core_arch/src/nvptx/packed.rs
Normal file
139
library/stdarch/crates/core_arch/src/nvptx/packed.rs
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
//! NVPTX Packed data types (SIMD)
|
||||
//!
|
||||
//! Packed Data Types is what PTX calls SIMD types. See [PTX ISA (Packed Data Types)](https://docs.nvidia.com/cuda/parallel-thread-execution/#packed-data-types) for a full reference.
|
||||
|
||||
// Note: #[assert_instr] tests are not actually being run on nvptx due to being a `no_std` target incapable of running tests. Something like FileCheck would be appropriate for verifying the correct instruction is used.
|
||||
|
||||
use crate::intrinsics::simd::*;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.minimum.v2f16"]
|
||||
fn llvm_f16x2_minimum(a: f16x2, b: f16x2) -> f16x2;
|
||||
#[link_name = "llvm.maximum.v2f16"]
|
||||
fn llvm_f16x2_maximum(a: f16x2, b: f16x2) -> f16x2;
|
||||
}
|
||||
|
||||
types! {
|
||||
#![unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
|
||||
/// PTX-specific 32-bit wide floating point (f16 x 2) vector type
|
||||
pub struct f16x2(2 x f16);
|
||||
|
||||
}
|
||||
|
||||
/// Add two values, round to nearest even
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsics:
|
||||
/// - [`__hadd2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g921c795176eaa31265bd80ef4fe4b8e6)
|
||||
/// - [`__hadd2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g6cd8ddb2c3d670e1a10c3eb2e7644f82)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(add.rn.f16x22))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_add(a: f16x2, b: f16x2) -> f16x2 {
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
||||
/// Subtract two values, round to nearest even
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsics:
|
||||
/// - [`__hsub2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1ga5536c9c3d853d8c8b9de60e18b41e54)
|
||||
/// - [`__hsub2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g8adc164c68d553354f749f0f0645a874)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sub.rn.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_sub(a: f16x2, b: f16x2) -> f16x2 {
|
||||
simd_sub(a, b)
|
||||
}
|
||||
|
||||
/// Multiply two values, round to nearest even
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-mul>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsics:
|
||||
/// - [`__hmul2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g70de3f2ee48babe4e0969397ac17708e)
|
||||
/// - [`__hmul2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g99f8fe23a4b4c6898d6faf999afaa76e)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(mul.rn.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_mul(a: f16x2, b: f16x2) -> f16x2 {
|
||||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Fused multiply-add, round to nearest even
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-fma>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsics:
|
||||
/// - [`__fma2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
|
||||
/// - [`__fma2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(fma.rn.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_fma(a: f16x2, b: f16x2, c: f16x2) -> f16x2 {
|
||||
simd_fma(a, b, c)
|
||||
}
|
||||
|
||||
/// Arithmetic negate
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-neg>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(neg.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 {
|
||||
simd_neg(a)
|
||||
}
|
||||
|
||||
/// Find the minimum of two values
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(min.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 {
|
||||
simd_fmin(a, b)
|
||||
}
|
||||
|
||||
/// Find the minimum of two values, NaNs pass through.
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsic [`__hmin2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g8bb8f58e9294cc261d2f42c4d5aecd6b)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(min.NaN.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_min_nan(a: f16x2, b: f16x2) -> f16x2 {
|
||||
llvm_f16x2_minimum(a, b)
|
||||
}
|
||||
|
||||
/// Find the maximum of two values
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsic [`__hmax2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g59fc7fc7975d8127b202444a05e57e3d)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(max.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 {
|
||||
simd_fmax(a, b)
|
||||
}
|
||||
|
||||
/// Find the maximum of two values, NaNs pass through.
|
||||
///
|
||||
/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
|
||||
///
|
||||
/// Corresponds to the CUDA C intrinsic [`__hmax2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g41623db7850e3074fd9daa80a14c3897)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(max.NaN.f16x2))]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn f16x2_max_nan(a: f16x2, b: f16x2) -> f16x2 {
|
||||
llvm_f16x2_maximum(a, b)
|
||||
}
|
||||
6692
library/stdarch/crates/core_arch/src/powerpc/altivec.rs
Normal file
6692
library/stdarch/crates/core_arch/src/powerpc/altivec.rs
Normal file
File diff suppressed because it is too large
Load diff
315
library/stdarch/crates/core_arch/src/powerpc/macros.rs
Normal file
315
library/stdarch/crates/core_arch/src/powerpc/macros.rs
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
// Generates `pub unsafe fn $fun(args) -> $r` that forwards its arguments to `$call`.
// The generated function enables `altivec` and, in test builds, carries an
// `assert_instr` check.
//
// The bracketed instruction spec has three accepted forms:
// * `$instr`                 - one mnemonic, asserted in every test build;
// * `$altivec / $vsx`        - shorthand that expands to `$altivec / $vsx / $vsx`;
// * `$altivec / $vsx / $pwr9` - mnemonic selected by the `vsx` / `power9-vector` cfgs.
macro_rules! test_impl {
    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => {
        #[inline]
        #[target_feature(enable = "altivec")]
        #[cfg_attr(test, assert_instr($instr))]
        pub unsafe fn $fun ($($v : $ty),*) -> $r {
            $call ($($v),*)
        }
    };
    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident]) => {
        // Two-mnemonic form: reuse the VSX mnemonic for the POWER9 slot.
        test_impl! { $fun ($($v : $ty),*) -> $r [$call, $instr_altivec / $instr_vsx / $instr_vsx] }
    };
    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident / $instr_pwr9:ident]) => {
        #[inline]
        #[target_feature(enable = "altivec")]
        // Neither `vsx` nor `power9-vector`: expect the plain AltiVec mnemonic.
        #[cfg_attr(all(test, not(target_feature="vsx"), not(target_feature = "power9-vector")), assert_instr($instr_altivec))]
        // `vsx` without `power9-vector`: expect the VSX mnemonic.
        #[cfg_attr(all(test, target_feature="vsx", not(target_feature = "power9-vector")), assert_instr($instr_vsx))]
        // NOTE(review): this condition needs `power9-vector` *without* `vsx`; if
        // `power9-vector` implies `vsx` the POWER9 mnemonic is never asserted - confirm.
        #[cfg_attr(all(test, not(target_feature="vsx"), target_feature = "power9-vector"), assert_instr($instr_pwr9))]
        pub unsafe fn $fun ($($v : $ty),*) -> $r {
            $call ($($v),*)
        }
    }
}
|
||||
|
||||
// Implements a sealed vector trait `$Trait` (with method `$m`) by forwarding to a
// lower-level function after `transmute`-ing the operands.
//
// Grammar overview (`[Trait method]` always comes first):
// * `fun (A)`                  - unary, returns `Self`;
// * `fun (A) -> R`             - unary with `type Result = R`;
// * `+ fun (A) -> R`           - as above, and the result is transmuted into `R` too;
// * `1 (ub, sb, uh, sh, uw, sw, sf)` - unary impls for the six integer vectors and
//   `vector_float`;
// * `fun (A, B) -> R`          - binary, `impl Trait<B> for A`;
// * `+ fun (A, B) -> R`        - binary with the result transmuted into `R`;
// * `fun (A, ~B) -> R`         - binary for the pairs `(A, A)`, `(A, B)` and `(B, A)`;
// * `~(ub, sb, uh, sh, uw, sw)` / `~(f)` - the `~` form for every integer vector paired
//   with its bool vector;
// * `2 (ub, sb, uh, sh, uw, sw)` / `2 (f)` - same-type binary impls for every integer
//   vector;
// * `+ 2b (b, h, w)` / `+ 2b (f)` - same-type binary impls (result transmuted) for the
//   bool, unsigned and signed vectors of each lane width.
//
// Not every arm is used by every includer, hence the lint allowance.
#[allow(unknown_lints, unused_macro_rules)]
macro_rules! impl_vec_trait {
    ([$Trait:ident $m:ident] $fun:ident ($a:ty)) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self {
                $fun(transmute(self))
            }
        }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self::Result {
                $fun(transmute(self))
            }
        }
    };
    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self) -> Self::Result {
                transmute($fun(transmute(self)))
            }
        }
    };
    ([$Trait:ident $m:ident] 1 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $sf: ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int) -> vector_signed_int }
        impl_vec_trait!{ [$Trait $m] $sf (vector_float) -> vector_float }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait<$b> for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self, b: $b) -> Self::Result {
                $fun(transmute(self), transmute(b))
            }
        }
    };
    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl $Trait<$b> for $a {
            type Result = $r;
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $m(self, b: $b) -> Self::Result {
                transmute($fun(transmute(self), transmute(b)))
            }
        }
    };
    ([$Trait:ident $m:ident] $fun:ident ($a:ty, ~$b:ty) -> $r:ty) => {
        impl_vec_trait!{ [$Trait $m] $fun ($a, $a) -> $r }
        impl_vec_trait!{ [$Trait $m] $fun ($a, $b) -> $r }
        impl_vec_trait!{ [$Trait $m] $fun ($b, $a) -> $r }
    };
    ([$Trait:ident $m:ident] ~($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, ~vector_bool_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, ~vector_bool_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, ~vector_bool_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, ~vector_bool_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, ~vector_bool_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, ~vector_bool_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident] ~($fn:ident)) => {
        impl_vec_trait!{ [$Trait $m] ~($fn, $fn, $fn, $fn, $fn, $fn) }
    };
    ([$Trait:ident $m:ident] 2 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident] 2 ($fn:ident)) => {
        // The `2` marker must be repeated here: without it the recursive call matches no
        // arm of this macro (compare the `~($fn)` and `2b ($fn)` shorthands).
        impl_vec_trait!{ [$Trait $m] 2 ($fn, $fn, $fn, $fn, $fn, $fn) }
    };
    ([$Trait:ident $m:ident]+ 2b ($b:ident, $h:ident, $w:ident)) => {
        impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char }
        impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
        impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
        impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
        impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int }
    };
    ([$Trait:ident $m:ident]+ 2b ($fn:ident)) => {
        impl_vec_trait!{ [$Trait $m]+ 2b ($fn, $fn, $fn) }
    };
}
|
||||
|
||||
// "Short to long": maps a portable SIMD type name (e.g. `i32x4`) to the name of the
// corresponding PowerPC vector type (e.g. `vector_signed_int`).
macro_rules! s_t_l {
    (i32x4) => { vector_signed_int };
    (i16x8) => { vector_signed_short };
    (i8x16) => { vector_signed_char };

    (u32x4) => { vector_unsigned_int };
    (u16x8) => { vector_unsigned_short };
    (u8x16) => { vector_unsigned_char };

    (f32x4) => { vector_float };
}
|
||||
|
||||
// "Type to long": maps a scalar element type (e.g. `i32`) to the name of the PowerPC
// vector type holding such elements (e.g. `vector_signed_int`).
macro_rules! t_t_l {
    (i32) => { vector_signed_int };
    (i16) => { vector_signed_short };
    (i8) => { vector_signed_char };

    (u32) => { vector_unsigned_int };
    (u16) => { vector_unsigned_short };
    (u8) => { vector_unsigned_char };

    (f32) => { vector_float };
}
|
||||
|
||||
// "Type to short": maps a scalar element type (e.g. `i32`) to the name of the portable
// SIMD type with such elements (e.g. `i32x4`).
macro_rules! t_t_s {
    (i32) => { i32x4 };
    (i16) => { i16x8 };
    (i8) => { i8x16 };

    (u32) => { u32x4 };
    (u16) => { u16x8 };
    (u8) => { u8x16 };

    (f32) => { f32x4 };
}
|
||||
|
||||
// "To unsigned": maps any PowerPC vector type name to the unsigned vector type listed
// for it below (bool, signed and unsigned variants share one target; `vector_float`
// maps to `vector_unsigned_int`).
macro_rules! t_u {
    (vector_bool_char) => { vector_unsigned_char };
    (vector_bool_short) => { vector_unsigned_short };
    (vector_bool_int) => { vector_unsigned_int };
    (vector_unsigned_char) => { vector_unsigned_char };
    (vector_unsigned_short) => { vector_unsigned_short };
    (vector_unsigned_int) => { vector_unsigned_int };
    (vector_signed_char) => { vector_unsigned_char };
    (vector_signed_short) => { vector_unsigned_short };
    (vector_signed_int) => { vector_unsigned_int };
    (vector_float) => { vector_unsigned_int };
}
|
||||
|
||||
// "To bool": maps any PowerPC vector type name to the bool (mask) vector type listed
// for it below (`vector_float` maps to `vector_bool_int`).
macro_rules! t_b {
    (vector_bool_char) => { vector_bool_char };
    (vector_bool_short) => { vector_bool_short };
    (vector_bool_int) => { vector_bool_int };
    (vector_signed_char) => { vector_bool_char };
    (vector_signed_short) => { vector_bool_short };
    (vector_signed_int) => { vector_bool_int };
    (vector_unsigned_char) => { vector_bool_char };
    (vector_unsigned_short) => { vector_bool_short };
    (vector_unsigned_int) => { vector_bool_int };
    (vector_float) => { vector_bool_int };
}
|
||||
|
||||
// Implements `From<$s>` for the vector type `s_t_l!($s)`; the conversion is a plain
// `transmute`. Accepts either a single type name or a comma-separated list, which is
// expanded one name at a time.
macro_rules! impl_from {
    ($s: ident) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl From<$s> for s_t_l!($s) {
            fn from(value: $s) -> Self {
                // Relies on `$s` and `s_t_l!($s)` being layout-compatible; `transmute`
                // itself rejects a size mismatch at compile time.
                unsafe { transmute(value) }
            }
        }
    };
    ($($s: ident),*) => {
        $(
            impl_from! { $s }
        )*
    };
}
|
||||
|
||||
// Implements `Neg` for the vector type `s_t_l!($s)` by calling `simd_neg`.
//
// The `$zero` expression is part of the invocation syntax but is not referenced by the
// expansion.
macro_rules! impl_neg {
    ($s: ident : $zero: expr) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl crate::ops::Neg for s_t_l!($s) {
            type Output = s_t_l!($s);

            fn neg(self) -> Self::Output {
                unsafe { simd_neg(self) }
            }
        }
    };
}
|
||||
|
||||
pub(crate) use impl_from;
|
||||
pub(crate) use impl_neg;
|
||||
pub(crate) use impl_vec_trait;
|
||||
pub(crate) use s_t_l;
|
||||
pub(crate) use t_b;
|
||||
pub(crate) use t_t_l;
|
||||
pub(crate) use t_t_s;
|
||||
pub(crate) use t_u;
|
||||
pub(crate) use test_impl;
|
||||
22
library/stdarch/crates/core_arch/src/powerpc/mod.rs
Normal file
22
library/stdarch/crates/core_arch/src/powerpc/mod.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
//! PowerPC intrinsics
|
||||
|
||||
pub(crate) mod macros;
|
||||
|
||||
mod altivec;
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub use self::altivec::*;
|
||||
|
||||
mod vsx;
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub use self::vsx::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `TRAP`
|
||||
#[cfg_attr(test, assert_instr(trap))]
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn trap() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
240
library/stdarch/crates/core_arch/src/powerpc/vsx.rs
Normal file
240
library/stdarch/crates/core_arch/src/powerpc/vsx.rs
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
|
||||
//!
|
||||
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
|
||||
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
|
||||
//!
|
||||
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
|
||||
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use crate::core_arch::powerpc::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::mem::transmute;
|
||||
|
||||
// 128-bit VSX vector types with two 64-bit lanes. The commented-out entries list
// further type names that are not defined here.
types! {
    #![unstable(feature = "stdarch_powerpc", issue = "111145")]

    // pub struct vector_Float16 = f16x8;
    /// PowerPC-specific 128-bit wide vector of two packed `i64`
    pub struct vector_signed_long(2 x i64);
    /// PowerPC-specific 128-bit wide vector of two packed `u64`
    pub struct vector_unsigned_long(2 x u64);
    /// PowerPC-specific 128-bit wide vector mask of two `i64`
    pub struct vector_bool_long(2 x i64);
    /// PowerPC-specific 128-bit wide vector of two packed `f64`
    pub struct vector_double(2 x f64);
    // pub struct vector_signed_long_long = vector_signed_long;
    // pub struct vector_unsigned_long_long = vector_unsigned_long;
    // pub struct vector_bool_long_long = vector_bool_long;
    // pub struct vector_signed___int128 = i128x1;
    // pub struct vector_unsigned___int128 = i128x1;
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.ppc.altivec.vperm"]
|
||||
fn vperm(
|
||||
a: vector_signed_int,
|
||||
b: vector_signed_int,
|
||||
c: vector_unsigned_char,
|
||||
) -> vector_signed_int;
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
use super::*;
|
||||
use crate::core_arch::simd::*;
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorPermDI {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self;
|
||||
}
|
||||
|
||||
// xxpermdi has an big-endian bias and extended mnemonics
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
#[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))]
|
||||
#[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
|
||||
unsafe fn xxpermdi(a: vector_signed_long, b: vector_signed_long, dm: u8) -> vector_signed_long {
|
||||
let a: i64x2 = transmute(a);
|
||||
let b: i64x2 = transmute(b);
|
||||
let r: i64x2 = match dm & 0b11 {
|
||||
0 => simd_shuffle!(a, b, [0b00, 0b10]),
|
||||
1 => simd_shuffle!(a, b, [0b01, 0b10]),
|
||||
2 => simd_shuffle!(a, b, [0b00, 0b11]),
|
||||
_ => simd_shuffle!(a, b, [0b01, 0b11]),
|
||||
};
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
macro_rules! vec_xxpermdi {
|
||||
{$impl: ident} => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl VectorPermDI for $impl {
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self {
|
||||
transmute(xxpermdi(transmute(self), transmute(b), dm))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vec_xxpermdi! { vector_unsigned_long }
|
||||
vec_xxpermdi! { vector_signed_long }
|
||||
vec_xxpermdi! { vector_bool_long }
|
||||
vec_xxpermdi! { vector_double }
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorMergeEo {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
unsafe fn vec_mergee(self, b: Self) -> Self;
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
unsafe fn vec_mergeo(self, b: Self) -> Self;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(
|
||||
all(test, target_endian = "little", target_feature = "power8-vector"),
|
||||
assert_instr(vmrgow)
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(test, target_endian = "big", target_feature = "power8-vector"),
|
||||
assert_instr(vmrgew)
|
||||
)]
|
||||
unsafe fn mergee(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
|
||||
let p = transmute(u8x16::new(
|
||||
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
|
||||
0x1A, 0x1B,
|
||||
));
|
||||
vec_perm(a, b, p)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[cfg_attr(
|
||||
all(test, target_endian = "little", target_feature = "power8-vector"),
|
||||
assert_instr(vmrgew)
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(test, target_endian = "big", target_feature = "power8-vector"),
|
||||
assert_instr(vmrgow)
|
||||
)]
|
||||
unsafe fn mergeo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
|
||||
let p = transmute(u8x16::new(
|
||||
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D,
|
||||
0x1E, 0x1F,
|
||||
));
|
||||
vec_perm(a, b, p)
|
||||
}
|
||||
|
||||
macro_rules! vec_mergeeo {
|
||||
{ $impl: ident, $even: ident, $odd: ident } => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl VectorMergeEo for $impl {
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_mergee(self, b: Self) -> Self {
|
||||
transmute(mergee(transmute(self), transmute(b)))
|
||||
}
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_mergeo(self, b: Self) -> Self {
|
||||
transmute(mergeo(transmute(self), transmute(b)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vec_mergeeo! { vector_signed_int, mergee, mergeo }
|
||||
vec_mergeeo! { vector_unsigned_int, mergee, mergeo }
|
||||
vec_mergeeo! { vector_bool_int, mergee, mergeo }
|
||||
vec_mergeeo! { vector_float, mergee, mergeo }
|
||||
}
|
||||
|
||||
/// Vector permute.
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
//#[rustc_legacy_const_generics(2)]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_xxpermdi<T, const DM: i32>(a: T, b: T) -> T
|
||||
where
|
||||
T: sealed::VectorPermDI,
|
||||
{
|
||||
static_assert_uimm_bits!(DM, 2);
|
||||
a.vec_xxpermdi(b, DM as u8)
|
||||
}
|
||||
|
||||
/// Vector Merge Even
|
||||
///
|
||||
/// ## Purpose
|
||||
/// Merges the even-numbered values from two vectors.
|
||||
///
|
||||
/// ## Result value
|
||||
/// The even-numbered elements of a are stored into the even-numbered elements of r.
|
||||
/// The even-numbered elements of b are stored into the odd-numbered elements of r.
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_mergee<T>(a: T, b: T) -> T
|
||||
where
|
||||
T: sealed::VectorMergeEo,
|
||||
{
|
||||
a.vec_mergee(b)
|
||||
}
|
||||
|
||||
/// Vector Merge Odd
|
||||
///
|
||||
/// ## Purpose
|
||||
/// Merges the odd-numbered values from two vectors.
|
||||
///
|
||||
/// ## Result value
|
||||
/// The odd-numbered elements of a are stored into the even-numbered elements of r.
|
||||
/// The odd-numbered elements of b are stored into the odd-numbered elements of r.
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_mergeo<T>(a: T, b: T) -> T
|
||||
where
|
||||
T: sealed::VectorMergeEo,
|
||||
{
|
||||
a.vec_mergeo(b)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    #[cfg(target_arch = "powerpc")]
    use crate::core_arch::arch::powerpc::*;

    #[cfg(target_arch = "powerpc64")]
    use crate::core_arch::arch::powerpc64::*;

    use crate::core_arch::simd::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    // Generates one `vsx` test for `vec_xxpermdi`: with `a = [a, b]` and `b = [c, d]`,
    // selectors 0..=3 must produce `[a, c]`, `[b, c]`, `[a, d]` and `[b, d]`.
    macro_rules! test_vec_xxpermdi {
        {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
            #[simd_test(enable = "vsx")]
            unsafe fn $name() {
                let a: $longtype = transmute($shorttype::new($($a),+, $($b),+));
                let b = transmute($shorttype::new($($c),+, $($d),+));

                assert_eq!($shorttype::new($($a),+, $($c),+), transmute(vec_xxpermdi::<_, 0>(a, b)));
                assert_eq!($shorttype::new($($b),+, $($c),+), transmute(vec_xxpermdi::<_, 1>(a, b)));
                assert_eq!($shorttype::new($($a),+, $($d),+), transmute(vec_xxpermdi::<_, 2>(a, b)));
                assert_eq!($shorttype::new($($b),+, $($d),+), transmute(vec_xxpermdi::<_, 3>(a, b)));
            }
        }
    }

    test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
    test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
    test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
    test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
}
|
||||
14
library/stdarch/crates/core_arch/src/powerpc64/mod.rs
Normal file
14
library/stdarch/crates/core_arch/src/powerpc64/mod.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
//! PowerPC 64
|
||||
//!
|
||||
//! The reference is the [64-Bit ELF V2 ABI Specification - Power
|
||||
//! Architecture].
|
||||
//!
|
||||
//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
|
||||
|
||||
mod vsx;
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub use crate::core_arch::powerpc::*;
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub use self::vsx::*;
|
||||
156
library/stdarch/crates/core_arch/src/powerpc64/vsx.rs
Normal file
156
library/stdarch/crates/core_arch/src/powerpc64/vsx.rs
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
|
||||
//!
|
||||
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
|
||||
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
|
||||
//!
|
||||
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
|
||||
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use crate::core_arch::powerpc::macros::*;
|
||||
use crate::core_arch::powerpc::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::mem::transmute;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.ppc.vsx.lxvl"]
|
||||
fn lxvl(a: *const u8, l: usize) -> vector_signed_int;
|
||||
|
||||
#[link_name = "llvm.ppc.vsx.stxvl"]
|
||||
fn stxvl(v: vector_signed_int, a: *mut u8, l: usize);
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
use super::*;
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
#[cfg_attr(test, assert_instr(lxvl))]
|
||||
unsafe fn vec_lxvl(p: *const u8, l: usize) -> vector_signed_int {
|
||||
lxvl(p, l << 56)
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorXloads {
|
||||
type Result;
|
||||
unsafe fn vec_xl_len(self, l: usize) -> Self::Result;
|
||||
}
|
||||
|
||||
macro_rules! impl_vsx_loads {
|
||||
($ty:ident) => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl VectorXloads for *const $ty {
|
||||
type Result = t_t_l!($ty);
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
unsafe fn vec_xl_len(self, l: usize) -> Self::Result {
|
||||
transmute(vec_lxvl(self as *const u8, l))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_vsx_loads! { i8 }
|
||||
impl_vsx_loads! { u8 }
|
||||
impl_vsx_loads! { i16 }
|
||||
impl_vsx_loads! { u16 }
|
||||
impl_vsx_loads! { i32 }
|
||||
impl_vsx_loads! { u32 }
|
||||
impl_vsx_loads! { f32 }
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
#[cfg_attr(test, assert_instr(stxvl))]
|
||||
unsafe fn vec_stxvl(v: vector_signed_int, a: *mut u8, l: usize) {
|
||||
stxvl(v, a, l << 56);
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorXstores {
|
||||
type Out;
|
||||
unsafe fn vec_xst_len(self, p: Self::Out, l: usize);
|
||||
}
|
||||
|
||||
macro_rules! impl_stores {
|
||||
($ty:ident) => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl VectorXstores for t_t_l!($ty) {
|
||||
type Out = *mut $ty;
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
unsafe fn vec_xst_len(self, a: Self::Out, l: usize) {
|
||||
stxvl(transmute(self), a as *mut u8, l)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_stores! { i8 }
|
||||
impl_stores! { u8 }
|
||||
impl_stores! { i16 }
|
||||
impl_stores! { u16 }
|
||||
impl_stores! { i32 }
|
||||
impl_stores! { u32 }
|
||||
impl_stores! { f32 }
|
||||
}
|
||||
|
||||
/// Vector Load with Length
|
||||
///
|
||||
/// ## Purpose
|
||||
/// Loads a vector of a specified byte length.
|
||||
///
|
||||
/// ## Result value
|
||||
/// Loads the number of bytes specified by b from the address specified in a.
|
||||
/// Initializes elements in order from the byte stream (as defined by the endianness of the
|
||||
/// target). Any bytes of elements that cannot be initialized from the number of loaded bytes have
|
||||
/// a zero value.
|
||||
///
|
||||
/// Between 0 and 16 bytes, inclusive, will be loaded. The length is specified by the
|
||||
/// least-significant byte of b, as min (b mod 256, 16). The behavior is undefined if the length
|
||||
/// argument is outside of the range 0–255, or if it is not a multiple of the vector element size.
|
||||
///
|
||||
/// ## Notes
|
||||
/// vec_xl_len should not be used to load from cache-inhibited memory.
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_xl_len<T>(p: T, len: usize) -> <T as sealed::VectorXloads>::Result
|
||||
where
|
||||
T: sealed::VectorXloads,
|
||||
{
|
||||
p.vec_xl_len(len)
|
||||
}
|
||||
|
||||
/// Vector Store with Length
|
||||
///
|
||||
/// ## Purpose
|
||||
///
|
||||
/// Stores a vector of a specified byte length.
|
||||
///
|
||||
/// ## Operation
|
||||
///
|
||||
/// Stores the number of bytes specified by c of the vector a to the address specified
|
||||
/// in b. The bytes are obtained starting from the lowest-numbered byte of the lowest-numbered
|
||||
/// element (as defined by the endianness of the target). All bytes of an element are accessed
|
||||
/// before proceeding to the next higher element.
|
||||
///
|
||||
/// Between 0 and 16 bytes, inclusive, will be stored. The length is specified by the
|
||||
/// least-significant byte of c, as min (c mod 256, 16). The behavior is undefined if the length
|
||||
/// argument is outside of the range 0–255, or if it is not a multiple of the vector element size.
|
||||
///
|
||||
/// ## Notes
|
||||
/// vec_xst_len should not be used to store to cache-inhibited memory.
|
||||
#[inline]
|
||||
#[target_feature(enable = "power9-vector")]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_xst_len<T>(v: T, a: <T as sealed::VectorXstores>::Out, l: usize)
|
||||
where
|
||||
T: sealed::VectorXstores,
|
||||
{
|
||||
v.vec_xst_len(a, l)
|
||||
}
|
||||
6
library/stdarch/crates/core_arch/src/riscv32/mod.rs
Normal file
6
library/stdarch/crates/core_arch/src/riscv32/mod.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
//! RISC-V RV32 specific intrinsics
|
||||
|
||||
mod zk;
|
||||
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub use zk::*;
|
||||
331
library/stdarch/crates/core_arch/src/riscv32/zk.rs
Normal file
331
library/stdarch/crates/core_arch/src/riscv32/zk.rs
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.riscv.aes32esi"]
|
||||
fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.aes32esmi"]
|
||||
fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.aes32dsi"]
|
||||
fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.aes32dsmi"]
|
||||
fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.zip.i32"]
|
||||
fn _zip(rs1: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.unzip.i32"]
|
||||
fn _unzip(rs1: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig0h"]
|
||||
fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig0l"]
|
||||
fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig1h"]
|
||||
fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig1l"]
|
||||
fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sum0r"]
|
||||
fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sum1r"]
|
||||
fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
|
||||
}
|
||||
|
||||
/// AES final round encryption instruction for RV32.
|
||||
///
|
||||
/// This instruction sources a single byte from rs2 according to bs. To this it applies the
|
||||
/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must
|
||||
/// always be implemented such that its execution latency does not depend on the data being
|
||||
/// operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.3
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
|
||||
/// used.
|
||||
#[target_feature(enable = "zkne")]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
|
||||
static_assert!(BS < 4);
|
||||
|
||||
unsafe { _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
|
||||
}
|
||||
|
||||
/// AES middle round encryption instruction for RV32 with.
|
||||
///
|
||||
/// This instruction sources a single byte from rs2 according to bs. To this it applies the
|
||||
/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with
|
||||
/// rs1. This instruction must always be implemented such that its execution latency does not
|
||||
/// depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.4
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
|
||||
/// used.
|
||||
#[target_feature(enable = "zkne")]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
|
||||
static_assert!(BS < 4);
|
||||
|
||||
unsafe { _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
|
||||
}
|
||||
|
||||
/// AES final round decryption instruction for RV32.
|
||||
///
|
||||
/// This instruction sources a single byte from rs2 according to bs. To this it applies the
|
||||
/// inverse AES SBox operation, and XOR’s the result with rs1. This instruction must always be
|
||||
/// implemented such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.1
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
|
||||
/// used.
|
||||
#[target_feature(enable = "zknd")]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
|
||||
static_assert!(BS < 4);
|
||||
|
||||
unsafe { _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
|
||||
}
|
||||
|
||||
/// AES middle round decryption instruction for RV32.
|
||||
///
|
||||
/// This instruction sources a single byte from rs2 according to bs. To this it applies the
|
||||
/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with
|
||||
/// rs1. This instruction must always be implemented such that its execution latency does not
|
||||
/// depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.2
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
|
||||
/// used.
|
||||
#[target_feature(enable = "zknd")]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
|
||||
static_assert!(BS < 4);
|
||||
|
||||
unsafe { _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Place upper/lower halves of the source register into odd/even bits of the destination
|
||||
/// respectivley.
|
||||
///
|
||||
/// This instruction places bits in the low half of the source register into the even bit
|
||||
/// positions of the destination, and bits in the high half of the source register into the odd
|
||||
/// bit positions of the destination. It is the inverse of the unzip instruction. This
|
||||
/// instruction is available only on RV32.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.49
|
||||
#[target_feature(enable = "zbkb")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(zip))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn zip(rs: u32) -> u32 {
|
||||
unsafe { _zip(rs as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Place odd and even bits of the source word into upper/lower halves of the destination.
|
||||
///
|
||||
/// This instruction places the even bits of the source register into the low half of the
|
||||
/// destination, and the odd bits of the source into the high bits of the destination. It is
|
||||
/// the inverse of the zip instruction. This instruction is available only on RV32.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.45
|
||||
#[target_feature(enable = "zbkb")]
|
||||
#[cfg_attr(test, assert_instr(unzip))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn unzip(rs: u32) -> u32 {
|
||||
unsafe { _unzip(rs as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
|
||||
/// function \[49\] (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
|
||||
/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a
|
||||
/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
|
||||
/// registers. This instruction must always be implemented such that its execution latency does
|
||||
/// not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.31
|
||||
#[target_feature(enable = "zknh")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(sha512sig0h))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sig0h(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
|
||||
/// \[49\] (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
|
||||
/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a
|
||||
/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
|
||||
/// registers. This instruction must always be implemented such that its execution latency does
|
||||
/// not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.32
|
||||
#[target_feature(enable = "zknh")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(sha512sig0l))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sig0l(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
|
||||
/// function \[49\] (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
|
||||
/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a
|
||||
/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
|
||||
/// registers. This instruction must always be implemented such that its execution latency does
|
||||
/// not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.33
|
||||
#[target_feature(enable = "zknh")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(sha512sig1h))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sig1h(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
|
||||
/// \[49\] (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
|
||||
/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a
|
||||
/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
|
||||
/// registers. This instruction must always be implemented such that its execution latency does
|
||||
/// not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.34
|
||||
#[target_feature(enable = "zknh")]
|
||||
#[cfg_attr(test, assert_instr(sha512sig1l))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sig1l(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
|
||||
/// 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the
|
||||
/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
|
||||
/// output is represented by two 32-bit registers. This instruction must always be implemented
|
||||
/// such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.35
|
||||
#[target_feature(enable = "zknh")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(sha512sum0r))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sum0r(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
|
||||
/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
|
||||
/// 4.1.3).
|
||||
///
|
||||
/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the
|
||||
/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
|
||||
/// output is represented by two 32-bit registers. This instruction must always be implemented
|
||||
/// such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.36
|
||||
#[target_feature(enable = "zknh")]
|
||||
// See #1464
|
||||
// #[cfg_attr(test, assert_instr(sha512sum1r))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
|
||||
unsafe { _sha512sum1r(rs1 as i32, rs2 as i32) as u32 }
|
||||
}
|
||||
57
library/stdarch/crates/core_arch/src/riscv64/mod.rs
Normal file
57
library/stdarch/crates/core_arch/src/riscv64/mod.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
//! RISC-V RV64 specific intrinsics
|
||||
use crate::arch::asm;
|
||||
|
||||
mod zk;
|
||||
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub use zk::*;
|
||||
|
||||
/// Loads virtual machine memory by unsigned word integer
|
||||
///
|
||||
/// This instruction performs an explicit memory access as though `V=1`;
|
||||
/// i.e., with the address translation and protection, and the endianness, that apply to memory
|
||||
/// accesses in either VS-mode or VU-mode.
|
||||
///
|
||||
/// This operation is not available under RV32 base instruction set.
|
||||
///
|
||||
/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.WU`
|
||||
/// instruction which is effectively a dereference to any memory address.
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub unsafe fn hlv_wu(src: *const u32) -> u32 {
|
||||
let value: u32;
|
||||
asm!(".insn i 0x73, 0x4, {}, {}, 0x681", out(reg) value, in(reg) src, options(readonly, nostack));
|
||||
value
|
||||
}
|
||||
|
||||
/// Loads virtual machine memory by double integer
|
||||
///
|
||||
/// This instruction performs an explicit memory access as though `V=1`;
|
||||
/// i.e., with the address translation and protection, and the endianness, that apply to memory
|
||||
/// accesses in either VS-mode or VU-mode.
|
||||
///
|
||||
/// This operation is not available under RV32 base instruction set.
|
||||
///
|
||||
/// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.D`
|
||||
/// instruction which is effectively a dereference to any memory address.
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub unsafe fn hlv_d(src: *const i64) -> i64 {
|
||||
let value: i64;
|
||||
asm!(".insn i 0x73, 0x4, {}, {}, 0x6C0", out(reg) value, in(reg) src, options(readonly, nostack));
|
||||
value
|
||||
}
|
||||
|
||||
/// Stores virtual machine memory by double integer
|
||||
///
|
||||
/// This instruction performs an explicit memory access as though `V=1`;
|
||||
/// i.e., with the address translation and protection, and the endianness, that apply to memory
|
||||
/// accesses in either VS-mode or VU-mode.
|
||||
///
|
||||
/// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.D`
|
||||
/// instruction which is effectively a dereference to any memory address.
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub unsafe fn hsv_d(dst: *mut i64, src: i64) {
|
||||
asm!(".insn r 0x73, 0x4, 0x37, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
|
||||
}
|
||||
265
library/stdarch/crates/core_arch/src/riscv64/zk.rs
Normal file
265
library/stdarch/crates/core_arch/src/riscv64/zk.rs
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.riscv.aes64es"]
|
||||
fn _aes64es(rs1: i64, rs2: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64esm"]
|
||||
fn _aes64esm(rs1: i64, rs2: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64ds"]
|
||||
fn _aes64ds(rs1: i64, rs2: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64dsm"]
|
||||
fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64ks1i"]
|
||||
fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64ks2"]
|
||||
fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.aes64im"]
|
||||
fn _aes64im(rs1: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig0"]
|
||||
fn _sha512sig0(rs1: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sig1"]
|
||||
fn _sha512sig1(rs1: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sum0"]
|
||||
fn _sha512sum0(rs1: i64) -> i64;
|
||||
|
||||
#[link_name = "llvm.riscv.sha512sum1"]
|
||||
fn _sha512sum1(rs1: i64) -> i64;
|
||||
}
|
||||
|
||||
/// AES final round encryption instruction for RV64.
|
||||
///
|
||||
/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
|
||||
/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must
|
||||
/// always be implemented such that its execution latency does not depend on the data being
|
||||
/// operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.7
|
||||
#[target_feature(enable = "zkne")]
|
||||
#[cfg_attr(test, assert_instr(aes64es))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64es(rs1: u64, rs2: u64) -> u64 {
|
||||
unsafe { _aes64es(rs1 as i64, rs2 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// AES middle round encryption instruction for RV64.
|
||||
///
|
||||
/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
|
||||
/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This
|
||||
/// instruction must always be implemented such that its execution latency does not depend on
|
||||
/// the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.8
|
||||
#[target_feature(enable = "zkne")]
|
||||
#[cfg_attr(test, assert_instr(aes64esm))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64esm(rs1: u64, rs2: u64) -> u64 {
|
||||
unsafe { _aes64esm(rs1 as i64, rs2 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// AES final round decryption instruction for RV64.
|
||||
///
|
||||
/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
|
||||
/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. This
|
||||
/// instruction must always be implemented such that its execution latency does not depend on
|
||||
/// the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.5
|
||||
#[target_feature(enable = "zknd")]
|
||||
#[cfg_attr(test, assert_instr(aes64ds))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64ds(rs1: u64, rs2: u64) -> u64 {
|
||||
unsafe { _aes64ds(rs1 as i64, rs2 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// AES middle round decryption instruction for RV64.
|
||||
///
|
||||
/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
|
||||
/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps.
|
||||
/// This instruction must always be implemented such that its execution latency does not depend
|
||||
/// on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.6
|
||||
#[target_feature(enable = "zknd")]
|
||||
#[cfg_attr(test, assert_instr(aes64dsm))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
|
||||
unsafe { _aes64dsm(rs1 as i64, rs2 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// This instruction implements part of the KeySchedule operation for the AES Block cipher
|
||||
/// involving the SBox operation.
|
||||
///
|
||||
/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the
|
||||
/// AES block cipher Key Schedule. This instruction must always be implemented such that its
|
||||
/// execution latency does not depend on the data being operated on. Note that rnum must be in
|
||||
/// the range 0x0..0xA. The values 0xB..0xF are reserved.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.10
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`.
|
||||
#[target_feature(enable = "zkne", enable = "zknd")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
|
||||
static_assert!(RNUM <= 10);
|
||||
|
||||
unsafe { _aes64ks1i(rs1 as i64, RNUM as i32) as u64 }
|
||||
}
|
||||
|
||||
/// This instruction implements part of the KeySchedule operation for the AES Block cipher.
|
||||
///
|
||||
/// This instruction implements the additional XOR’ing of key words as part of the AES block
|
||||
/// cipher Key Schedule. This instruction must always be implemented such that its execution
|
||||
/// latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.11
|
||||
#[target_feature(enable = "zkne", enable = "zknd")]
|
||||
#[cfg_attr(test, assert_instr(aes64ks2))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
|
||||
unsafe { _aes64ks2(rs1 as i64, rs2 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// This instruction accelerates the inverse MixColumns step of the AES Block Cipher, and is used to aid creation of
|
||||
/// the decryption KeySchedule.
|
||||
///
|
||||
/// The instruction applies the inverse MixColumns transformation to two columns of the state array, packed
|
||||
/// into a single 64-bit register. It is used to create the inverse cipher KeySchedule, according to the equivalent
|
||||
/// inverse cipher construction in (Page 23, Section 5.3.5). This instruction must always be implemented
|
||||
/// such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.9
|
||||
#[target_feature(enable = "zkne", enable = "zknd")]
|
||||
#[cfg_attr(test, assert_instr(aes64im))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn aes64im(rs1: u64) -> u64 {
|
||||
unsafe { _aes64im(rs1 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
|
||||
/// (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is supported for the RV64 base architecture. It implements the Sigma0
|
||||
/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be
|
||||
/// implemented such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.37
|
||||
#[target_feature(enable = "zknh")]
|
||||
#[cfg_attr(test, assert_instr(sha512sig0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig0(rs1: u64) -> u64 {
|
||||
unsafe { _sha512sig0(rs1 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
|
||||
/// (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is supported for the RV64 base architecture. It implements the Sigma1
|
||||
/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be
|
||||
/// implemented such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.38
|
||||
#[target_feature(enable = "zknh")]
|
||||
#[cfg_attr(test, assert_instr(sha512sig1))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sig1(rs1: u64) -> u64 {
|
||||
unsafe { _sha512sig1(rs1 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
|
||||
/// (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is supported for the RV64 base architecture. It implements the Sum0
|
||||
/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be
|
||||
/// implemented such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.39
|
||||
#[target_feature(enable = "zknh")]
|
||||
#[cfg_attr(test, assert_instr(sha512sum0))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sum0(rs1: u64) -> u64 {
|
||||
unsafe { _sha512sum0(rs1 as i64) as u64 }
|
||||
}
|
||||
|
||||
/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
|
||||
/// (Section 4.1.3).
|
||||
///
|
||||
/// This instruction is supported for the RV64 base architecture. It implements the Sum1
|
||||
/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be
|
||||
/// implemented such that its execution latency does not depend on the data being operated on.
|
||||
///
|
||||
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
|
||||
///
|
||||
/// Version: v1.0.1
|
||||
///
|
||||
/// Section: 3.40
|
||||
#[target_feature(enable = "zknh")]
|
||||
#[cfg_attr(test, assert_instr(sha512sum1))]
|
||||
#[inline]
|
||||
#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
|
||||
pub fn sha512sum1(rs1: u64) -> u64 {
|
||||
unsafe { _sha512sum1(rs1 as i64) as u64 }
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue