Import stdarch history as a Josh subtree

2025-06-23 17:22:53 +02:00 · 2025-06-23 17:22:53 +02:00 · e433101882
commit e433101882
parent 9510b476d5 359f986ddd
306 changed files with 729730 additions and 0 deletions
--- a/library/stdarch/.cirrus.yml
+++ b/library/stdarch/.cirrus.yml
@ -0,0 +1,16 @@
+task:
+  name: x86_64-unknown-freebsd
+  freebsd_instance:
+    image_family: freebsd-13-4
+  env:
+    # FIXME(freebsd): FreeBSD has a segfault when `RUST_BACKTRACE` is set
+    # https://github.com/rust-lang/rust/issues/132185
+    RUST_BACKTRACE: "0"
+  setup_script:
+    - curl https://sh.rustup.rs -sSf --output rustup.sh
+    - sh rustup.sh --default-toolchain nightly -y
+    - . $HOME/.cargo/env
+    - rustup default nightly
+  test_script:
+    - . $HOME/.cargo/env
+    - cargo build --all
--- a/library/stdarch/.git-blame-ignore-revs
+++ b/library/stdarch/.git-blame-ignore-revs
@ -0,0 +1,4 @@
+# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make `git blame` ignore the following commits.
+
+# format with style edition 2024
+fc87bd98d689590a0b6f5ee4110c5b9f962faa66
--- a/library/stdarch/.github/workflows/main.yml
+++ b/library/stdarch/.github/workflows/main.yml
@ -0,0 +1,288 @@
+name: CI
+on:
+  pull_request:
+  merge_group:
+
+jobs:
+  style:
+    name: Check Style
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: rustup update nightly --no-self-update && rustup default nightly
+    - run: ci/style.sh
+
+  docs:
+    name: Build Documentation
+    needs: [style]
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: rustup update nightly --no-self-update && rustup default nightly
+    - run: ci/dox.sh
+      env:
+        CI: 1
+
+  verify:
+    name: Automatic intrinsic verification
+    needs: [style]
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: rustup update nightly --no-self-update && rustup default nightly
+    - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
+
+  test:
+    needs: [style]
+    name: Test
+    runs-on: ${{ matrix.target.os }}
+    strategy:
+      matrix:
+        profile:
+        - dev
+        - release
+        target:
+        # Targets that are run through Docker on Linux
+        - tuple: i686-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: x86_64-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: arm-unknown-linux-gnueabihf
+          os: ubuntu-latest
+        - tuple: armv7-unknown-linux-gnueabihf
+          os: ubuntu-latest
+        - tuple: aarch64-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: aarch64_be-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: riscv32gc-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: riscv64gc-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: powerpc-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: powerpc64-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: powerpc64le-unknown-linux-gnu
+          os: ubuntu-latest
+        # MIPS targets disabled since they are dropped to tier 3.
+        # See https://github.com/rust-lang/compiler-team/issues/648
+        #- tuple: mips-unknown-linux-gnu
+        #  os: ubuntu-latest
+        #- tuple: mips64-unknown-linux-gnuabi64
+        #  os: ubuntu-latest
+        #- tuple: mips64el-unknown-linux-gnuabi64
+        #  os: ubuntu-latest
+        #- tuple: mipsel-unknown-linux-musl
+        #  os: ubuntu-latest
+        - tuple: s390x-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: i586-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: nvptx64-nvidia-cuda
+          os: ubuntu-latest
+        - tuple: thumbv6m-none-eabi
+          os: ubuntu-latest
+        - tuple: thumbv7m-none-eabi
+          os: ubuntu-latest
+        - tuple: thumbv7em-none-eabi
+          os: ubuntu-latest
+        - tuple: thumbv7em-none-eabihf
+          os: ubuntu-latest
+        - tuple: loongarch64-unknown-linux-gnu
+          os: ubuntu-latest
+        - tuple: wasm32-wasip1
+          os: ubuntu-latest
+
+        # macOS targets
+        - tuple: x86_64-apple-darwin
+          os: macos-15-large
+        - tuple: x86_64-apple-ios-macabi
+          os: macos-15-large
+        - tuple: aarch64-apple-darwin
+          os: macos-15
+        - tuple: aarch64-apple-ios-macabi
+          os: macos-15
+        # FIXME: gh-actions build environment doesn't have linker support
+        # - tuple: i686-apple-darwin
+        #   os: macos-13
+
+        # Windows targets
+        - tuple: x86_64-pc-windows-msvc
+          os: windows-2025
+        - tuple: i686-pc-windows-msvc
+          os: windows-2025
+        - tuple: aarch64-pc-windows-msvc
+          os: windows-11-arm
+        - tuple: x86_64-pc-windows-gnu
+          os: windows-2025
+        # - tuple: i686-pc-windows-gnu
+        #   os: windows-latest
+
+        # Add additional variables to the matrix variations generated above using `include`:
+        include:
+        # `TEST_EVERYTHING` setups - there should be at least 1 for each architecture
+        - target:
+            tuple: aarch64-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: aarch64_be-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+          build_std: true
+        - target:
+            tuple: armv7-unknown-linux-gnueabihf
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: loongarch64-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: powerpc-unknown-linux-gnu
+            os: ubuntu-latest
+          disable_assert_instr: true
+          test_everything: true
+        - target:
+            tuple: powerpc64-unknown-linux-gnu
+            os: ubuntu-latest
+          disable_assert_instr: true
+          test_everything: true
+        - target:
+            tuple: powerpc64le-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: riscv32gc-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+          build_std: true
+        - target:
+            tuple: riscv64gc-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: s390x-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        - target:
+            tuple: x86_64-unknown-linux-gnu
+            os: ubuntu-latest
+          test_everything: true
+        # MIPS targets disabled since they are dropped to tier 3.
+        # See https://github.com/rust-lang/compiler-team/issues/648
+        #- target:
+        #    tuple: mips-unknown-linux-gnu
+        #    os: ubuntu-latest
+        #  norun: true
+        #- target:
+        #    tuple: mips64-unknown-linux-gnuabi64
+        #    os: ubuntu-latest
+        #  norun: true
+        #- target:
+        #    tuple: mips64el-unknown-linux-gnuabi64
+        #    os: ubuntu-latest
+        #  norun: true
+        #- target:
+        #    tuple: mipsel-unknown-linux-musl
+        #    os: ubuntu-latest
+        #  norun: true
+        - target:
+            tuple: aarch64-apple-darwin
+            os: macos-15
+          norun: true # https://github.com/rust-lang/stdarch/issues/1206
+        - target:
+            tuple: aarch64-apple-ios-macabi
+            os: macos-15
+          norun: true # https://github.com/rust-lang/stdarch/issues/1206
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+      shell: bash
+      if: matrix.target.os != 'windows-11-arm'
+    - name: Install Rust for `windows-11-arm` runners
+      # The arm runners don't have Rust pre-installed (https://github.com/actions/partner-runner-images/issues/77)
+      run: |
+        curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
+        echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+      shell: bash
+      if: matrix.target.os == 'windows-11-arm'
+
+    - run: rustup target add ${{ matrix.target.tuple }}
+      shell: bash
+      if: matrix.build_std == ''
+    - run: |
+        rustup component add rust-src
+        echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
+      shell: bash
+      if: matrix.build_std != ''
+
+    # Configure some env vars based on matrix configuration
+    - run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV
+      shell: bash
+    - run: echo "NORUN=1" >> $GITHUB_ENV
+      shell: bash
+      if: matrix.norun != '' || startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
+    - run: echo "STDARCH_TEST_EVERYTHING=1" >> $GITHUB_ENV
+      shell: bash
+      if: matrix.test_everything != ''
+    - run: echo "STDARCH_DISABLE_ASSERT_INSTR=1" >> $GITHUB_ENV
+      shell: bash
+      if: matrix.disable_assert_instr != ''
+    - run: echo "NOSTD=1" >> $GITHUB_ENV
+      shell: bash
+      if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
+
+    # Windows & OSX go straight to `run.sh` ...
+    - run: ./ci/run.sh
+      shell: bash
+      if: matrix.target.os != 'ubuntu-latest' || startsWith(matrix.target.tuple, 'thumb')
+      env:
+        TARGET: ${{ matrix.target.tuple }}
+
+    # ... while Linux goes to `run-docker.sh`
+    - run: ./ci/run-docker.sh ${{ matrix.target.tuple }}
+      shell: bash
+      if: matrix.target.os == 'ubuntu-latest' && !startsWith(matrix.target.tuple, 'thumb')
+      env:
+        TARGET: ${{ matrix.target.tuple }}
+
+  build-std-detect:
+    needs: [style]
+    name: Build std_detect
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: rustup update nightly && rustup default nightly
+    - run: ./ci/build-std-detect.sh
+
+  conclusion:
+    needs:
+      - docs
+      - verify
+      - test
+      - build-std-detect
+    runs-on: ubuntu-latest
+    # We need to ensure this job does *not* get skipped if its dependencies fail,
+    # because a skipped job is considered a success by GitHub. So we have to
+    # overwrite `if:`. We use `!cancelled()` to ensure the job still does not get run
+    # when the workflow is canceled manually.
+    #
+    # ALL THE PREVIOUS JOBS NEED TO BE ADDED TO THE `needs` SECTION OF THIS JOB!
+    if: ${{ !cancelled() }} # make sure this is never "skipped"
+    steps:
+      - name: Conclusion
+        run: |
+          # Print the dependent jobs to see them in the CI log
+          jq -C <<< '${{ toJson(needs) }}'
+          # Check if all jobs that we depend on (in the needs array) were successful.
+          jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
--- a/library/stdarch/.gitignore
+++ b/library/stdarch/.gitignore
@ -0,0 +1,9 @@
+.*.swp
+target
+tags
+crates/stdarch-gen-arm/aarch64.rs
+crates/stdarch-gen-arm/arm.rs
+crates/stdarch-gen-loongarch/lasx.c
+crates/stdarch-gen-loongarch/lsx.c
+c_programs/*
+rust_programs/*
--- a/library/stdarch/.gitmodules
+++ b/library/stdarch/.gitmodules
--- a/library/stdarch/CONTRIBUTING.md
+++ b/library/stdarch/CONTRIBUTING.md
@ -0,0 +1,93 @@
+# Contributing to stdarch
+
+The `stdarch` crate is more than willing to accept contributions! First you'll
+probably want to check out the repository and make sure that tests pass for you:
+
+```
+$ git clone https://github.com/rust-lang/stdarch
+$ cd stdarch
+$ TARGET="<your-target-arch>" ci/run.sh
+```
+
+Where `<your-target-arch>` is the target triple as used by `rustup`, e.g. `x86_64-unknown-linux-gnu` (without any preceding `nightly-` or similar).
+Also remember that this repository requires the nightly channel of Rust!
+The above tests do in fact require nightly Rust to be the default on your system; to set that, use `rustup default nightly` (and `rustup default stable` to revert).
+
+If any of the above steps don't work, [please let us know][new]!
+
+Next up you can [find an issue][issues] to help out on; we've selected a few
+with the [`help wanted`][help] tag which could
+particularly use some help. You may be most interested in [#40][vendor],
+implementing all vendor intrinsics on x86. That issue's got some good pointers
+about where to get started!
+
+If you've got general questions feel free to [join us on gitter][gitter] and ask
+around! Feel free to ping either @BurntSushi or @alexcrichton with questions.
+
+[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd
+
+# How to write examples for stdarch intrinsics
+
+There are a few features that must be enabled for the given intrinsic to work
+properly and the example must only be run by `cargo test --doc` when the feature
+is supported by the CPU. As a result, the default `fn main` that is generated by
+`rustdoc` will not work (in most cases). Consider using the following as a guide
+to ensure your example works as expected.
+
+```rust
+/// # // We need cfg_target_feature to ensure the example is only
+/// # // run by `cargo test --doc` when the CPU supports the feature
+/// # #![feature(cfg_target_feature)]
+/// # // We need target_feature for the intrinsic to work
+/// # #![feature(target_feature)]
+/// #
+/// # // rustdoc by default uses `extern crate stdarch`, but we need the
+/// # // `#[macro_use]`
+/// # #[macro_use] extern crate stdarch;
+/// #
+/// # // The real main function
+/// # fn main() {
+/// #     // Only run this if `<target feature>` is supported
+/// #     if cfg_feature_enabled!("<target feature>") {
+/// #         // Create a `worker` function that will only be run if the target feature
+/// #         // is supported and ensure that `target_feature` is enabled for your worker
+/// #         // function
+/// #         #[target_feature(enable = "<target feature>")]
+/// #         unsafe fn worker() {
+///
+/// // Write your example here. Feature specific intrinsics will work here! Go wild!
+///
+/// #         }
+/// #         unsafe { worker(); }
+/// #     }
+/// # }
+```
+
+If some of the above syntax does not look familiar, the [Documentation as tests] section
+of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free
+to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping
+to improve the documentation of `stdarch`!
+
+# Alternative Testing Instructions
+
+It is generally recommended that you use `ci/run-docker.sh` to run the tests.
+However this might not work for you, e.g. if you are on Windows.
+
+In that case you can fall back to running `cargo +nightly test` and `cargo +nightly test --release -p core_arch` for testing the code generation.
+Note that these require the nightly toolchain to be installed and for `rustc` to know about your target triple and its CPU.
+In particular you need to set the `TARGET` environment variable as you would for `ci/run.sh`.
+In addition you need to set `RUSTFLAGS` to indicate target features, e.g. `RUSTFLAGS="-C target-feature=+avx2"`.
+You can also set `-C target-cpu=native` if you're "just" developing against your current CPU.
+
+Be warned that when you use these alternative instructions, [things may go less smoothly than they would with `ci/run-docker.sh`][ci-run-good], e.g. instruction generation tests may fail because the disassembler named them differently, e.g. it may generate `vaesenc` instead of `aesenc` instructions despite them behaving the same.
+Also, these instructions execute fewer tests than would normally be run, so don't be surprised if, when you eventually open a pull request, some errors show up for tests not covered here.
+
+
+[new]: https://github.com/rust-lang/stdarch/issues/new
+[issues]: https://github.com/rust-lang/stdarch/issues
+[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
+[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period
+[vendor]: https://github.com/rust-lang/stdarch/issues/40
+[Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests
+[Rust Book]: https://doc.rust-lang.org/book/first-edition
+[ci-run-good]: https://github.com/rust-lang/stdarch/issues/931#issuecomment-711412126
--- a/library/stdarch/Cargo.lock
+++ b/library/stdarch/Cargo.lock
@ -0,0 +1,965 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.98"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+
+[[package]]
+name = "assert-instr-macro"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.102",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+
+[[package]]
+name = "cc"
+version = "1.2.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
+dependencies = [
+ "shlex",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
+
+[[package]]
+name = "clap"
+version = "4.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim 0.11.1",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.102",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "core_arch"
+version = "0.1.5"
+dependencies = [
+ "std_detect",
+ "stdarch-test",
+ "syscalls",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "csv"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "darling"
+version = "0.13.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c"
+dependencies = [
+ "darling_core",
+ "darling_macro",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.13.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim 0.10.0",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.13.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835"
+dependencies = [
+ "darling_core",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "diff"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "env_logger"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
+dependencies = [
+ "log",
+ "regex",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
+dependencies = [
+ "humantime",
+ "is-terminal",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "getrandom"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.15.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "humantime"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
+
+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.15.4",
+]
+
+[[package]]
+name = "intrinsic-test"
+version = "0.1.0"
+dependencies = [
+ "clap",
+ "csv",
+ "diff",
+ "itertools",
+ "lazy_static",
+ "log",
+ "pretty_env_logger",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.172"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
+
+[[package]]
+name = "linked-hash-map"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
+
+[[package]]
+name = "log"
+version = "0.4.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+
+[[package]]
+name = "memchr"
+version = "2.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "pretty_env_logger"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c"
+dependencies = [
+ "env_logger 0.10.2",
+ "log",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quick-xml"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ca7dd09b5f4a9029c35e323b086d0a68acdc673317b9c4d002c6f1d4a7278c6"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
+[[package]]
+name = "quickcheck"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
+dependencies = [
+ "env_logger 0.8.4",
+ "log",
+ "rand",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
+
+[[package]]
+name = "rustc-std-workspace-alloc"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9d441c3b2ebf55cebf796bfdc265d67fa09db17b7bb6bd4be75c509e1e8fec3"
+
+[[package]]
+name = "rustc-std-workspace-core"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa9c45b374136f52f2d6311062c7146bff20fec063c3f5d46a410bd937746955"
+
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "semver"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+
+[[package]]
+name = "serde"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.102",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.140"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_with"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff"
+dependencies = [
+ "serde",
+ "serde_with_macros",
+]
+
+[[package]]
+name = "serde_with_macros"
+version = "1.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b"
+dependencies = [
+ "indexmap 1.9.3",
+ "ryu",
+ "serde",
+ "yaml-rust",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "simd-test-macro"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.102",
+]
+
+[[package]]
+name = "std_detect"
+version = "0.1.5"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "rustc-std-workspace-alloc",
+ "rustc-std-workspace-core",
+]
+
+[[package]]
+name = "stdarch-gen-arm"
+version = "0.1.0"
+dependencies = [
+ "itertools",
+ "lazy_static",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "serde",
+ "serde_with",
+ "serde_yaml",
+ "walkdir",
+]
+
+[[package]]
+name = "stdarch-gen-loongarch"
+version = "0.1.0"
+dependencies = [
+ "rand",
+]
+
+[[package]]
+name = "stdarch-test"
+version = "0.1.0"
+dependencies = [
+ "assert-instr-macro",
+ "cc",
+ "cfg-if",
+ "lazy_static",
+ "rustc-demangle",
+ "simd-test-macro",
+ "wasmprinter",
+]
+
+[[package]]
+name = "stdarch-verify"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quick-xml",
+ "quote",
+ "serde",
+ "serde_json",
+ "syn 2.0.102",
+]
+
+[[package]]
+name = "stdarch_examples"
+version = "0.0.0"
+dependencies = [
+ "core_arch",
+ "quickcheck",
+ "rand",
+ "std_detect",
+]
+
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syscalls"
+version = "0.6.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d0e35dc7d73976a53c7e6d7d177ef804a0c0ee774ec77bcc520c2216fd7cbe"
+
+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasmparser"
+version = "0.113.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
+dependencies = [
+ "indexmap 2.9.0",
+ "semver",
+]
+
+[[package]]
+name = "wasmprinter"
+version = "0.2.67"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
+dependencies = [
+ "anyhow",
+ "wasmparser",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "yaml-rust"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
+dependencies = [
+ "linked-hash-map",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.102",
+]
--- a/library/stdarch/Cargo.toml
+++ b/library/stdarch/Cargo.toml
@ -0,0 +1,19 @@
+[workspace]
+resolver = "1"
+members = [
+  "crates/*",
+  "examples",
+]
+exclude = [
+  "crates/wasm-assert-instr-tests"
+]
+
+[profile.release]
+debug = true
+opt-level = 3
+incremental = true
+
+[profile.bench]
+debug = 1
+opt-level = 3
+incremental = true
--- a/library/stdarch/LICENSE-APACHE
+++ b/library/stdarch/LICENSE-APACHE
@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
--- a/library/stdarch/LICENSE-MIT
+++ b/library/stdarch/LICENSE-MIT
@ -0,0 +1,25 @@
+Copyright (c) 2017 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
--- a/library/stdarch/README.md
+++ b/library/stdarch/README.md
@ -0,0 +1,18 @@
+stdarch - Rust's standard library SIMD components
+=======
+
+[![Actions Status](https://github.com/rust-lang/stdarch/workflows/CI/badge.svg)](https://github.com/rust-lang/stdarch/actions)
+
+
+# Crates
+
+This repository contains two main crates:
+
+* [`core_arch`](crates/core_arch/README.md) implements `core::arch` - Rust's
+  core library architecture-specific intrinsics, and
+  
+* [`std_detect`](crates/std_detect/README.md) implements `std::detect` - Rust's
+  standard library run-time CPU feature detection.
+
+The `std::simd` component now lives in the
+[`packed_simd_2`](https://github.com/rust-lang/packed_simd) crate.
--- a/library/stdarch/ci/build-std-detect.sh
+++ b/library/stdarch/ci/build-std-detect.sh
@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+# Build std_detect on non-Linux & non-x86 targets.
+#
+# In std_detect, non-x86 targets have OS-specific implementations,
+# but we can test only Linux in CI. This script builds targets that are
+# supported by std_detect but cannot be tested in CI.
+
+set -ex
+cd "$(dirname "$0")"/..
+
+targets=(
+    # Linux
+    aarch64-unknown-linux-musl
+    armv5te-unknown-linux-musleabi
+    aarch64-unknown-linux-ohos
+    armv7-unknown-linux-ohos
+
+    # Android
+    aarch64-linux-android
+    arm-linux-androideabi
+
+    # FreeBSD
+    aarch64-unknown-freebsd
+    armv6-unknown-freebsd
+    powerpc-unknown-freebsd
+    powerpc64-unknown-freebsd
+
+    # OpenBSD
+    aarch64-unknown-openbsd
+
+    # Windows
+    aarch64-pc-windows-msvc
+)
+
+rustup component add rust-src # for -Z build-std
+
+cd crates/std_detect
+for target in "${targets[@]}"; do
+    if rustup target add "${target}" &>/dev/null; then
+        cargo build --target "${target}"
+    else
+        # tier 3 targets require -Z build-std.
+        cargo build -Z build-std="core,alloc" --target "${target}"
+    fi
+done
--- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@ -0,0 +1,19 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  g++ \
+  ca-certificates \
+  libc6-dev \
+  gcc-aarch64-linux-gnu \
+  g++-aarch64-linux-gnu \
+  libc6-dev-arm64-cross \
+  qemu-user \
+  make \
+  file \
+  clang-19 \
+  lld
+
+ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
+    CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
+    OBJDUMP=aarch64-linux-gnu-objdump \
+    STDARCH_TEST_SKIP_FEATURE=tme
--- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@ -0,0 +1,30 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  g++ \
+  ca-certificates \
+  libc6-dev \
+  libc6-dev-arm64-cross \
+  qemu-user \
+  make \
+  file \
+  clang-19 \
+  curl \
+  xz-utils \
+  lld
+
+ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
+
+# Download the aarch64_be gcc toolchain
+RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
+RUN tar -xvf "${TOOLCHAIN}.tar.xz"
+RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
+
+ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
+ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
+
+ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
+ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
+ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
+ENV STDARCH_TEST_SKIP_FEATURE=tme
--- a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@ -0,0 +1,13 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  ca-certificates \
+  libc6-dev \
+  gcc-arm-linux-gnueabihf \
+  libc6-dev-armhf-cross \
+  qemu-user \
+  make \
+  file
+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
+    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
+    OBJDUMP=arm-linux-gnueabihf-objdump
--- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@ -0,0 +1,17 @@
+FROM ubuntu:24.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  g++ \
+  ca-certificates \
+  libc6-dev \
+  gcc-arm-linux-gnueabihf \
+  g++-arm-linux-gnueabihf \
+  libc6-dev-armhf-cross \
+  qemu-user \
+  make \
+  file \
+  clang-19 \
+  lld
+ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
+    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
+    OBJDUMP=arm-linux-gnueabihf-objdump
--- a/library/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile
@ -0,0 +1,7 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc-multilib \
+  libc6-dev \
+  file \
+  make \
+  ca-certificates
--- a/library/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile
@ -0,0 +1,7 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc-multilib \
+  libc6-dev \
+  file \
+  make \
+  ca-certificates
--- a/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@ -0,0 +1,12 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc libc6-dev qemu-user-static ca-certificates \
+    gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
+
+
+ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
+    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-loongarch64-static -cpu max -L /usr/loongarch64-linux-gnu" \
+    OBJDUMP=loongarch64-linux-gnu-objdump \
+    STDARCH_TEST_SKIP_FEATURE=frecipe
--- a/library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
@ -0,0 +1,13 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-mips-linux-gnu libc6-dev-mips-cross \
+        qemu-system-mips \
+        qemu-user \
+        make \
+        file
+
+ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
+    CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
+    OBJDUMP=mips-linux-gnu-objdump
--- a/library/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/library/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@ -0,0 +1,10 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
+        qemu-system-mips64 qemu-user
+
+ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
+    CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
+    OBJDUMP=mips64-linux-gnuabi64-objdump
--- a/library/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/library/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@ -0,0 +1,10 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
+        qemu-system-mips64el
+
+ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
+    CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
+    OBJDUMP=mips64el-linux-gnuabi64-objdump
--- a/library/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile
+++ b/library/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile
@ -0,0 +1,25 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    ca-certificates \
+    gcc \
+    libc6-dev \
+    make \
+    qemu-user \
+    qemu-system-mips \
+    bzip2 \
+    curl \
+    file
+
+RUN mkdir /toolchain
+
+# Note that this originally came from:
+# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
+RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
+      tar xjf - -C /toolchain --strip-components=2
+
+ENV PATH=$PATH:/rust/bin:/toolchain/bin \
+    CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
+    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
+    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
--- a/library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
+++ b/library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
@ -0,0 +1,5 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  libc6-dev \
+  ca-certificates
--- a/library/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@ -0,0 +1,12 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
+        qemu-system-ppc make file
+
+ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
+    CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu mpc8610 -L /usr/powerpc-linux-gnu" \
+    CC=powerpc-linux-gnu-gcc \
+    OBJDUMP=powerpc-linux-gnu-objdump \
+    STDARCH_TEST_SKIP_FEATURE=vsx
--- a/library/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@ -0,0 +1,14 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
+        file make
+
+ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
+    CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -cpu power11 -L /usr/powerpc64-linux-gnu" \
+    CC=powerpc64-linux-gnu-gcc \
+    OBJDUMP=powerpc64-linux-gnu-objdump \
+    STDARCH_TEST_SKIP_FEATURE=vsx \
+#   These 2 tests have erratic behaviour with qemu, see https://gitlab.com/qemu-project/qemu/-/issues/1623#note_2449012173
+    STDARCH_TEST_SKIP_FUNCTION=vec_lde_u16,vec_lde_u32
--- a/library/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@ -0,0 +1,12 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
+        file make
+
+# Work around qemu triggering a sigill on vec_subs if the cpu target is not defined.
+ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
+    CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -cpu power11 -L /usr/powerpc64le-linux-gnu" \
+    CC=powerpc64le-linux-gnu-gcc \
+    OBJDUMP=powerpc64le-linux-gnu-objdump
--- a/library/stdarch/ci/docker/riscv32gc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/riscv32gc-unknown-linux-gnu/Dockerfile
@ -0,0 +1,15 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        wget xz-utils make file llvm
+
+ENV VERSION=2025.01.20
+
+RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
+    -O riscv-toolchain.tar.xz
+RUN tar -xJf riscv-toolchain.tar.xz
+
+ENV CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_LINKER=/riscv/bin/riscv32-unknown-linux-gnu-gcc \
+    CARGO_TARGET_RISCV32GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv32 -cpu max -L /riscv/sysroot" \
+    OBJDUMP=llvm-objdump
--- a/library/stdarch/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@ -0,0 +1,10 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libc6-dev qemu-user ca-certificates \
+        gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
+        llvm
+
+ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \
+    CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -cpu max -L /usr/riscv64-linux-gnu" \
+    OBJDUMP=llvm-objdump
--- a/library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
@ -0,0 +1,14 @@
+FROM ubuntu:25.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        curl ca-certificates \
+        gcc libc6-dev \
+        gcc-s390x-linux-gnu libc6-dev-s390x-cross \
+        qemu-user \
+        make \
+        clang \
+        file
+
+ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
+    CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -cpu max -L /usr/s390x-linux-gnu" \
+    OBJDUMP=s390x-linux-gnu-objdump
--- a/library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
+++ b/library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
@ -0,0 +1,13 @@
+FROM ubuntu:25.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update -y && apt-get install -y --no-install-recommends \
+  ca-certificates \
+  curl \
+  xz-utils \
+  clang
+
+RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz | tar xJf -
+ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux
+
+ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@ -0,0 +1,18 @@
+FROM ubuntu:25.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  libc6-dev \
+  file \
+  make \
+  ca-certificates \
+  wget \
+  xz-utils
+
+# Fetch the SDE emulator from the CI mirror over HTTPS (ca-certificates is
+# installed above) and unpack it into /intel-sde.
+RUN wget https://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
+RUN mkdir intel-sde
+RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
+# Run the test binaries under SDE, using the CPUID table that lives next to
+# this Dockerfile in the checkout.
+ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
+            -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
+            -rtm-mode full -tsx --"
+# These tests fail with SDE as it doesn't support saving register data
+ENV STDARCH_TEST_SKIP_FUNCTION="xsave,xsaveopt,xsave64,xsaveopt64"
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
@ -0,0 +1,71 @@
+# Copyright (C) 2024-2024 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials, and your
+# use of them is governed by the express license under which they were provided to
+# you ("License"). Unless the License provides otherwise, you may not use, modify,
+# copy, publish, distribute, disclose or transmit this software or the related
+# documents without Intel's prior written permission.
+#
+# This software and the related documents are provided as is, with no express or
+# implied warranties, other than those that are expressly stated in the License.
+#
+# The CPUID information in this file is for software enabling purposes only and
+# it is not a full and accurate representation of the CPU under development which
+# it represents.
+# The CPUID information in this file is not a guarantee of the availability of
+# features or characteristics in the final released CPU.
+#
+# CPUID_VERSION = 1.0
+#      Input      =>               Output
+# EAX      ECX    =>   EAX      EBX      ECX      EDX
+00000000 ******** => 00000024 68747541 444d4163 69746e65
+00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
+00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
+00000003 ******** => 00000000 00000000 00000000 00000000
+00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
+00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
+00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
+00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
+00000004 00000004 => 00000000 00000000 00000000 00000000
+00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
+00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
+00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
+00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
+00000008 ******** => 00000000 00000000 00000000 00000000
+00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
+0000000a ******** => 07300403 00000000 00000000 00000603
+0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
+0000000b 00000001 => 00000004 00000002 00000201 0000001e
+0000000c ******** => 00000000 00000000 00000000 00000000
+0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
+0000000d 00000001 => 0000001f 00000240 00000100 00000000
+0000000d 00000002 => 00000100 00000240 00000000 00000000
+0000000d 00000005 => 00000040 00000440 00000000 00000000 #zmasks
+0000000d 00000006 => 00000200 00000480 00000000 00000000 #zmmh
+0000000d 00000007 => 00000400 00000680 00000000 00000000 #zmm
+0000000d 00000011 => 00000040 00000ac0 00000002 00000000 #tileconfig
+0000000d 00000012 => 00002000 00000b00 00000006 00000000 #tiles
+0000000d 00000013 => 00000080 000003c0 00000000 00000000 #APX
+00000014 00000000 => 00000000 00000010 00000000 00000000 #ptwrite
+00000019 ******** => 00000000 00000005 00000000 00000000 #Key Locker
+0000001d 00000000 => 00000001 00000000 00000000 00000000 #AMX Tile
+0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
+0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
+0000001e 00000001 => 000001ff 00000000 00000000 00000000
+0000001f 00000000 => 00000001 00000002 00000100 0000001e
+0000001f 00000001 => 00000007 00000070 00000201 0000001e
+0000001f 00000002 => 00000000 00000000 00000002 0000001e
+00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
+80000000 ******** => 80000008 00000000 00000000 00000000
+80000001 ******** => 00000000 00000000 00200961 2c100000
+80000002 ******** => 00000000 00000000 00000000 00000000
+80000003 ******** => 00000000 00000000 00000000 00000000
+80000004 ******** => 00000000 00000000 00000000 00000000
+80000005 ******** => 00000000 00000000 00000000 00000000
+80000006 ******** => 00000000 00000000 01006040 00000000
+80000007 ******** => 00000000 00000000 00000000 00000100
+80000008 ******** => 00003028 00000200 00000200 00000000
+
+# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
+# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
+# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.
--- a/library/stdarch/ci/dox.sh
+++ b/library/stdarch/ci/dox.sh
@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# Builds and documents the `core_arch` and `std_detect` crates, either for the
+# single target triple passed as the first argument or, when no argument is
+# given, for the fixed list of triples at the bottom of this script.
+
+set -ex
+
+export RUSTDOCFLAGS="-D warnings"
+
+dox() {
+  if [ "$CI" != "" ]; then
+    rustup target add "${1}" || true
+  fi
+
+  cargo clean --target "${1}"
+
+  cargo build --verbose --target "${1}" --manifest-path crates/core_arch/Cargo.toml
+  cargo build --verbose --target "${1}" --manifest-path crates/std_detect/Cargo.toml
+
+  cargo doc --verbose --target "${1}" --manifest-path crates/core_arch/Cargo.toml
+  cargo doc --verbose --target "${1}" --manifest-path crates/std_detect/Cargo.toml
+}
+
+if [ -z "$1" ]; then
+  dox i686-unknown-linux-gnu
+  dox x86_64-unknown-linux-gnu
+  dox armv7-unknown-linux-gnueabihf
+  dox aarch64-unknown-linux-gnu
+  dox powerpc-unknown-linux-gnu
+  dox powerpc64le-unknown-linux-gnu
+  dox loongarch64-unknown-linux-gnu
+  # MIPS targets disabled since they are dropped to tier 3.
+  # See https://github.com/rust-lang/compiler-team/issues/648
+  #dox mips-unknown-linux-gnu
+  #dox mips64-unknown-linux-gnuabi64
+  dox wasm32-unknown-unknown
+  dox nvptx64-nvidia-cuda
+else
+  dox "${1}"
+fi
--- a/library/stdarch/ci/run-docker.sh
+++ b/library/stdarch/ci/run-docker.sh
@ -0,0 +1,60 @@
+#!/usr/bin/env sh
+
+# Small script to run tests for a target (or all targets) inside all the
+# respective docker images.
+
+set -ex
+
+if [ $# -lt 1 ]; then
+    >&2 echo "Usage: $0 <TARGET>"
+    exit 1
+fi
+
+# Builds the docker image for target `$1` from `ci/docker/$1/Dockerfile` and
+# runs `ci/run.sh $1` inside it. The checkout is mounted at `/checkout`, the
+# host's rustc sysroot at `/rust` and the host's `~/.cargo` at `/cargo`.
+run() {
+    # Set the linker that is used for the host (e.g. when compiling a build.rs)
+    # This overrides any configuration in e.g. `.cargo/config.toml`, which will
+    # probably not work within the docker container.
+    HOST_LINKER="CARGO_TARGET_$(rustc --print host-tuple | tr '[:lower:]-' '[:upper:]_')_LINKER"
+
+    # Create the lockfile on the host first: the checkout is mounted read-only
+    # in the container below, which would otherwise lead to
+    # `Read-only file system (os error 30)`.
+    cargo generate-lockfile
+
+    echo "Building docker container for TARGET=${1}"
+    docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
+    # These directories are mounted writable on top of the read-only checkout.
+    mkdir -p target c_programs rust_programs
+    echo "Running docker"
+    # `--env NAME` without a value forwards NAME from the calling environment.
+    # shellcheck disable=SC2016
+    docker run \
+      --rm \
+      --user "$(id -u)":"$(id -g)" \
+      --env CARGO_HOME=/cargo \
+      --env CARGO_TARGET_DIR=/checkout/target \
+      --env TARGET="${1}" \
+      --env "${HOST_LINKER}"="cc" \
+      --env STDARCH_TEST_EVERYTHING \
+      --env STDARCH_DISABLE_ASSERT_INSTR \
+      --env NOSTD \
+      --env NORUN \
+      --env RUSTFLAGS \
+      --env CARGO_UNSTABLE_BUILD_STD \
+      --env RUST_STD_DETECT_UNSTABLE \
+      --volume "${HOME}/.cargo":/cargo \
+      --volume "$(rustc --print sysroot)":/rust:ro \
+      --volume "$(pwd)":/checkout:ro \
+      --volume "$(pwd)"/target:/checkout/target \
+      --volume "$(pwd)"/c_programs:/checkout/c_programs \
+      --volume "$(pwd)"/rust_programs:/checkout/rust_programs \
+      --init \
+      --workdir /checkout \
+      --privileged \
+      stdarch \
+      sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh ${1}"
+}
+
+# If the target argument is empty, run every target that has a directory under
+# `ci/docker`; otherwise run only the requested target.
+if [ -z "$1" ]; then
+  for d in ci/docker/*/; do
+    # `run` expects a bare target name, not the `ci/docker/<target>/` path.
+    run "$(basename "${d}")"
+  done
+else
+  run "${1}"
+fi
--- a/library/stdarch/ci/run.sh
+++ b/library/stdarch/ci/run.sh
@ -0,0 +1,203 @@
+#!/usr/bin/env sh
+
+set -ex
+
+: "${TARGET?The TARGET environment variable must be set.}"
+
+# Tests are all super fast anyway, and they fault often enough on travis that
+# having only one thread increases debuggability to be worth it.
+#export RUST_BACKTRACE=full
+#export RUST_TEST_NOCAPTURE=1
+#export RUST_TEST_THREADS=1
+
+export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
+export HOST_RUSTFLAGS="${RUSTFLAGS}"
+export PROFILE="${PROFILE:="--profile=release"}"
+
+case ${TARGET} in
+    # On Windows the linker performs identical COMDAT folding (ICF) by default
+    # in release mode which removes identical COMDAT sections. This interferes
+    # with our instruction assertions just like LLVM's MergeFunctions pass so
+    # we disable it.
+    *-pc-windows-msvc)
+        export RUSTFLAGS="${RUSTFLAGS} -Clink-args=/OPT:NOICF"
+        ;;
+    # On 32-bit use a static relocation model which avoids some extra
+    # instructions when dealing with static data, notably allowing some
+    # instruction assertion checks to pass below the 20 instruction limit. If
+    # this is the default, dynamic, then too many instructions are generated
+    # when we assert the instruction for a function and it causes tests to fail.
+    i686-* | i586-*)
+        export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static"
+        ;;
+    # Some x86_64 targets enable by default more features beyond SSE2,
+    # which cause some instruction assertion checks to fail.
+    x86_64-*)
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3"
+        ;;
+    #Unoptimized build uses fast-isel which breaks with msa
+    mips-* | mipsel-*)
+	export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
+	;;
+    armv7-*eabihf | thumbv7-*eabihf)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
+        ;;
+    # Enable the `zk`, `zks`, `zbb` and `zbc` target features for all RISC-V
+    # targets.
+    riscv*)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
+        ;;
+esac
+
+echo "RUSTFLAGS=${RUSTFLAGS}"
+echo "OBJDUMP=${OBJDUMP}"
+echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}"
+echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}"
+echo "STDARCH_TEST_SKIP_FEATURE=${STDARCH_TEST_SKIP_FEATURE}"
+echo "STDARCH_TEST_SKIP_FUNCTION=${STDARCH_TEST_SKIP_FUNCTION}"
+echo "PROFILE=${PROFILE}"
+
+cargo_test() {
+    cmd="cargo"
+    subcmd="test"
+    if [ "$NORUN" = "1" ]; then
+        export subcmd="build"
+    fi
+    cmd="$cmd ${subcmd} --target=$TARGET $1"
+    cmd="$cmd -- $2"
+
+    case ${TARGET} in
+        # wasm targets can't catch panics so if a test failures make sure the test
+        # harness isn't trying to capture output, otherwise we won't get any useful
+        # output.
+        wasm32*)
+            cmd="$cmd --nocapture"
+            ;;
+    esac
+    $cmd
+}
+
+CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
+STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
+STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
+INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
+
+cargo_test "${CORE_ARCH} ${PROFILE}"
+
+if [ "$NOSTD" != "1" ]; then
+    cargo_test "${STD_DETECT} ${PROFILE}"
+
+    cargo_test "${STD_DETECT} --no-default-features"
+    cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io"
+    cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval"
+    cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io"
+
+    cargo_test "${STDARCH_EXAMPLES} ${PROFILE}"
+fi
+
+
+# Test targets compiled with extra features.
+case ${TARGET} in
+    x86_64-unknown-linux-gnu)
+        export STDARCH_DISABLE_ASSERT_INSTR=1
+
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
+        cargo_test "${PROFILE}"
+
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx512f"
+        cargo_test "${PROFILE}"
+        ;;
+    x86_64* | i686*)
+        export STDARCH_DISABLE_ASSERT_INSTR=1
+
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
+        cargo_test "${PROFILE}"
+        ;;
+    # FIXME: don't build anymore
+    #mips-*gnu* | mipsel-*gnu*)
+    #    export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
+    #    cargo_test "${PROFILE}"
+	  #    ;;
+    mips64*)
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
+        cargo_test "${PROFILE}"
+	      ;;
+    s390x*)
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1"
+        cargo_test "${PROFILE}"
+	      ;;
+    powerpc64*)
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
+        cargo_test "${PROFILE}"
+
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vsx"
+        cargo_test "${PROFILE}"
+        ;;
+    powerpc*)
+        # qemu has a bug in PPC32 which leads to a crash when compiled with `vsx`
+        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
+        cargo_test "${PROFILE}"
+        ;;
+
+    # Setup aarch64 & armv7 specific variables, the runner, along with some 
+    # tests to skip
+    aarch64-unknown-linux-gnu*)
+        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
+        ;;
+
+    aarch64_be-unknown-linux-gnu*)
+        TEST_CPPFLAGS="-fuse-ld=lld"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
+        ;;
+
+    armv7-unknown-linux-gnueabihf*)
+        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
+        ;;
+    *)
+        ;;
+
+esac
+
+# Arm specific
+case "${TARGET}" in
+    aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
+        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
+            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
+            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
+            --runner "${TEST_RUNNER}" \
+            --cppcompiler "${TEST_CXX_COMPILER}" \
+            --skip "${TEST_SKIP_INTRINSICS}" \
+            --target "${TARGET}"
+        ;;
+
+    aarch64_be-unknown-linux-gnu*)
+        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
+            cargo run "${INTRINSIC_TEST}" "${PROFILE}"  \
+            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
+            --runner "${TEST_RUNNER}" \
+            --cppcompiler "${TEST_CXX_COMPILER}" \
+            --skip "${TEST_SKIP_INTRINSICS}" \
+            --target "${TARGET}" \
+            --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
+            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
+        ;;
+     *)
+        ;;
+esac
+
+if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
+    # Test examples
+    (
+        cd examples
+        cargo test --target "$TARGET" "${PROFILE}"
+        echo test | cargo run --target "$TARGET" "${PROFILE}" hex
+    )
+fi
--- a/library/stdarch/ci/style.sh
+++ b/library/stdarch/ci/style.sh
@ -0,0 +1,22 @@
+#!/usr/bin/env sh
+
+set -ex
+
+if rustup component add rustfmt-preview ; then
+    command -v rustfmt
+    rustfmt -V
+    cargo fmt --all -- --check
+fi
+
+# if rustup component add clippy-preview ; then
+#     cargo clippy -V
+#     cargo clippy --all -- -D clippy::pedantic
+# fi
+
+if shellcheck --version ; then
+    shellcheck -e SC2103 ci/*.sh
+else
+    echo "shellcheck not found"
+    exit 1
+fi
+
--- a/library/stdarch/crates/assert-instr-macro/Cargo.toml
+++ b/library/stdarch/crates/assert-instr-macro/Cargo.toml
@ -0,0 +1,17 @@
+[package]
+name = "assert-instr-macro"
+version = "0.1.0"
+authors = ["Alex Crichton <alex@alexcrichton.com>"]
+edition = "2024"
+
+[lib]
+proc-macro = true
+test = false
+
+[dependencies]
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full"] }
+
+[lints.rust]
+unexpected_cfgs = {level = "warn", check-cfg = ['cfg(optimized)'] }
--- a/library/stdarch/crates/assert-instr-macro/build.rs
+++ b/library/stdarch/crates/assert-instr-macro/build.rs
@ -0,0 +1,12 @@
+use std::env;
+
+/// Emits `cargo:rustc-cfg=optimized` when the `PROFILE` environment variable
+/// is `release` or `OPT_LEVEL` parses as an integer of at least 2.
+fn main() {
+    let release_profile = env::var("PROFILE").is_ok_and(|profile| profile == "release");
+    // An unset or non-numeric `OPT_LEVEL` counts as level 0.
+    let high_opt_level = env::var("OPT_LEVEL")
+        .ok()
+        .and_then(|level| level.parse::<i32>().ok())
+        .is_some_and(|level| level >= 2);
+
+    if release_profile || high_opt_level {
+        println!("cargo:rustc-cfg=optimized");
+    }
+}
--- a/library/stdarch/crates/assert-instr-macro/src/lib.rs
+++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs
@ -0,0 +1,224 @@
+//! Implementation of the `#[assert_instr]` macro
+//!
+//! This macro is used when testing the `stdarch` crate and is used to generate
+//! test cases to assert that functions do indeed contain the instructions that
+//! we're expecting them to contain.
+//!
+//! The procedural macro here is relatively simple, it simply appends a
+//! `#[test]` function to the original token stream which asserts that the
+//! function itself contains the relevant instruction.
+#![deny(rust_2018_idioms)]
+
+#[macro_use]
+extern crate quote;
+
+use proc_macro2::TokenStream;
+use quote::ToTokens;
+
+/// Attribute macro that appends a `#[test]` to the annotated function; the
+/// test hands a shim around the function to `::stdarch_test::assert` together
+/// with the expected instruction.
+///
+/// The attribute arguments are parsed as an `Invoc`: the instruction,
+/// optionally followed by `name = expr` pairs. Each pair supplies the value
+/// for the function argument or const generic parameter called `name`.
+/// Arguments without a pair become parameters of the generated shim, while
+/// every const generic parameter must have a pair.
+///
+/// The item is returned unchanged when the `optimized` cfg is not set for this
+/// crate or when the `STDARCH_DISABLE_ASSERT_INSTR` environment variable is set
+/// (to any value).
+///
+/// # Panics
+///
+/// Panics if the item is not a function, if an argument is not a typed
+/// argument with a plain identifier pattern, if a generic parameter is not a
+/// const generic, if a const generic has no value in the attribute arguments,
+/// or, when this crate is built for Windows, if `TARGET` is not set.
+#[proc_macro_attribute]
+pub fn assert_instr(
+    attr: proc_macro::TokenStream,
+    item: proc_macro::TokenStream,
+) -> proc_macro::TokenStream {
+    let invoc = match syn::parse::<Invoc>(attr) {
+        Ok(s) => s,
+        Err(e) => return e.to_compile_error().into(),
+    };
+    let item = match syn::parse::<syn::Item>(item) {
+        Ok(s) => s,
+        Err(e) => return e.to_compile_error().into(),
+    };
+    let func = match item {
+        syn::Item::Fn(ref f) => f,
+        _ => panic!("must be attached to a function"),
+    };
+
+    let instr = &invoc.instr;
+    let name = &func.sig.ident;
+    // The shim calls the function, so silence the deprecation lint there when
+    // the function itself is marked `#[deprecated]`.
+    let maybe_allow_deprecated = if func
+        .attrs
+        .iter()
+        .any(|attr| attr.path().is_ident("deprecated"))
+    {
+        quote! { #[allow(deprecated)] }
+    } else {
+        quote! {}
+    };
+
+    // Disable assert_instr for x86 targets compiled with avx enabled, which
+    // causes LLVM to generate different intrinsics than the ones we are
+    // testing for.
+    let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok();
+
+    // If instruction tests are disabled avoid emitting this shim at all, just
+    // return the original item without our attribute.
+    if !cfg!(optimized) || disable_assert_instr {
+        return (quote! { #item }).into();
+    }
+
+    // Turn the instruction into something usable inside an identifier:
+    // `.`, `/` and `:` become `_` and whitespace is dropped.
+    let instr_str = instr
+        .replace(['.', '/', ':'], "_")
+        .replace(char::is_whitespace, "");
+    let assert_name = syn::Ident::new(&format!("assert_{name}_{instr_str}"), name.span());
+    // This name has to be unique enough for us to find it in the disassembly later on:
+    let shim_name = syn::Ident::new(
+        &format!("stdarch_test_shim_{name}_{instr_str}"),
+        name.span(),
+    );
+    // `inputs`: parameters of the shim; `input_vals`: what the shim passes to
+    // the function; `const_vals`: values for the function's const generics.
+    let mut inputs = Vec::new();
+    let mut input_vals = Vec::new();
+    let mut const_vals = Vec::new();
+    let ret = &func.sig.output;
+    for arg in func.sig.inputs.iter() {
+        let capture = match *arg {
+            syn::FnArg::Typed(ref c) => c,
+            ref v => panic!(
+                "arguments must not have patterns: `{:?}`",
+                v.clone().into_token_stream()
+            ),
+        };
+        let ident = match *capture.pat {
+            syn::Pat::Ident(ref i) => &i.ident,
+            _ => panic!("must have bare arguments"),
+        };
+        // An argument named in the attribute is replaced by the given
+        // expression and is not a parameter of the shim; any other argument is
+        // forwarded from the shim's own parameter of the same name.
+        if let Some((_, tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
+            input_vals.push(quote! { #tokens });
+        } else {
+            inputs.push(capture);
+            input_vals.push(quote! { #ident });
+        }
+    }
+    for arg in func.sig.generics.params.iter() {
+        let c = match *arg {
+            syn::GenericParam::Const(ref c) => c,
+            ref v => panic!(
+                "only const generics are allowed: `{:?}`",
+                v.clone().into_token_stream()
+            ),
+        };
+        if let Some((_, tokens)) = invoc.args.iter().find(|a| c.ident == a.0) {
+            const_vals.push(quote! { #tokens });
+        } else {
+            panic!("const generics must have a value for tests");
+        }
+    }
+
+    // Copy onto the shim every attribute of the function whose first path
+    // segment starts with `target`.
+    let attrs = func
+        .attrs
+        .iter()
+        .filter(|attr| {
+            attr.path()
+                .segments
+                .first()
+                .expect("attr.path.segments.first() failed")
+                .ident
+                .to_string()
+                .starts_with("target")
+        })
+        .collect::<Vec<_>>();
+    let attrs = Append(&attrs);
+
+    // Use an ABI on Windows that passes SIMD values in registers, like what
+    // happens on Unix (I think?) by default.
+    let abi = if cfg!(windows) {
+        let target = std::env::var("TARGET").unwrap();
+        if target.contains("x86_64") {
+            syn::LitStr::new("sysv64", proc_macro2::Span::call_site())
+        } else if target.contains("86") {
+            syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
+        } else {
+            syn::LitStr::new("C", proc_macro2::Span::call_site())
+        }
+    } else {
+        syn::LitStr::new("C", proc_macro2::Span::call_site())
+    };
+    // The shim: an unmangled, never-inlined function that only calls the
+    // function under test.
+    let to_test = quote! {
+        #attrs
+        #maybe_allow_deprecated
+        #[unsafe(no_mangle)]
+        #[inline(never)]
+        pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
+            #name::<#(#const_vals),*>(#(#input_vals),*)
+        }
+    };
+
+    // The test defines the shim locally and passes its address, its name and
+    // the expected instruction to `stdarch_test::assert`.
+    let tokens: TokenStream = quote! {
+        #[test]
+        #[allow(non_snake_case)]
+        fn #assert_name() {
+            #to_test
+
+            ::stdarch_test::assert(#shim_name as usize, stringify!(#shim_name), #instr);
+        }
+    };
+
+    // Emit the original item followed by the generated test.
+    let tokens: TokenStream = quote! {
+        #item
+        #tokens
+    };
+    tokens.into()
+}
+
+/// Parsed arguments of an `#[assert_instr(...)]` attribute.
+struct Invoc {
+    /// Expected instruction, concatenated from the leading tokens.
+    instr: String,
+    /// `name = expr` pairs that follow the instruction.
+    args: Vec<(syn::Ident, syn::Expr)>,
+}
+
+impl syn::parse::Parse for Invoc {
+    /// Parses `instr` optionally followed by `, name = expr` pairs separated
+    /// by commas (a trailing comma is accepted).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if an unexpected token appears in the instruction, if
+    /// the instruction is empty, or if the `name = expr` list is malformed.
+    fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+        use syn::{Token, ext::IdentExt};
+
+        // The instruction is spelled as a run of identifiers (parsed with
+        // `parse_any`), `.` tokens and string literals, concatenated without
+        // separators, up to the first comma or the end of the input.
+        let mut instr = String::new();
+        while !input.is_empty() {
+            if input.parse::<Token![,]>().is_ok() {
+                break;
+            }
+            if let Ok(ident) = syn::Ident::parse_any(input) {
+                instr.push_str(&ident.to_string());
+                continue;
+            }
+            if input.parse::<Token![.]>().is_ok() {
+                instr.push('.');
+                continue;
+            }
+            if let Ok(s) = input.parse::<syn::LitStr>() {
+                instr.push_str(&s.value());
+                continue;
+            }
+            // Report the offending tokens through the compile error itself
+            // rather than printing them to the compiler's stdout.
+            return Err(input.error(format!(
+                "expected an instruction, found `{}`",
+                input.cursor().token_stream()
+            )));
+        }
+        if instr.is_empty() {
+            return Err(input.error("expected an instruction before comma"));
+        }
+        let mut args = Vec::new();
+        while !input.is_empty() {
+            let name = input.parse::<syn::Ident>()?;
+            input.parse::<Token![=]>()?;
+            let expr = input.parse::<syn::Expr>()?;
+            args.push((name, expr));
+
+            // After a pair, only a comma or the end of the input may follow.
+            if input.parse::<Token![,]>().is_err() {
+                if !input.is_empty() {
+                    return Err(input.error("extra tokens at end"));
+                }
+                break;
+            }
+        }
+        Ok(Self { instr, args })
+    }
+}
+
+/// Wrapper that tokenizes every element of the wrapped collection in turn,
+/// with nothing emitted between the elements.
+struct Append<T>(T);
+
+impl<T> quote::ToTokens for Append<T>
+where
+    T: Clone + IntoIterator,
+    T::Item: quote::ToTokens,
+{
+    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
+        // `into_iter` consumes its receiver, so iterate over a clone.
+        self.0
+            .clone()
+            .into_iter()
+            .for_each(|element| element.to_tokens(tokens));
+    }
+}
--- a/library/stdarch/crates/core_arch/Cargo.toml
+++ b/library/stdarch/crates/core_arch/Cargo.toml
@ -0,0 +1,33 @@
+[package]
+name = "core_arch"
+version = "0.1.5"
+authors = [
+    "Alex Crichton <alex@alexcrichton.com>",
+    "Andrew Gallant <jamslam@gmail.com>",
+    "Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
+]
+description = "`core::arch` - Rust's core library architecture-specific intrinsics."
+homepage = "https://github.com/rust-lang/stdarch"
+repository = "https://github.com/rust-lang/stdarch"
+readme = "README.md"
+keywords = ["core", "simd", "arch", "intrinsics"]
+categories = ["hardware-support", "no-std"]
+license = "MIT OR Apache-2.0"
+edition = "2024"
+
+[badges]
+is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" }
+is-it-maintained-open-issues = { repository = "rust-lang/stdarch" }
+maintenance = { status = "experimental" }
+
+[dev-dependencies]
+stdarch-test = { version = "0.*", path = "../stdarch-test" }
+std_detect = { version = "0.*", path = "../std_detect" }
+
+[target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dev-dependencies]
+syscalls = { version = "0.6.18", default-features = false }
+
+[lints.clippy]
+too_long_first_doc_paragraph = "allow"
+missing_transmute_annotations = "allow"
+useless_transmute = "allow"
--- a/library/stdarch/crates/core_arch/LICENSE-APACHE
+++ b/library/stdarch/crates/core_arch/LICENSE-APACHE
@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
--- a/library/stdarch/crates/core_arch/LICENSE-MIT
+++ b/library/stdarch/crates/core_arch/LICENSE-MIT
@ -0,0 +1,25 @@
+Copyright (c) 2017 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
--- a/library/stdarch/crates/core_arch/MISSING.md
+++ b/library/stdarch/crates/core_arch/MISSING.md
@ -0,0 +1,116 @@
+## The following neon instructions are currently not implemented in stdarch
+
+### Not implemented on arm:
+
+`vcadd_rot270_f32`
+
+`vcadd_rot90_f32`
+
+`vcaddq_rot270_f32`
+
+`vcaddq_rot90_f32`
+
+`vdot_s32`
+
+`vdot_u32`
+
+`vdotq_s32`
+
+`vdotq_u32`
+
+`vdot_lane_s32`
+
+`vdot_lane_u32`
+
+`vdotq_lane_s32`
+
+`vdotq_lane_u32`
+
+`vcmla_f32`
+
+`vcmla_lane_f32`
+
+`vcmla_laneq_f32`
+
+`vcmla_rot180_f32`
+
+`vcmla_rot180_lane_f32`
+
+`vcmla_rot180_laneq_f32`
+
+`vcmla_rot270_f32`
+
+`vcmla_rot270_lane_f32`
+
+`vcmla_rot270_laneq_f32`
+
+`vcmla_rot90_f32`
+
+`vcmla_rot90_lane_f32`
+
+`vcmla_rot90_laneq_f32`
+
+`vcmlaq_f32`
+
+`vcmlaq_lane_f32`
+
+`vcmlaq_laneq_f32`
+
+`vcmlaq_rot180_f32`
+
+`vcmlaq_rot180_lane_f32`
+
+`vcmlaq_rot180_laneq_f32`
+
+`vcmlaq_rot270_f32`
+
+`vcmlaq_rot270_lane_f32`
+
+`vcmlaq_rot270_laneq_f32`
+
+`vcmlaq_rot90_f32`
+
+`vcmlaq_rot90_lane_f32`
+
+`vcmlaq_rot90_laneq_f32`
+
+### Not implemented in LLVM:
+
+`vrnd32x_f64`
+
+`vrnd32xq_f64`
+
+`vrnd32z_f64`
+
+`vrnd32zq_f64`
+
+`vrnd64x_f64`
+
+`vrnd64xq_f64`
+
+`vrnd64z_f64`
+
+`vrnd64zq_f64`
+
+### LLVM Select errors may occur:
+
+`vsudot_lane_s32`
+
+`vsudot_laneq_s32`
+
+`vsudotq_lane_s32`
+
+`vsudotq_laneq_s32`
+
+`vusdot_lane_s32`
+
+`vusdot_laneq_s32`
+
+`vusdot_s32`
+
+`vusdotq_lane_s32`
+
+`vusdotq_laneq_s32`
+
+`vusdotq_s32`
+
--- a/library/stdarch/crates/core_arch/README.md
+++ b/library/stdarch/crates/core_arch/README.md
@ -0,0 +1,58 @@
+`core::arch` - Rust's core library architecture-specific intrinsics
+=======
+
+The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
+
+# Usage
+
+`core::arch` is available as part of `libcore` and it is re-exported by
+`libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate.
+
+Using `core::arch` via this crate requires nightly Rust, and it can (and does)
+break often. The only cases in which you should consider using it via this crate
+are:
+
+* if you need to re-compile `core::arch` yourself, e.g., with particular
+  target-features enabled that are not enabled for `libcore`/`libstd`. Note: if
+  you need to re-compile it for a non-standard target, please prefer using
+  `xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
+  this crate.
+  
+* using some features that might not be available even behind unstable Rust
+  features. We try to keep these to a minimum. If you need to use some of these
+  features, please open an issue so that we can expose them in nightly Rust and
+  you can use them from there.
+
+# Documentation
+
+* [Documentation - i686][i686]
+* [Documentation - x86\_64][x86_64]
+* [Documentation - arm][arm]
+* [Documentation - aarch64][aarch64]
+* [Documentation - powerpc][powerpc]
+* [Documentation - powerpc64][powerpc64]
+* [How to get started][contrib]
+* [How to help implement intrinsics][help-implement]
+
+[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
+[help-implement]: https://github.com/rust-lang/stdarch/issues/40
+[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
+[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
+[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/
+[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/
+[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/
+[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/
+
+# License
+
+`core_arch` is primarily distributed under the terms of both the MIT license and
+the Apache License (Version 2.0), with portions covered by various BSD-like
+licenses.
+
+See LICENSE-APACHE and LICENSE-MIT for details.
+
+# Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in `core_arch` by you, as defined in the Apache-2.0 license,
+shall be dual licensed as above, without any additional terms or conditions.
--- a/library/stdarch/crates/core_arch/missing-x86.md
+++ b/library/stdarch/crates/core_arch/missing-x86.md
@ -0,0 +1,258 @@
+
+<details><summary>["AMX-BF16"]</summary><p>
+
+  * [ ] [`__tile_dpbf16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbf16ps)
+</p></details>
+
+
+<details><summary>["AMX-COMPLEX"]</summary><p>
+
+  * [ ] [`__tile_cmmimfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmimfp16ps)
+  * [ ] [`__tile_cmmrlfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_cmmrlfp16ps)
+</p></details>
+
+
+<details><summary>["AMX-FP16"]</summary><p>
+
+  * [ ] [`__tile_dpfp16ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpfp16ps)
+</p></details>
+
+
+<details><summary>["AMX-INT8"]</summary><p>
+
+  * [ ] [`__tile_dpbssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbssd)
+  * [ ] [`__tile_dpbsud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbsud)
+  * [ ] [`__tile_dpbusd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbusd)
+  * [ ] [`__tile_dpbuud`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_dpbuud)
+</p></details>
+
+
+<details><summary>["AMX-TILE"]</summary><p>
+
+  * [ ] [`__tile_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_loadd)
+  * [ ] [`__tile_stored`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stored)
+  * [ ] [`__tile_stream_loadd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_stream_loadd)
+  * [ ] [`__tile_zero`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__tile_zero)
+</p></details>
+
+
+<details><summary>["AVX512_FP16"]</summary><p>
+
+  * [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
+  * [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
+  * [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
+</p></details>
+
+
+<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
+
+  * [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
+  * [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
+</p></details>
+
+
+<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
+
+  * [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
+  * [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
+  * [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
+  * [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
+</p></details>
+
+
+<details><summary>["CET_SS"]</summary><p>
+
+  * [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
+  * [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
+  * [ ] [`_get_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_get_ssp)
+  * [ ] [`_inc_ssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_inc_ssp)
+  * [ ] [`_incsspd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspd)
+  * [ ] [`_incsspq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_incsspq)
+  * [ ] [`_rdsspd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspd_i32)
+  * [ ] [`_rdsspq_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdsspq_i64)
+  * [ ] [`_rstorssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rstorssp)
+  * [ ] [`_saveprevssp`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_saveprevssp)
+  * [ ] [`_setssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_setssbsy)
+  * [ ] [`_wrssd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssd)
+  * [ ] [`_wrssq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrssq)
+  * [ ] [`_wrussd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussd)
+  * [ ] [`_wrussq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wrussq)
+</p></details>
+
+
+<details><summary>["CLDEMOTE"]</summary><p>
+
+  * [ ] [`_mm_cldemote`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cldemote)
+</p></details>
+
+
+<details><summary>["CLFLUSHOPT"]</summary><p>
+
+  * [ ] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)
+</p></details>
+
+
+<details><summary>["CLWB"]</summary><p>
+
+  * [ ] [`_mm_clwb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clwb)
+</p></details>
+
+
+<details><summary>["CMPCCXADD"]</summary><p>
+
+  * [ ] [`_cmpccxadd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi32)
+  * [ ] [`_cmpccxadd_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cmpccxadd_epi64)
+</p></details>
+
+
+<details><summary>["ENQCMD"]</summary><p>
+
+  * [ ] [`_enqcmd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmd)
+  * [ ] [`_enqcmds`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_enqcmds)
+</p></details>
+
+
+<details><summary>["FSGSBASE"]</summary><p>
+
+  * [ ] [`_readfsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u32)
+  * [ ] [`_readfsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readfsbase_u64)
+  * [ ] [`_readgsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u32)
+  * [ ] [`_readgsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_readgsbase_u64)
+  * [ ] [`_writefsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u32)
+  * [ ] [`_writefsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writefsbase_u64)
+  * [ ] [`_writegsbase_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u32)
+  * [ ] [`_writegsbase_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_writegsbase_u64)
+</p></details>
+
+
+<details><summary>["HRESET"]</summary><p>
+
+  * [ ] [`_hreset`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_hreset)
+</p></details>
+
+
+<details><summary>["INVPCID"]</summary><p>
+
+  * [ ] [`_invpcid`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_invpcid)
+</p></details>
+
+
+<details><summary>["MONITOR"]</summary><p>
+
+  * [ ] [`_mm_monitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_monitor)
+  * [ ] [`_mm_mwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mwait)
+</p></details>
+
+
+<details><summary>["MOVBE"]</summary><p>
+
+  * [ ] [`_loadbe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i16)
+  * [ ] [`_loadbe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i32)
+  * [ ] [`_loadbe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_loadbe_i64)
+  * [ ] [`_storebe_i16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i16)
+  * [ ] [`_storebe_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i32)
+  * [ ] [`_storebe_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_storebe_i64)
+</p></details>
+
+
+<details><summary>["MOVDIR64B"]</summary><p>
+
+  * [ ] [`_movdir64b`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_movdir64b)
+</p></details>
+
+
+<details><summary>["MOVDIRI"]</summary><p>
+
+  * [ ] [`_directstoreu_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u32)
+  * [ ] [`_directstoreu_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_directstoreu_u64)
+</p></details>
+
+
+<details><summary>["PCONFIG"]</summary><p>
+
+  * [ ] [`_pconfig_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pconfig_u32)
+</p></details>
+
+
+<details><summary>["POPCNT"]</summary><p>
+
+  * [ ] [`_mm_popcnt_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32)
+  * [ ] [`_mm_popcnt_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64)
+</p></details>
+
+
+<details><summary>["PREFETCHI"]</summary><p>
+
+  * [ ] [`_m_prefetchit0`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit0)
+  * [ ] [`_m_prefetchit1`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_prefetchit1)
+</p></details>
+
+
+<details><summary>["RAO_INT"]</summary><p>
+
+  * [ ] [`_aadd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i32)
+  * [ ] [`_aadd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aadd_i64)
+  * [ ] [`_aand_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i32)
+  * [ ] [`_aand_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aand_i64)
+  * [ ] [`_aor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i32)
+  * [ ] [`_aor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_aor_i64)
+  * [ ] [`_axor_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i32)
+  * [ ] [`_axor_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_axor_i64)
+</p></details>
+
+
+<details><summary>["RDPID"]</summary><p>
+
+  * [ ] [`_rdpid_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdpid_u32)
+</p></details>
+
+
+<details><summary>["SERIALIZE"]</summary><p>
+
+  * [ ] [`_serialize`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_serialize)
+</p></details>
+
+
+<details><summary>["SSE"]</summary><p>
+
+  * [ ] [`_mm_free`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free)
+  * [ ] [`_mm_malloc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_malloc)
+</p></details>
+
+
+<details><summary>["TSXLDTRK"]</summary><p>
+
+  * [ ] [`_xresldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xresldtrk)
+  * [ ] [`_xsusldtrk`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsusldtrk)
+</p></details>
+
+
+<details><summary>["UINTR"]</summary><p>
+
+  * [ ] [`_clui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clui)
+  * [ ] [`_senduipi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_senduipi)
+  * [ ] [`_stui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_stui)
+  * [ ] [`_testui`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_testui)
+</p></details>
+
+
+<details><summary>["USER_MSR"]</summary><p>
+
+  * [ ] [`_urdmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_urdmsr)
+  * [ ] [`_uwrmsr`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_uwrmsr)
+</p></details>
+
+
+<details><summary>["WAITPKG"]</summary><p>
+
+  * [ ] [`_tpause`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tpause)
+  * [ ] [`_umonitor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umonitor)
+  * [ ] [`_umwait`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_umwait)
+</p></details>
+
+
+<details><summary>["WBNOINVD"]</summary><p>
+
+  * [ ] [`_wbnoinvd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_wbnoinvd)
+</p></details>
+
--- a/library/stdarch/crates/core_arch/rustfmt.toml
+++ b/library/stdarch/crates/core_arch/rustfmt.toml
@ -0,0 +1,3 @@
+ignore = [
+    "src/simd.rs",
+]
--- a/library/stdarch/crates/core_arch/src/aarch64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/mod.rs
@ -0,0 +1,39 @@
+//! AArch64 intrinsics.
+//!
+//! The reference for NEON is [Arm's NEON Intrinsics Reference][arm_ref].
+//! [Arm's NEON Intrinsics Online Database][arm_dat] is also useful.
+//!
+//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
+//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
+
+#![cfg_attr(
+    all(target_arch = "aarch64", target_abi = "softfloat"),
+    // Just allow the warning: anyone soundly using the intrinsics has to enable
+    // the target feature, and that will generate a warning for them.
+    allow(aarch64_softfloat_neon)
+)]
+
+mod mte;
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub use self::mte::*;
+
+mod neon;
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub use self::neon::*;
+
+mod tme;
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub use self::tme::*;
+
+mod prefetch;
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub use self::prefetch::*;
+
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub use super::arm_shared::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[cfg(test)]
+pub(crate) mod test_support;
--- a/library/stdarch/crates/core_arch/src/aarch64/mte.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/mte.rs
@ -0,0 +1,171 @@
+//! AArch64 Memory tagging intrinsics
+//!
+//! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#markdown-toc-mte-intrinsics)
+
+// Raw declarations of the LLVM MTE intrinsics called by the public functions below.
+// On `aarch64` and `arm64ec` targets each declaration is bound, through `link_name`,
+// to the `llvm.aarch64.*` intrinsic with the matching mnemonic.
+unsafe extern "unadjusted" {
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.irg"
+    )]
+    fn irg_(ptr: *const (), exclude: i64) -> *const ();
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.gmi"
+    )]
+    fn gmi_(ptr: *const (), exclude: i64) -> i64;
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.ldg"
+    )]
+    fn ldg_(ptr: *const (), tag_ptr: *const ()) -> *const ();
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.stg"
+    )]
+    fn stg_(tagged_ptr: *const (), addr_to_tag: *const ());
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.addg"
+    )]
+    fn addg_(ptr: *const (), value: i64) -> *const ();
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.subp"
+    )]
+    fn subp_(ptr_a: *const (), ptr_b: *const ()) -> i64;
+}
+
+/// Return a pointer containing a randomly generated logical address tag.
+///
+/// * `src`: A pointer containing an address.
+/// * `mask`: A mask where each of the lower 16 bits specifies logical
+///   tags which must be excluded from consideration. Zero excludes no
+///   tags.
+///
+/// The returned pointer contains a copy of the `src` address, but with a
+/// randomly generated logical tag, excluding any specified by `mask`.
+///
+/// # Safety
+///
+/// The pointer provided by this intrinsic will be invalid until the memory
+/// has been appropriately tagged with `__arm_mte_set_tag`. If using that intrinsic
+/// on the provided pointer is itself invalid, then it will be permanently invalid
+/// and Undefined Behavior to dereference it.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_create_random_tag<T>(src: *const T, mask: u64) -> *const T {
+    // The intrinsic is declared on `*const ()` and `i64`; the casts only change types.
+    irg_(src as *const (), mask as i64) as *const T
+}
+
+/// Return a pointer with the logical address tag offset by a value.
+///
+/// * `src`: A pointer containing an address and a logical tag.
+/// * `OFFSET`: A compile-time constant value in the range [0, 15]. Values outside
+///   this range are rejected at compile time.
+///
+/// Adds offset to the logical address tag in `src`, wrapping if the result is
+/// outside of the valid 16 tags.
+///
+/// # Safety
+///
+/// See `__arm_mte_create_random_tag`.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_increment_tag<const OFFSET: i64, T>(src: *const T) -> *const T {
+    // Enforce the documented 4-bit range here so that an out-of-range constant is
+    // reported as a normal compile error instead of reaching the backend.
+    static_assert_uimm_bits!(OFFSET, 4);
+    addg_(src as *const (), OFFSET) as *const T
+}
+
+/// Add a logical tag to the set of excluded logical tags.
+///
+/// * `src`: A pointer containing an address and a logical tag.
+/// * `excluded`: A mask where the lower 16 bits each specify currently-excluded
+///   logical tags.
+///
+/// Adds the logical tag stored in `src` to the set in `excluded`, and returns
+/// the result.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_exclude_tag<T>(src: *const T, excluded: u64) -> u64 {
+    // The intrinsic is declared on `i64`; the mask is cast in and the result cast back.
+    gmi_(src as *const (), excluded as i64) as u64
+}
+
+/// Store an allocation tag for the 16-byte granule of memory.
+///
+/// * `tag_address`: A pointer containing an address and a logical tag, which
+///   must be 16-byte aligned.
+///
+/// # Safety
+///
+/// `tag_address` must be 16-byte aligned. The tag will apply to the
+/// entire 16-byte memory granule.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_set_tag<T>(tag_address: *const T) {
+    // The same pointer is passed both as the tagged pointer and as the address to tag.
+    stg_(tag_address as *const (), tag_address as *const ());
+}
+
+/// Load an allocation tag from memory, returning a new pointer with the
+/// corresponding logical tag.
+///
+/// * `address`: A pointer containing an address from which allocation tag memory
+///   is read. This does not need to be 16-byte aligned.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_get_tag<T>(address: *const T) -> *const T {
+    // `address` is passed for both the `ptr` and `tag_ptr` arguments of the intrinsic.
+    ldg_(address as *const (), address as *const ()) as *const T
+}
+
+/// Calculate the difference between the address parts of two pointers, ignoring
+/// the tags, and sign-extending the result.
+///
+/// The two pointers may have different pointee types; both are passed to the
+/// underlying intrinsic as untyped pointers.
+#[inline]
+#[target_feature(enable = "mte")]
+#[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
+pub unsafe fn __arm_mte_ptrdiff<T, U>(a: *const T, b: *const U) -> i64 {
+    subp_(a as *const (), b as *const ())
+}
+
+// Instruction checks for the MTE intrinsics. The generic intrinsics are wrapped in
+// concrete functions so that `assert_instr` can be attached to them; the wrappers are
+// not `#[test]` functions and nothing in this module calls them, hence
+// `#[allow(dead_code)]`. The checks are disabled for MSVC targets (see the FIXME below).
+#[cfg(test)]
+mod test {
+    use super::*;
+    use stdarch_test::assert_instr;
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(irg))] // FIXME: MSVC  `dumpbin` doesn't support MTE
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_create_random_tag(src: *const (), mask: u64) -> *const () {
+        __arm_mte_create_random_tag(src, mask)
+    }
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(addg))]
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_increment_tag(src: *const ()) -> *const () {
+        __arm_mte_increment_tag::<1, _>(src)
+    }
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(gmi))]
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_exclude_tag(src: *const (), excluded: u64) -> u64 {
+        __arm_mte_exclude_tag(src, excluded)
+    }
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stg))]
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_set_tag(src: *const ()) {
+        __arm_mte_set_tag(src)
+    }
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldg))]
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_get_tag(src: *const ()) -> *const () {
+        __arm_mte_get_tag(src)
+    }
+
+    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(subp))]
+    #[allow(dead_code)]
+    #[target_feature(enable = "mte")]
+    unsafe fn test_arm_mte_ptrdiff(a: *const (), b: *const ()) -> i64 {
+        __arm_mte_ptrdiff(a, b)
+    }
+}
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
--- a/library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
@ -0,0 +1,80 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// Declaration of LLVM's `llvm.prefetch` intrinsic. The arguments are the address to
+// prefetch, the read/write selector, the locality hint and the cache type.
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.prefetch"]
+    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
+}
+
+/// `RW` value: the prefetch is preparing for a read.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_READ: i32 = 0;
+
+/// `RW` value: the prefetch is preparing for a write.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_WRITE: i32 = 1;
+
+/// `LOCALITY` value: streaming or non-temporal prefetch, for data that is used only once.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_LOCALITY0: i32 = 0;
+
+/// `LOCALITY` value: fetch into level 3 cache.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_LOCALITY1: i32 = 1;
+
+/// `LOCALITY` value: fetch into level 2 cache.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_LOCALITY2: i32 = 2;
+
+/// `LOCALITY` value: fetch into level 1 cache.
+///
+/// See [`_prefetch`](fn._prefetch.html).
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+pub const _PREFETCH_LOCALITY3: i32 = 3;
+
+/// Fetch the cache line that contains address `p` using the given `RW` and `LOCALITY`.
+///
+/// The `RW` must be one of:
+///
+/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
+///   for a read.
+///
+/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
+///   for a write.
+///
+/// The `LOCALITY` must be one of:
+///
+/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
+///   non-temporal prefetch, for data that is used only once.
+///
+/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
+///
+/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
+///
+/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
+///
+/// Both constants are validated at compile time: `RW` must fit in 1 bit and
+/// `LOCALITY` must fit in 2 bits.
+///
+/// The prefetch memory instructions signal to the memory system that memory accesses
+/// from a specified address are likely to occur in the near future. The memory system
+/// can respond by taking actions that are expected to speed up the memory access when
+/// they do occur, such as preloading the specified address into one or more caches.
+/// Because these signals are only hints, it is valid for a particular CPU to treat
+/// any or all prefetch instructions as a NOP.
+///
+/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
+#[inline(always)]
+#[cfg_attr(test, assert_instr("prfm pldl1strm", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY0))]
+#[cfg_attr(test, assert_instr("prfm pldl3keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY1))]
+#[cfg_attr(test, assert_instr("prfm pldl2keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY2))]
+#[cfg_attr(test, assert_instr("prfm pldl1keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY3))]
+#[cfg_attr(test, assert_instr("prfm pstl1strm", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY0))]
+#[cfg_attr(test, assert_instr("prfm pstl3keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY1))]
+#[cfg_attr(test, assert_instr("prfm pstl2keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY2))]
+#[cfg_attr(test, assert_instr("prfm pstl1keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY3))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
+// FIXME: Replace this with the standard ACLE __pld/__pldx/__pli/__plix intrinsics
+pub unsafe fn _prefetch<const RW: i32, const LOCALITY: i32>(p: *const i8) {
+    // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
+    // Reject out-of-range hints at compile time before they reach the intrinsic.
+    static_assert_uimm_bits!(RW, 1);
+    static_assert_uimm_bits!(LOCALITY, 2);
+    prefetch(p, RW, LOCALITY, 1);
+}
--- a/library/stdarch/crates/core_arch/src/aarch64/test_support.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/test_support.rs
@ -0,0 +1,184 @@
+use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*};
+use std::{mem::transmute, vec::Vec};
+
+/// Expands to a `Vec<u64>` of fixed bit patterns used as inputs for the 64-bit
+/// integer/polynomial test harnesses: all zeros, all ones, and several repeated-byte
+/// patterns.
+macro_rules! V_u64 {
+    () => {
+        vec![
+            0x0000000000000000u64,
+            0x0101010101010101u64,
+            0x0202020202020202u64,
+            0x0F0F0F0F0F0F0F0Fu64,
+            0x8080808080808080u64,
+            0xF0F0F0F0F0F0F0F0u64,
+            0xFFFFFFFFFFFFFFFFu64,
+        ]
+    };
+}
+
+/// Expands to a `Vec<f64>` of inputs for the `f64` test harnesses: a few ordinary
+/// values plus the extremes (`MAX`, `MIN`), both infinities and NaN.
+macro_rules! V_f64 {
+    () => {
+        vec![
+            0.0f64,
+            1.0f64,
+            -1.0f64,
+            1.2f64,
+            2.4f64,
+            f64::MAX,
+            f64::MIN,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            f64::NAN,
+        ]
+    };
+}
+
+/// Expands to a closure that reinterprets a value of type `$t` as a `u64` using
+/// `transmute`. The closure body is not wrapped in `unsafe`, so the macro must be
+/// expanded inside an `unsafe` context.
+macro_rules! to64 {
+    ($t : ident) => {
+        |v: $t| -> u64 { transmute(v) }
+    };
+}
+
+/// Expands to a closure that reinterprets a value of type `$t` as a `u128` using
+/// `transmute`. The closure body is not wrapped in `unsafe`, so the macro must be
+/// expanded inside an `unsafe` context.
+macro_rules! to128 {
+    ($t : ident) => {
+        |v: $t| -> u128 { transmute(v) }
+    };
+}
+
+/// Checks an intrinsic against a scalar reference implementation.
+///
+/// For every value `v` in `vals`:
+///
+/// * `fill1(v)` builds the two (identical) operands passed to `test_fun`;
+/// * `verify_fun(v, v)` computes the reference result, which `fill2` converts into
+///   the same type that `test_fun` returns;
+/// * both results are passed through `cast` and compared with `assert_eq!`.
+///
+/// Panics (through `assert_eq!`) on the first mismatch, printing the inputs and both
+/// pre-`cast` results.
+pub(crate) fn test<T, U, V, W, X>(
+    vals: Vec<T>,
+    fill1: fn(T) -> V,
+    fill2: fn(U) -> W,
+    cast: fn(W) -> X,
+    test_fun: fn(V, V) -> W,
+    verify_fun: fn(T, T) -> U,
+) where
+    T: Copy + core::fmt::Debug,
+    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
+    V: Copy + core::fmt::Debug,
+    W: Copy + core::fmt::Debug,
+    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
+{
+    // Every input is paired with itself, so both operands always hold the same value.
+    // NOTE(review): mixed pairs are never exercised — confirm this is intended.
+    for &val in &vals {
+        let lhs: V = fill1(val);
+        let rhs: V = fill1(val);
+
+        let got_raw: W = test_fun(lhs, rhs);
+        let want_raw: W = fill2(verify_fun(val, val));
+
+        let got: X = cast(got_raw);
+        let want: X = cast(want_raw);
+
+        assert_eq!(
+            got, want,
+            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
+            val, val, &lhs, &rhs, got_raw, &lhs, &rhs, want_raw
+        );
+    }
+}
+
+/// Generates `$n`, a harness that forwards to `test::<$t, $u, $v, $w, $x>` with the
+/// input values (`$vals`), fill functions (`$fill1`, `$fill2`) and cast (`$cast`)
+/// fixed, leaving only the function under test and its scalar reference to the caller.
+///
+/// The call is wrapped in `unsafe` because the `$cast` expressions used in this file
+/// (`to64!` / `to128!`) expand to closures that call `transmute`.
+macro_rules! gen_test_fn {
+    ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
+        pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
+            unsafe {
+                test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
+            };
+        }
+    };
+}
+
+/// Generates `$id`, a function that produces a `$out_t` in which each of the
+/// `$num_els` elements is a copy of the given `$in_t` value.
+///
+/// The value is replicated into a `[$in_t; $num_els]` array, reinterpreted as the
+/// integer type `$cmp_t`, and then reinterpreted again as `$out_t`. `transmute`
+/// requires equal sizes, so all three types must be the same size.
+///
+/// `$el_width` is only referenced by the commented-out debug print.
+macro_rules! gen_fill_fn {
+    ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
+        pub(crate) fn $id(val: $in_t) -> $out_t {
+            let initial: [$in_t; $num_els] = [val; $num_els];
+            let result: $cmp_t = unsafe { transmute(initial) };
+            let result_out: $out_t = unsafe { transmute(result) };
+
+            // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
+
+            result_out
+        }
+    };
+}
+
+// Fill functions for the 64-bit element vector types. The plain variants build
+// one-element vectors (via `u64`); the `q` variants build two-element vectors (via `u128`).
+gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
+gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
+gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
+gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
+gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
+gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
+
+// Harnesses over `f64` inputs (`V_f64!()`).
+//
+// The `*_ari_*` variants take functions that return a float vector, checked against a
+// scalar `f64` reference; the `*_cmp_*` variants take functions that return an unsigned
+// vector, checked against a scalar `u64` reference. (The names presumably stand for
+// "arithmetic" and "compare".) The `testq_*` variants use the two-element vector types.
+gen_test_fn!(
+    test_ari_f64,
+    f64,
+    f64,
+    float64x1_t,
+    float64x1_t,
+    u64,
+    V_f64!(),
+    fill_f64,
+    fill_f64,
+    to64!(float64x1_t)
+);
+gen_test_fn!(
+    test_cmp_f64,
+    f64,
+    u64,
+    float64x1_t,
+    uint64x1_t,
+    u64,
+    V_f64!(),
+    fill_f64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    testq_ari_f64,
+    f64,
+    f64,
+    float64x2_t,
+    float64x2_t,
+    u128,
+    V_f64!(),
+    fillq_f64,
+    fillq_f64,
+    to128!(float64x2_t)
+);
+gen_test_fn!(
+    testq_cmp_f64,
+    f64,
+    u64,
+    float64x2_t,
+    uint64x2_t,
+    u128,
+    V_f64!(),
+    fillq_f64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
+
+// Harnesses over `u64` inputs (`V_u64!()`) for functions that take polynomial vectors
+// and return unsigned vectors.
+gen_test_fn!(
+    test_cmp_p64,
+    u64,
+    u64,
+    poly64x1_t,
+    uint64x1_t,
+    u64,
+    V_u64!(),
+    fill_p64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    testq_cmp_p64,
+    u64,
+    u64,
+    poly64x2_t,
+    uint64x2_t,
+    u128,
+    V_u64!(),
+    fillq_p64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
--- a/library/stdarch/crates/core_arch/src/aarch64/tme.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/tme.rs
@ -0,0 +1,201 @@
+//! ARM's Transactional Memory Extensions (TME).
+//!
+//! This CPU feature is available on AArch64 (A architecture profile).
+//! This feature is in the non-NEON feature set. TME-specific vendor documentation can
+//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
+//!
+//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
+//!
+//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
+//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
+//!
+//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
+//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
+//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
+//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// Declarations of the LLVM TME intrinsics wrapped by `__tstart`, `__tcommit`,
+// `__tcancel` and `__ttest` below.
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.aarch64.tstart"]
+    fn aarch64_tstart() -> u64;
+    #[link_name = "llvm.aarch64.tcommit"]
+    fn aarch64_tcommit();
+    #[link_name = "llvm.aarch64.tcancel"]
+    fn aarch64_tcancel(imm0: u64);
+    #[link_name = "llvm.aarch64.ttest"]
+    fn aarch64_ttest() -> u64;
+}
+
+/// Transaction successfully started.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
+
+/// Extraction mask for the failure reason (bits 0 to 14).
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
+
+/// Transaction retry is possible.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_RTRY: u64 = 1 << 15;
+
+/// Transaction executed a TCANCEL instruction.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_CNCL: u64 = 1 << 16;
+
+/// Transaction aborted because a conflict occurred.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_MEM: u64 = 1 << 17;
+
+/// Fallback error type for any other reason.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_IMP: u64 = 1 << 18;
+
+/// Transaction aborted because a non-permissible operation was attempted.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_ERR: u64 = 1 << 19;
+
+/// Transaction aborted because the read or write set limit was exceeded.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_SIZE: u64 = 1 << 20;
+
+/// Transaction aborted because the transactional nesting level was exceeded.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_NEST: u64 = 1 << 21;
+
+/// Transaction aborted due to a debug trap.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_DBG: u64 = 1 << 22;
+
+/// Transaction failed because of an interrupt.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_INT: u64 = 1 << 23;
+
+/// Indicates a TRIVIAL version of TM is available.
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
+
+// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
+// understand these instructions.
+
+/// Starts a new transaction. When the transaction starts successfully the return value is 0.
+/// If the transaction fails, all state modifications are discarded and a cause of the failure
+/// is encoded in the return value.
+///
+/// Compare the result with `_TMSTART_SUCCESS` to detect a successful start; the
+/// `_TMFAILURE_*` constants describe the bits of a failure value.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub unsafe fn __tstart() -> u64 {
+    aarch64_tstart()
+}
+
+/// Commits the current transaction. For a nested transaction, the only effect is that the
+/// transactional nesting depth is decreased. For an outer transaction, the state modifications
+/// performed transactionally are committed to the architectural state.
+///
+/// This is a direct call to the `llvm.aarch64.tcommit` intrinsic.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub unsafe fn __tcommit() {
+    aarch64_tcommit()
+}
+
+/// Cancels the current transaction and discards all state modifications that were performed transactionally.
+///
+/// `IMM16` is the immediate passed to the cancel instruction. It must not exceed
+/// 65535; larger values are rejected at compile time.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(tcancel, IMM16 = 0x0)
+)]
+#[rustc_legacy_const_generics(0)]
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub unsafe fn __tcancel<const IMM16: u64>() {
+    // Compile-time check that the immediate fits in 16 bits.
+    static_assert!(IMM16 <= 65535);
+    aarch64_tcancel(IMM16);
+}
+
+/// Tests if executing inside a transaction. If no transaction is currently executing,
+/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
+///
+/// This is a direct call to the `llvm.aarch64.ttest` intrinsic.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
+#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
+pub unsafe fn __ttest() -> u64 {
+    aarch64_ttest()
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::aarch64::*;
+
+    // Immediate passed to `__tcancel`, masked with `_TMFAILURE_REASON` so that it only
+    // occupies the failure-reason bits.
+    const CANCEL_CODE: u64 = 0x123 & _TMFAILURE_REASON;
+
+    // Retries `__tstart` up to ten times; once a transaction starts, `x` is bumped
+    // inside it and the loop ends. Until then `x` must still be zero.
+    #[simd_test(enable = "tme")]
+    unsafe fn test_tstart() {
+        let mut x = 0;
+        for i in 0..10 {
+            let code = tme::__tstart();
+            if code == _TMSTART_SUCCESS {
+                x += 1;
+                assert_eq!(x, i + 1);
+                break;
+            }
+            assert_eq!(x, 0);
+        }
+    }
+
+    // Starts and commits a transaction on each iteration and checks that the
+    // increment made inside the transaction is visible afterwards.
+    #[simd_test(enable = "tme")]
+    unsafe fn test_tcommit() {
+        let mut x = 0;
+        for i in 0..10 {
+            let code = tme::__tstart();
+            if code == _TMSTART_SUCCESS {
+                x += 1;
+                assert_eq!(x, i + 1);
+                tme::__tcommit();
+            }
+            assert_eq!(x, i + 1);
+        }
+    }
+
+    // Cancels a started transaction; since `__tcancel` discards transactional state
+    // modifications, the increment of `x` must not be visible afterwards.
+    #[simd_test(enable = "tme")]
+    unsafe fn test_tcancel() {
+        let mut x = 0;
+
+        for i in 0..10 {
+            let code = tme::__tstart();
+            if code == _TMSTART_SUCCESS {
+                x += 1;
+                assert_eq!(x, i + 1);
+                tme::__tcancel::<CANCEL_CODE>();
+                break;
+            }
+        }
+
+        assert_eq!(x, 0);
+    }
+
+    // Cancels once `__ttest` reports a transaction depth of 2.
+    // NOTE(review): a transaction that started at any other depth is neither committed
+    // nor cancelled here — presumably it relies on the next iteration nesting another
+    // one; confirm this is intended.
+    #[simd_test(enable = "tme")]
+    unsafe fn test_ttest() {
+        for _ in 0..10 {
+            let code = tme::__tstart();
+            if code == _TMSTART_SUCCESS && tme::__ttest() == 2 {
+                tme::__tcancel::<CANCEL_CODE>();
+                break;
+            }
+        }
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm/dsp.rs
+++ b/library/stdarch/crates/core_arch/src/arm/dsp.rs
@ -0,0 +1,390 @@
+//! # References:
+//!
+//! - Section 8.3 "16-bit multiplications"
+//!
+//! Intrinsics that could live here:
+//!
+//! - \[x\] __smulbb
+//! - \[x\] __smulbt
+//! - \[x\] __smultb
+//! - \[x\] __smultt
+//! - \[x\] __smulwb
+//! - \[x\] __smulwt
+//! - \[x\] __qadd
+//! - \[x\] __qsub
+//! - \[x\] __qdbl
+//! - \[x\] __smlabb
+//! - \[x\] __smlabt
+//! - \[x\] __smlatb
+//! - \[x\] __smlatt
+//! - \[x\] __smlawb
+//! - \[x\] __smlawt
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// Declarations of the LLVM ARM DSP intrinsics wrapped by the public functions below.
+// The two-operand forms take `(a, b)`; the accumulating `smla*` forms take `(a, b, c)`.
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.arm.smulbb"]
+    fn arm_smulbb(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smulbt"]
+    fn arm_smulbt(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smultb"]
+    fn arm_smultb(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smultt"]
+    fn arm_smultt(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smulwb"]
+    fn arm_smulwb(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smulwt"]
+    fn arm_smulwt(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qadd"]
+    fn arm_qadd(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qsub"]
+    fn arm_qsub(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlabb"]
+    fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlabt"]
+    fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlatb"]
+    fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlatt"]
+    fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlawb"]
+    fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlawt"]
+    fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
+}
+
+/// Insert a SMULBB instruction
+///
+/// Returns the equivalent of a\[0\] * b\[0\]
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+///
+/// This is a direct call to the `llvm.arm.smulbb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smulbb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smulbb(a: i32, b: i32) -> i32 {
+    arm_smulbb(a, b)
+}
+
+/// Insert a SMULTB instruction
+///
+/// Returns the equivalent of a\[1\] * b\[0\]
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+///
+/// This is a direct call to the `llvm.arm.smultb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smultb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smultb(a: i32, b: i32) -> i32 {
+    arm_smultb(a, b)
+}
+
+/// Insert a SMULBT instruction
+///
+/// Returns the equivalent of a\[0\] * b\[1\]
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+///
+/// This is a direct call to the `llvm.arm.smulbt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smulbt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smulbt(a: i32, b: i32) -> i32 {
+    arm_smulbt(a, b)
+}
+
+/// Insert a SMULTT instruction
+///
+/// Returns the equivalent of a\[1\] * b\[1\]
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+///
+/// This is a direct call to the `llvm.arm.smultt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smultt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smultt(a: i32, b: i32) -> i32 {
+    arm_smultt(a, b)
+}
+
+/// Insert a SMULWB instruction
+///
+/// Multiplies the 32-bit signed first operand with the low halfword
+/// (as a 16-bit signed integer) of the second operand.
+/// Returns the top 32 bits of the 48-bit product.
+///
+/// This is a direct call to the `llvm.arm.smulwb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smulwb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smulwb(a: i32, b: i32) -> i32 {
+    arm_smulwb(a, b)
+}
+
+/// Insert a SMULWT instruction
+///
+/// Multiplies the 32-bit signed first operand with the high halfword
+/// (as a 16-bit signed integer) of the second operand.
+/// Returns the top 32 bits of the 48-bit product.
+///
+/// This is a direct call to the `llvm.arm.smulwt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smulwt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smulwt(a: i32, b: i32) -> i32 {
+    arm_smulwt(a, b)
+}
+
+/// Signed saturating addition
+///
+/// Returns the 32-bit saturating signed equivalent of a + b.
+/// Sets the Q flag if saturation occurs.
+///
+/// This is a direct call to the `llvm.arm.qadd` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(qadd))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
+    arm_qadd(a, b)
+}
+
+/// Signed saturating subtraction
+///
+/// Returns the 32-bit saturating signed equivalent of a - b.
+/// Sets the Q flag if saturation occurs.
+///
+/// This is a direct call to the `llvm.arm.qsub` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(qsub))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
+    arm_qsub(a, b)
+}
+
+/// Insert a QADD instruction
+///
+/// Returns the 32-bit saturating signed equivalent of a + a.
+/// Sets the Q flag if saturation occurs.
+///
+/// There is no dedicated intrinsic for doubling; this calls the `llvm.arm.qadd`
+/// intrinsic with `a` as both operands.
+#[inline]
+#[cfg_attr(test, assert_instr(qadd))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qdbl(a: i32) -> i32 {
+    arm_qadd(a, a)
+}
+
+/// Insert a SMLABB instruction
+///
+/// Returns the equivalent of a\[0\] * b\[0\] + c
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlabb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlabb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlabb(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlabb(a, b, c)
+}
+
+/// Insert a SMLABT instruction
+///
+/// Returns the equivalent of a\[0\] * b\[1\] + c
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlabt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlabt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlabt(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlabt(a, b, c)
+}
+
+/// Insert a SMLATB instruction
+///
+/// Returns the equivalent of a\[1\] * b\[0\] + c
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlatb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlatb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlatb(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlatb(a, b, c)
+}
+
+/// Insert a SMLATT instruction
+///
+/// Returns the equivalent of a\[1\] * b\[1\] + c
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlatt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlatt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlatt(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlatt(a, b, c)
+}
+
+/// Insert a SMLAWB instruction
+///
+/// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlawb` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlawb))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlawb(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlawb(a, b, c)
+}
+
+/// Insert a SMLAWT instruction
+///
+/// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16
+/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
+/// Sets the Q flag if overflow occurs on the addition.
+///
+/// This is a direct call to the `llvm.arm.smlawt` intrinsic.
+#[inline]
+#[cfg_attr(test, assert_instr(smlawt))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlawt(a: i32, b: i32, c: i32) -> i32 {
+    arm_smlawt(a, b, c)
+}
+
+// Runtime tests for the DSP intrinsics. Packed operands are built as `i16x2` values
+// (element 0 is the low halfword, element 1 the high halfword) and transmuted to `i32`.
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::{arm::*, simd::i16x2};
+    use std::mem::transmute;
+
+    #[test]
+    fn smulbb() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
+        }
+    }
+
+    #[test]
+    fn smulbt() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
+        }
+    }
+
+    #[test]
+    fn smultb() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
+        }
+    }
+
+    #[test]
+    fn smultt() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
+        }
+    }
+
+    #[test]
+    fn smulwb() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = 30;
+            assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
+        }
+    }
+
+    #[test]
+    fn smulwt() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = 30;
+            assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
+        }
+    }
+
+    #[test]
+    fn qadd() {
+        unsafe {
+            assert_eq!(super::__qadd(-10, 60), 50);
+            assert_eq!(super::__qadd(i32::MAX, 10), i32::MAX);
+            assert_eq!(super::__qadd(i32::MIN, -10), i32::MIN);
+        }
+    }
+
+    #[test]
+    fn qsub() {
+        unsafe {
+            assert_eq!(super::__qsub(10, 60), -50);
+            assert_eq!(super::__qsub(i32::MAX, -10), i32::MAX);
+            assert_eq!(super::__qsub(i32::MIN, 10), i32::MIN);
+        }
+    }
+
+    #[test]
+    fn qdbl() {
+        unsafe {
+            assert_eq!(super::__qdbl(10), 20);
+            assert_eq!(super::__qdbl(i32::MAX), i32::MAX);
+        }
+    }
+
+    #[test]
+    fn smlabb() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            let c = 50;
+            let r = (10 * 30) + c;
+            assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
+        }
+    }
+
+    #[test]
+    fn smlabt() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            let c = 50;
+            let r = (10 * 40) + c;
+            assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
+        }
+    }
+
+    #[test]
+    fn smlatb() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            let c = 50;
+            let r = (20 * 30) + c;
+            // Top half of `a` times bottom half of `b`: this must exercise `__smlatb`.
+            assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
+        }
+    }
+
+    #[test]
+    fn smlatt() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(30, 40);
+            let c = 50;
+            let r = (20 * 40) + c;
+            assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
+        }
+    }
+
+    #[test]
+    fn smlawb() {
+        unsafe {
+            let a: i32 = 10;
+            let b = i16x2::new(30, 40);
+            let c: i32 = 50;
+            let r: i32 = ((a * 30) + (c << 16)) >> 16;
+            assert_eq!(super::__smlawb(a, transmute(b), c), r);
+        }
+    }
+
+    #[test]
+    fn smlawt() {
+        unsafe {
+            let a: i32 = 10;
+            let b = i16x2::new(30, 40);
+            let c: i32 = 50;
+            let r: i32 = ((a * 40) + (c << 16)) >> 16;
+            assert_eq!(super::__smlawt(a, transmute(b), c), r);
+        }
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm/mod.rs
@ -0,0 +1,66 @@
+//! ARM intrinsics.
+//!
+//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
+//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
+//!
+//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
+//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
+
+// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT)
+#[cfg(any(target_feature = "v6", doc))]
+mod sat;
+
+#[cfg(any(target_feature = "v6", doc))]
+#[unstable(feature = "stdarch_arm_sat", issue = "none")]
+pub use self::sat::*;
+
+// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD)
+// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see
+// section 5.4.7)
+// Here we work around the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on
+// '+v5te' rather than on '+dsp'
+#[cfg(any(
+    // >= v5TE but excludes v7-M
+    all(target_feature = "v5te", not(target_feature = "mclass")),
+    // v7E-M
+    all(target_feature = "mclass", target_feature = "dsp"),
+    doc,
+))]
+mod dsp;
+
+#[cfg(any(
+    // >= v5TE but excludes v7-M
+    all(target_feature = "v5te", not(target_feature = "mclass")),
+    // v7E-M
+    all(target_feature = "mclass", target_feature = "dsp"),
+    doc,
+))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub use self::dsp::*;
+
+// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says
+// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated
+#[cfg(any(
+    // >= v6 but excludes the M profile (e.g. v7-A, v7-R)
+    all(target_feature = "v6", not(target_feature = "mclass")),
+    // v7E-M
+    all(target_feature = "mclass", target_feature = "dsp"),
+    doc,
+))]
+mod simd32;
+
+#[cfg(any(
+    // >= v6 but excludes the M profile (e.g. v7-A, v7-R)
+    all(target_feature = "v6", not(target_feature = "mclass")),
+    // v7E-M
+    all(target_feature = "mclass", target_feature = "dsp"),
+    doc,
+))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub use self::simd32::*;
+
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub use crate::core_arch::arm_shared::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
--- a/library/stdarch/crates/core_arch/src/arm/neon.rs
+++ b/library/stdarch/crates/core_arch/src/arm/neon.rs
@ -0,0 +1,136 @@
+use crate::core_arch::arm_shared::neon::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// Raw bindings to the LLVM NEON `vbsl` intrinsics for 8- and 16-lane `i8`
+// vectors; the `link_name` attributes select the LLVM intrinsic by name.
+// `improper_ctypes` is allowed because the signatures use SIMD vector types.
+// NOTE(review): nothing else in this file calls `vbsl_s8_` or `vbslq_s8_` —
+// confirm whether they are still needed here.
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.arm.neon.vbsl.v8i8"]
+    fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    #[link_name = "llvm.arm.neon.vbsl.v16i8"]
+    fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+}
+
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    // `N` is a compile-time shift amount; anything outside 0..=63 fails to build.
+    static_assert!(0 <= N && N <= 63);
+    // The operands are reinterpreted to whatever vector type `vshiftins_v1i64`
+    // takes, and the shift amount is passed splatted into an `int64x1_t`.
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t::splat(N as i64),
+    ))
+}
+
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    // `N` is a compile-time shift amount; anything outside 0..=63 fails to build.
+    static_assert!(0 <= N && N <= 63);
+    // The operands are reinterpreted to whatever vector type `vshiftins_v2i64`
+    // takes, and the same shift amount is splatted into both lanes.
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(N as i64),
+    ))
+}
+
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    // `N` is a compile-time shift amount; anything outside 0..=63 fails to build.
+    static_assert!(0 <= N && N <= 63);
+    // NOTE(review): the index list `[0, 1]` keeps both lanes in their existing
+    // order, so these shuffles look like identity permutations — confirm
+    // whether a lane reversal (`[1, 0]`) was intended for big-endian targets.
+    let a: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
+    let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
+    // Same shift amount in both lanes; operands are reinterpreted to the
+    // vector type `vshiftins_v2i64` takes.
+    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(N as i64),
+    ));
+    simd_shuffle!(ret_val, ret_val, [0, 1])
+}
+
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    // `N` is a compile-time shift amount; anything outside 1..=64 fails to build.
+    static_assert!(1 <= N && N <= 64);
+    // The shift amount is handed to `vshiftins_v1i64` negated; presumably the
+    // helper encodes a right shift as a negative count — confirm against its
+    // definition.
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t::splat(-N as i64),
+    ))
+}
+
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    // `N` is a compile-time shift amount; anything outside 1..=64 fails to build.
+    static_assert!(1 <= N && N <= 64);
+    // The shift amount is handed to `vshiftins_v2i64` negated in both lanes;
+    // presumably the helper encodes a right shift as a negative count —
+    // confirm against its definition.
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(-N as i64),
+    ))
+}
+
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    // `N` is a compile-time shift amount; anything outside 1..=64 fails to build.
+    static_assert!(1 <= N && N <= 64);
+    // NOTE(review): the index list `[0, 1]` keeps both lanes in their existing
+    // order, so these shuffles look like identity permutations — confirm
+    // whether a lane reversal (`[1, 0]`) was intended for big-endian targets.
+    let a: poly64x2_t = simd_shuffle!(a, a, [0, 1]);
+    let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
+    // The shift amount is passed negated in both lanes; presumably the helper
+    // encodes a right shift as a negative count — confirm against its definition.
+    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(-N as i64),
+    ));
+    simd_shuffle!(ret_val, ret_val, [0, 1])
+}
--- a/library/stdarch/crates/core_arch/src/arm/sat.rs
+++ b/library/stdarch/crates/core_arch/src/arm/sat.rs
@ -0,0 +1,62 @@
+//! # References:
+//!
+//! - Section 8.4 "Saturating intrinsics" of ACLE
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Saturates a 32-bit signed integer to a signed integer with a given
+/// bit width.
+///
+/// `WIDTH` is a compile-time constant and must lie in `1..=32`; any other
+/// value is rejected at build time by the `static_assert!` in the body.
+/// For example, `WIDTH = 8` clamps the result into `-128..=127`.
+#[unstable(feature = "stdarch_arm_sat", issue = "none")]
+#[inline]
+#[cfg_attr(test, assert_instr("ssat", WIDTH = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn __ssat<const WIDTH: u32>(x: i32) -> i32 {
+    static_assert!(matches!(WIDTH, 1..=32));
+    // The LLVM intrinsic takes the width as a second `i32` operand.
+    arm_ssat(x, WIDTH as i32)
+}
+
+/// Saturates a 32-bit signed integer to an unsigned integer with a given
+/// bit width.
+///
+/// `WIDTH` is a compile-time constant and must lie in `0..=31`, the range
+/// ACLE specifies for `__usat` (the `USAT` immediate is encoded directly,
+/// unlike `SSAT`, whose range is `1..=32`). Any other value is rejected at
+/// build time. For example, `WIDTH = 8` clamps the result into `0..=255`.
+#[unstable(feature = "stdarch_arm_sat", issue = "none")]
+#[inline]
+#[cfg_attr(test, assert_instr("usat", WIDTH = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn __usat<const WIDTH: u32>(x: i32) -> u32 {
+    // 0 is a legal unsigned saturation width and 32 is not encodable, so the
+    // accepted range differs from `__ssat`.
+    static_assert!(matches!(WIDTH, 0..=31));
+    // The LLVM intrinsic takes the width as a second `i32` operand.
+    arm_usat(x, WIDTH as i32)
+}
+
+// Raw bindings to the LLVM saturation intrinsics, selected by `link_name`.
+// In both, `x` is the value to saturate and `y` is the bit width supplied by
+// the public wrappers above.
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.arm.ssat"]
+    fn arm_ssat(x: i32, y: i32) -> i32;
+
+    #[link_name = "llvm.arm.usat"]
+    fn arm_usat(x: i32, y: i32) -> u32;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use stdarch_test::simd_test;
+
+    /// Signed saturation to 8 bits: in-range values pass through, out-of-range
+    /// values clamp to 127 / -128.
+    #[test]
+    fn test_ssat() {
+        let cases: [(i32, i32); 4] = [(1, 1), (1000, 127), (-1, -1), (-1000, -128)];
+        for (input, expected) in cases {
+            let saturated = unsafe { __ssat::<8>(input) };
+            assert_eq!(saturated, expected);
+        }
+    }
+
+    /// Unsigned saturation to 8 bits: values above 255 clamp to 255 and
+    /// negative values clamp to 0.
+    #[test]
+    fn test_usat() {
+        let cases: [(i32, u32); 4] = [(1, 1), (1000, 255), (-1, 0), (-1000, 0)];
+        for (input, expected) in cases {
+            let saturated = unsafe { __usat::<8>(input) };
+            assert_eq!(saturated, expected);
+        }
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm/simd32.rs
+++ b/library/stdarch/crates/core_arch/src/arm/simd32.rs
@ -0,0 +1,765 @@
+//! # References
+//!
+//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE
+//!
+//! Intrinsics that could live here
+//!
+//! - \[x\] __sel
+//! - \[ \] __ssat16
+//! - \[ \] __usat16
+//! - \[ \] __sxtab16
+//! - \[ \] __sxtb16
+//! - \[ \] __uxtab16
+//! - \[ \] __uxtb16
+//! - \[x\] __qadd8
+//! - \[x\] __qsub8
+//! - \[x\] __sadd8
+//! - \[x\] __shadd8
+//! - \[x\] __shsub8
+//! - \[x\] __ssub8
+//! - \[ \] __uadd8
+//! - \[ \] __uhadd8
+//! - \[ \] __uhsub8
+//! - \[ \] __uqadd8
+//! - \[ \] __uqsub8
+//! - \[x\] __usub8
+//! - \[x\] __usad8
+//! - \[x\] __usada8
+//! - \[x\] __qadd16
+//! - \[x\] __qasx
+//! - \[x\] __qsax
+//! - \[x\] __qsub16
+//! - \[x\] __sadd16
+//! - \[x\] __sasx
+//! - \[x\] __shadd16
+//! - \[ \] __shasx
+//! - \[ \] __shsax
+//! - \[x\] __shsub16
+//! - \[ \] __ssax
+//! - \[ \] __ssub16
+//! - \[ \] __uadd16
+//! - \[ \] __uasx
+//! - \[ \] __uhadd16
+//! - \[ \] __uhasx
+//! - \[ \] __uhsax
+//! - \[ \] __uhsub16
+//! - \[ \] __uqadd16
+//! - \[ \] __uqasx
+//! - \[ \] __uqsax
+//! - \[ \] __uqsub16
+//! - \[ \] __usax
+//! - \[ \] __usub16
+//! - \[x\] __smlad
+//! - \[ \] __smladx
+//! - \[ \] __smlald
+//! - \[ \] __smlaldx
+//! - \[x\] __smlsd
+//! - \[ \] __smlsdx
+//! - \[ \] __smlsld
+//! - \[ \] __smlsldx
+//! - \[x\] __smuad
+//! - \[x\] __smuadx
+//! - \[x\] __smusd
+//! - \[x\] __smusdx
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::mem::transmute;
+
+/// ARM-specific vector of four `i8` lanes packed into a 32-bit integer.
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub type int8x4_t = i32;
+
+/// ARM-specific vector of four `u8` lanes packed into a 32-bit integer.
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub type uint8x4_t = u32;
+
+/// ARM-specific vector of two `i16` lanes packed into a 32-bit integer.
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub type int16x2_t = i32;
+
+/// ARM-specific vector of two `u16` lanes packed into a 32-bit integer.
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub type uint16x2_t = u32;
+
+// Calls the two-operand function `$name`, transmuting both arguments to the
+// parameter types it expects and transmuting its result to the type expected
+// at the call site. This lets the wrappers (and the tests) bridge between the
+// packed 32-bit aliases, the raw intrinsic signatures and SIMD helper types.
+macro_rules! dsp_call {
+    ($name:expr, $a:expr, $b:expr) => {
+        transmute($name(transmute($a), transmute($b)))
+    };
+}
+
+// Raw bindings to the LLVM ARM 32-bit SIMD intrinsics; each `link_name`
+// selects the LLVM intrinsic that the public wrapper of the same name (with a
+// `__` prefix instead of `arm_`) forwards to. Packed operands travel as plain
+// `i32` values here.
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.arm.qadd8"]
+    fn arm_qadd8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qsub8"]
+    fn arm_qsub8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qsub16"]
+    fn arm_qsub16(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qadd16"]
+    fn arm_qadd16(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qasx"]
+    fn arm_qasx(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.qsax"]
+    fn arm_qsax(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.sadd16"]
+    fn arm_sadd16(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.sadd8"]
+    fn arm_sadd8(a: i32, b: i32) -> i32;
+
+    // Three-operand forms: `c` is the 32-bit accumulator.
+    #[link_name = "llvm.arm.smlad"]
+    fn arm_smlad(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.smlsd"]
+    fn arm_smlsd(a: i32, b: i32, c: i32) -> i32;
+
+    #[link_name = "llvm.arm.sasx"]
+    fn arm_sasx(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.sel"]
+    fn arm_sel(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.shadd8"]
+    fn arm_shadd8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.shadd16"]
+    fn arm_shadd16(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.shsub8"]
+    fn arm_shsub8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.ssub8"]
+    fn arm_ssub8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.usub8"]
+    fn arm_usub8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.shsub16"]
+    fn arm_shsub16(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smuad"]
+    fn arm_smuad(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smuadx"]
+    fn arm_smuadx(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smusd"]
+    fn arm_smusd(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.smusdx"]
+    fn arm_smusdx(a: i32, b: i32) -> i32;
+
+    // The only binding here declared with an unsigned result.
+    #[link_name = "llvm.arm.usad8"]
+    fn arm_usad8(a: i32, b: i32) -> u32;
+}
+
+/// Saturating four 8-bit integer additions
+///
+/// Returns the 8-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] + b\[0\]
+/// res\[1\] = a\[1\] + b\[1\]
+/// res\[2\] = a\[2\] + b\[2\]
+/// res\[3\] = a\[3\] + b\[3\]
+#[inline]
+#[cfg_attr(test, assert_instr(qadd8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_qadd8, a, b)
+}
+
+/// Saturating four 8-bit integer subtractions
+///
+/// Returns the 8-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+/// res\[2\] = a\[2\] - b\[2\]
+/// res\[3\] = a\[3\] - b\[3\]
+#[inline]
+#[cfg_attr(test, assert_instr(qsub8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_qsub8, a, b)
+}
+
+/// Saturating two 16-bit integer subtractions
+///
+/// Returns the 16-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+#[inline]
+#[cfg_attr(test, assert_instr(qsub16))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_qsub16, a, b)
+}
+
+/// Saturating two 16-bit integer additions
+///
+/// Returns the 16-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] + b\[0\]
+/// res\[1\] = a\[1\] + b\[1\]
+#[inline]
+#[cfg_attr(test, assert_instr(qadd16))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_qadd16, a, b)
+}
+
+/// Saturating 16-bit add and subtract with exchange
+///
+/// Returns the 16-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] - b\[1\]
+/// res\[1\] = a\[1\] + b\[0\]
+#[inline]
+#[cfg_attr(test, assert_instr(qasx))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_qasx, a, b)
+}
+
+/// Saturating 16-bit subtract and add with exchange
+///
+/// Returns the 16-bit signed saturated equivalent of
+///
+/// res\[0\] = a\[0\] + b\[1\]
+/// res\[1\] = a\[1\] - b\[0\]
+#[inline]
+#[cfg_attr(test, assert_instr(qsax))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_qsax, a, b)
+}
+
+/// Two 16-bit signed integer additions
+///
+/// Returns the 16-bit signed equivalent of
+///
+/// res\[0\] = a\[0\] + b\[0\]
+/// res\[1\] = a\[1\] + b\[1\]
+///
+/// and the GE bits of the APSR are set. The lane results are not saturated:
+/// the `sadd16` test in this file expects `i16::MAX + 2` to wrap to
+/// `-i16::MAX`.
+#[inline]
+#[cfg_attr(test, assert_instr(sadd16))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_sadd16, a, b)
+}
+
+/// Four 8-bit signed integer additions
+///
+/// Returns the 8-bit signed equivalent of
+///
+/// res\[0\] = a\[0\] + b\[0\]
+/// res\[1\] = a\[1\] + b\[1\]
+/// res\[2\] = a\[2\] + b\[2\]
+/// res\[3\] = a\[3\] + b\[3\]
+///
+/// and the GE bits of the APSR are set. The lane results are not saturated:
+/// the `sadd8` test in this file expects `i8::MAX + 2` to wrap to `-i8::MAX`.
+#[inline]
+#[cfg_attr(test, assert_instr(sadd8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_sadd8, a, b)
+}
+
+/// Dual 16-bit Signed Multiply with Addition of products
+/// and 32-bit accumulation.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes and returns the 32-bit
+/// equivalent of
+///
+/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c
+#[inline]
+#[cfg_attr(test, assert_instr(smlad))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
+    // `c` is already the accumulator type, so only the packed operands are
+    // transmuted.
+    arm_smlad(transmute(a), transmute(b), c)
+}
+
+/// Dual 16-bit Signed Multiply with Subtraction of products
+/// and 32-bit accumulation and overflow detection.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes and returns the 32-bit
+/// equivalent of
+///
+/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c
+#[inline]
+#[cfg_attr(test, assert_instr(smlsd))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
+    // `c` is already the accumulator type, so only the packed operands are
+    // transmuted.
+    arm_smlsd(transmute(a), transmute(b), c)
+}
+
+/// 16-bit signed add and subtract with exchange
+///
+/// Returns the 16-bit signed equivalent of
+///
+/// res\[0\] = a\[0\] - b\[1\]
+/// res\[1\] = a\[1\] + b\[0\]
+///
+/// and the GE bits of the APSR are set.
+#[inline]
+#[cfg_attr(test, assert_instr(sasx))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_sasx, a, b)
+}
+
+/// Select bytes from each operand according to APSR GE flags
+///
+/// Returns the equivalent of
+///
+/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\]
+/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\]
+/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\]
+/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\]
+///
+/// where GE are bits of APSR. The result therefore depends on whichever
+/// earlier operation last set those bits (the `sel` test in this file calls
+/// `__sadd8` first for that purpose).
+#[inline]
+#[cfg_attr(test, assert_instr(sel))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_sel, a, b)
+}
+
+/// Signed halving parallel byte-wise addition.
+///
+/// Returns the 8-bit signed equivalent of
+///
+/// res\[0\] = (a\[0\] + b\[0\]) / 2
+/// res\[1\] = (a\[1\] + b\[1\]) / 2
+/// res\[2\] = (a\[2\] + b\[2\]) / 2
+/// res\[3\] = (a\[3\] + b\[3\]) / 2
+///
+/// where \[0\] to \[3\] are the four 8-bit lanes of each operand.
+#[inline]
+#[cfg_attr(test, assert_instr(shadd8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_shadd8, a, b)
+}
+
+/// Signed halving parallel halfword-wise addition.
+///
+/// Returns the 16-bit signed equivalent of
+///
+/// res\[0\] = (a\[0\] + b\[0\]) / 2
+/// res\[1\] = (a\[1\] + b\[1\]) / 2
+///
+/// where \[0\] and \[1\] are the two 16-bit lanes of each operand.
+#[inline]
+#[cfg_attr(test, assert_instr(shadd16))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_shadd16, a, b)
+}
+
+/// Signed halving parallel byte-wise subtraction.
+///
+/// Returns the 8-bit signed equivalent of
+///
+/// res\[0\] = (a\[0\] - b\[0\]) / 2
+/// res\[1\] = (a\[1\] - b\[1\]) / 2
+/// res\[2\] = (a\[2\] - b\[2\]) / 2
+/// res\[3\] = (a\[3\] - b\[3\]) / 2
+///
+/// where \[0\] to \[3\] are the four 8-bit lanes of each operand.
+#[inline]
+#[cfg_attr(test, assert_instr(shsub8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_shsub8, a, b)
+}
+
+/// Inserts a `USUB8` instruction.
+///
+/// Returns the 8-bit unsigned equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+/// res\[2\] = a\[2\] - b\[2\]
+/// res\[3\] = a\[3\] - b\[3\]
+///
+/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
+/// The GE bits of the APSR are set.
+#[inline]
+#[cfg_attr(test, assert_instr(usub8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
+    // `dsp_call!` transmutes the `u32` operands to the `i32` parameters of
+    // the raw binding and the `i32` result back to `u32`.
+    dsp_call!(arm_usub8, a, b)
+}
+
+/// Inserts a `SSUB8` instruction.
+///
+/// Returns the 8-bit signed equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+/// res\[2\] = a\[2\] - b\[2\]
+/// res\[3\] = a\[3\] - b\[3\]
+///
+/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
+/// The GE bits of the APSR are set.
+#[inline]
+#[cfg_attr(test, assert_instr(ssub8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_ssub8, a, b)
+}
+
+/// Signed halving parallel halfword-wise subtraction.
+///
+/// Returns the 16-bit signed equivalent of
+///
+/// res\[0\] = (a\[0\] - b\[0\]) / 2
+/// res\[1\] = (a\[1\] - b\[1\]) / 2
+///
+/// where \[0\] and \[1\] are the two 16-bit lanes of each operand.
+#[inline]
+#[cfg_attr(test, assert_instr(shsub16))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
+    dsp_call!(arm_shsub16, a, b)
+}
+
+/// Signed Dual Multiply Add.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes and returns the 32-bit
+/// equivalent of
+///
+/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
+///
+/// and sets the Q flag if overflow occurs on the addition.
+#[inline]
+#[cfg_attr(test, assert_instr(smuad))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 {
+    arm_smuad(transmute(a), transmute(b))
+}
+
+/// Signed Dual Multiply Add Reversed.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes, exchanges the lanes of
+/// `b`, and returns the 32-bit equivalent of
+///
+/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
+///
+/// and sets the Q flag if overflow occurs on the addition.
+#[inline]
+#[cfg_attr(test, assert_instr(smuadx))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
+    arm_smuadx(transmute(a), transmute(b))
+}
+
+/// Signed Dual Multiply Subtract.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes and returns the 32-bit
+/// equivalent of
+///
+/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
+///
+/// The difference of two 16x16-bit signed products always fits in 32 bits,
+/// so the subtraction itself cannot overflow.
+#[inline]
+#[cfg_attr(test, assert_instr(smusd))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 {
+    arm_smusd(transmute(a), transmute(b))
+}
+
+/// Signed Dual Multiply Subtract Reversed.
+///
+/// Treats `a` and `b` as pairs of signed 16-bit lanes, exchanges the lanes of
+/// `b`, and returns the 32-bit equivalent of
+///
+/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
+///
+/// The difference of two 16x16-bit signed products always fits in 32 bits,
+/// so the subtraction itself cannot overflow.
+#[inline]
+#[cfg_attr(test, assert_instr(smusdx))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
+    arm_smusdx(transmute(a), transmute(b))
+}
+
+/// Sum of 8-bit absolute differences.
+///
+/// Returns the 8-bit unsigned equivalent of
+///
+/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
+///       abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\])
+///
+/// NOTE(review): the lanes are documented as unsigned, yet the parameters
+/// use the signed `int8x4_t` alias — confirm whether `uint8x4_t` was intended.
+#[inline]
+#[cfg_attr(test, assert_instr(usad8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 {
+    arm_usad8(transmute(a), transmute(b))
+}
+
+/// Sum of 8-bit absolute differences and constant.
+///
+/// Returns the 8-bit unsigned equivalent of
+///
+/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
+///       abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\]) + c
+///
+/// The accumulation with `c` wraps modulo 2^32.
+#[inline]
+#[cfg_attr(test, assert_instr(usad8))]
+#[unstable(feature = "stdarch_arm_dsp", issue = "117237")]
+pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
+    // `wrapping_add` keeps the accumulate modular for every `c`; a plain `+`
+    // would trap when built with overflow checks and `c` is near `u32::MAX`.
+    __usad8(a, b).wrapping_add(c)
+}
+
+// Runtime tests for the 32-bit SIMD wrappers. Operands are built with the
+// `i8x4` / `i16x2` / `u8x4` helper types and bridged to the packed 32-bit
+// aliases either through `dsp_call!` or through explicit `transmute`s.
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::simd::{i8x4, i16x2, u8x4};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    // Last lane: `i8::MAX + 1` saturates at `i8::MAX`.
+    #[test]
+    fn qadd8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, i8::MAX);
+            let b = i8x4::new(2, -1, 0, 1);
+            let c = i8x4::new(3, 1, 3, i8::MAX);
+            let r: i8x4 = dsp_call!(super::__qadd8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Last lane: `i8::MIN - 1` saturates at `i8::MIN`.
+    #[test]
+    fn qsub8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, i8::MIN);
+            let b = i8x4::new(2, -1, 0, 1);
+            let c = i8x4::new(-1, 3, 3, i8::MIN);
+            let r: i8x4 = dsp_call!(super::__qsub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // In-range lane-wise addition only; saturation is not exercised here.
+    #[test]
+    fn qadd16() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(2, -1);
+            let c = i16x2::new(3, 1);
+            let r: i16x2 = dsp_call!(super::__qadd16, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // In-range lane-wise subtraction only; saturation is not exercised here.
+    #[test]
+    fn qsub16() {
+        unsafe {
+            let a = i16x2::new(10, 20);
+            let b = i16x2::new(20, -10);
+            let c = i16x2::new(-10, 30);
+            let r: i16x2 = dsp_call!(super::__qsub16, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane 0 is `1 - 2`; lane 1 is `i16::MAX + 2`, which saturates.
+    #[test]
+    fn qasx() {
+        unsafe {
+            let a = i16x2::new(1, i16::MAX);
+            let b = i16x2::new(2, 2);
+            let c = i16x2::new(-1, i16::MAX);
+            let r: i16x2 = dsp_call!(super::__qasx, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane 0 is `1 + 2`; lane 1 is `i16::MAX - 2`.
+    #[test]
+    fn qsax() {
+        unsafe {
+            let a = i16x2::new(1, i16::MAX);
+            let b = i16x2::new(2, 2);
+            let c = i16x2::new(3, i16::MAX - 2);
+            let r: i16x2 = dsp_call!(super::__qsax, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane 1 is `i16::MAX + 2`, expected to wrap to `-i16::MAX` (no saturation).
+    #[test]
+    fn sadd16() {
+        unsafe {
+            let a = i16x2::new(1, i16::MAX);
+            let b = i16x2::new(2, 2);
+            let c = i16x2::new(3, -i16::MAX);
+            let r: i16x2 = dsp_call!(super::__sadd16, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Last lane is `i8::MAX + 2`, expected to wrap to `-i8::MAX` (no saturation).
+    #[test]
+    fn sadd8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, i8::MAX);
+            let b = i8x4::new(4, 3, 2, 2);
+            let c = i8x4::new(5, 5, 5, -i8::MAX);
+            let r: i8x4 = dsp_call!(super::__sadd8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane 0 is `a[0] - b[1]` = 0; lane 1 is `a[1] + b[0]` = 4.
+    #[test]
+    fn sasx() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(2, 1);
+            let c = i16x2::new(0, 4);
+            let r: i16x2 = dsp_call!(super::__sasx, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Sum of both lane products plus the accumulator 10.
+    #[test]
+    fn smlad() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(3, 4);
+            let r = super::__smlad(transmute(a), transmute(b), 10);
+            assert_eq!(r, (1 * 3) + (2 * 4) + 10);
+        }
+    }
+
+    // Difference of the lane products plus the accumulator 10.
+    #[test]
+    fn smlsd() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(3, 4);
+            let r = super::__smlsd(transmute(a), transmute(b), 10);
+            assert_eq!(r, ((1 * 3) - (2 * 4)) + 10);
+        }
+    }
+
+    // Order matters here: `__sadd8` must run immediately before `__sel` so
+    // that the GE bits `__sel` reads come from that addition. The expected
+    // value equals `a`, i.e. every lane is taken from the first operand.
+    #[test]
+    fn sel() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, i8::MAX);
+            let b = i8x4::new(4, 3, 2, 2);
+            // call sadd8() to set GE bits
+            super::__sadd8(transmute(a), transmute(b));
+            let c = i8x4::new(1, 2, 3, i8::MAX);
+            let r: i8x4 = dsp_call!(super::__sel, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Every lane sums to 6, so every halved result is 3.
+    #[test]
+    fn shadd8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(5, 4, 3, 2);
+            let c = i8x4::new(3, 3, 3, 3);
+            let r: i8x4 = dsp_call!(super::__shadd8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Both lanes sum to 6, so both halved results are 3.
+    #[test]
+    fn shadd16() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let c = i16x2::new(3, 3);
+            let r: i16x2 = dsp_call!(super::__shadd16, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane differences are -4, -2, 0, 2; the expected values are their halves.
+    #[test]
+    fn shsub8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(5, 4, 3, 2);
+            let c = i8x4::new(-2, -1, 0, 1);
+            let r: i8x4 = dsp_call!(super::__shsub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Plain lane-wise signed differences `a[i] - b[i]`.
+    #[test]
+    fn ssub8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(5, 4, 3, 2);
+            let c = i8x4::new(-4, -2, 0, 2);
+            let r: i8x4 = dsp_call!(super::__ssub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Same inputs as `ssub8`, but lanes are unsigned: -4 and -2 appear as
+    // 252 and 254.
+    #[test]
+    fn usub8() {
+        unsafe {
+            let a = u8x4::new(1, 2, 3, 4);
+            let b = u8x4::new(5, 4, 3, 2);
+            let c = u8x4::new(252, 254, 0, 2);
+            let r: u8x4 = dsp_call!(super::__usub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // Lane differences are -4 and -2; the expected values are their halves.
+    #[test]
+    fn shsub16() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let c = i16x2::new(-2, -1);
+            let r: i16x2 = dsp_call!(super::__shsub16, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    // 1 * 5 + 2 * 4 = 13.
+    #[test]
+    fn smuad() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let r = super::__smuad(transmute(a), transmute(b));
+            assert_eq!(r, 13);
+        }
+    }
+
+    // Lanes of `b` exchanged: 1 * 4 + 2 * 5 = 14.
+    #[test]
+    fn smuadx() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let r = super::__smuadx(transmute(a), transmute(b));
+            assert_eq!(r, 14);
+        }
+    }
+
+    // 1 * 5 - 2 * 4 = -3.
+    #[test]
+    fn smusd() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let r = super::__smusd(transmute(a), transmute(b));
+            assert_eq!(r, -3);
+        }
+    }
+
+    // Lanes of `b` exchanged: 1 * 4 - 2 * 5 = -6.
+    #[test]
+    fn smusdx() {
+        unsafe {
+            let a = i16x2::new(1, 2);
+            let b = i16x2::new(5, 4);
+            let r = super::__smusdx(transmute(a), transmute(b));
+            assert_eq!(r, -6);
+        }
+    }
+
+    // |1-4| + |2-3| + |3-2| + |4-1| = 8.
+    #[test]
+    fn usad8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(4, 3, 2, 1);
+            let r = super::__usad8(transmute(a), transmute(b));
+            assert_eq!(r, 8);
+        }
+    }
+
+    // Exercises `__usada8`: the same sum of absolute differences plus `c`.
+    #[test]
+    fn usad8a() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(4, 3, 2, 1);
+            let c = 10;
+            let r = super::__usada8(transmute(a), transmute(b), c);
+            assert_eq!(r, 8 + c);
+        }
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/barrier/common.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/common.rs
@ -0,0 +1,16 @@
+//! Access types available on all architectures
+
+/// Full system is the required shareability domain, reads and writes are the
+/// required access types
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct SY;
+
+// `dmb_dsb!` is defined outside this file; presumably it generates the `Dmb`
+// and `Dsb` implementations for `SY` — confirm against the macro definition.
+dmb_dsb!(SY);
+
+// `Isb` is implemented by hand here rather than through the macro above:
+// `__isb` forwards to the parent module's `isb` with the `SY` argument constant.
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+impl super::super::sealed::Isb for SY {
+    #[inline(always)]
+    unsafe fn __isb(&self) {
+        super::isb(super::arg::SY)
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/barrier/cp15.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/cp15.rs
@ -0,0 +1,45 @@
+// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary
+// of CP15 instructions"
+
+use crate::arch::asm;
+
+/// Full system is the required shareability domain, reads and writes are the
+/// required access types
+// Only marker type defined in this file; the three impls below give it `Dmb`, `Dsb` and `Isb`.
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct SY;
+
+// DMB for the targets on which the parent module selects `cp15` instead of the dedicated
+// instructions: the barrier is issued as a CP15 register write.
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+impl super::super::sealed::Dmb for SY {
+    #[inline(always)]
+    unsafe fn __dmb(&self) {
+        // Writes the value 0 from a general-purpose register to CP15 (CRn = c7, CRm = c10,
+        // opc2 = 5). `nomem` is not listed in `options`, so the compiler has to assume the
+        // block may read or write memory.
+        asm!(
+            "mcr p15, 0, {}, c7, c10, 5",
+            in(reg) 0_u32,
+            options(preserves_flags, nostack)
+        )
+    }
+}
+
+// DSB for the targets on which the parent module selects `cp15` instead of the dedicated
+// instructions: the barrier is issued as a CP15 register write.
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+impl super::super::sealed::Dsb for SY {
+    #[inline(always)]
+    unsafe fn __dsb(&self) {
+        // Writes the value 0 from a general-purpose register to CP15 (CRn = c7, CRm = c10,
+        // opc2 = 4). `nomem` is not listed in `options`, so the compiler has to assume the
+        // block may read or write memory.
+        asm!(
+            "mcr p15, 0, {}, c7, c10, 4",
+            in(reg) 0_u32,
+            options(preserves_flags, nostack)
+        )
+    }
+}
+
+// ISB for the targets on which the parent module selects `cp15` instead of the dedicated
+// instructions: the barrier is issued as a CP15 register write.
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+impl super::super::sealed::Isb for SY {
+    #[inline(always)]
+    unsafe fn __isb(&self) {
+        // Writes the value 0 from a general-purpose register to CP15 (CRn = c7, CRm = c5,
+        // opc2 = 4). `nomem` is not listed in `options`, so the compiler has to assume the
+        // block may read or write memory.
+        asm!(
+            "mcr p15, 0, {}, c7, c5, 4",
+            in(reg) 0_u32,
+            options(preserves_flags, nostack)
+        )
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/barrier/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/mod.rs
@ -0,0 +1,185 @@
+// Reference: Section 7.4 "Hints" of ACLE
+
+// CP15 instruction
+#[cfg(not(any(
+    // v8
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    // v7
+    target_feature = "v7",
+    // v6-M
+    target_feature = "mclass"
+)))]
+mod cp15;
+
+#[cfg(not(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    target_feature = "mclass"
+)))]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub use self::cp15::*;
+
+// Dedicated instructions
+//
+// `dmb_dsb!(NAME)` implements the sealed `Dmb` and `Dsb` traits for the marker type `NAME`.
+// Each impl forwards the same-named constant from `arg` to the `dmb` / `dsb` binding. The
+// `super::` paths resolve relative to the submodule in which the macro is invoked, so it is
+// meant to be expanded inside the child modules of this one (`common`, `not_mclass`, `v8`).
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    target_feature = "mclass"
+))]
+macro_rules! dmb_dsb {
+    ($A:ident) => {
+        #[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+        impl super::super::sealed::Dmb for $A {
+            #[inline(always)]
+            unsafe fn __dmb(&self) {
+                // `$A` names both the marker type and the `arg` constant passed along.
+                super::dmb(super::arg::$A)
+            }
+        }
+
+        #[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+        impl super::super::sealed::Dsb for $A {
+            #[inline(always)]
+            unsafe fn __dsb(&self) {
+                // `$A` names both the marker type and the `arg` constant passed along.
+                super::dsb(super::arg::$A)
+            }
+        }
+    };
+}
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    target_feature = "mclass"
+))]
+mod common;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    target_feature = "mclass"
+))]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub use self::common::*;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+))]
+mod not_mclass;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+))]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub use self::not_mclass::*;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+mod v8;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub use self::v8::*;
+
+/// Emits a DMB (data memory barrier) instruction, or the equivalent CP15 instruction.
+///
+/// A DMB enforces the observed ordering of memory accesses: accesses of the specified type issued
+/// before the DMB are guaranteed to be observed (in the specified scope) before memory accesses
+/// issued after the DMB.
+///
+/// A typical use is placing a DMB between storing data and updating the flag variable that makes
+/// that data available to another core.
+///
+/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type.
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub unsafe fn __dmb<A: super::sealed::Dmb>(arg: A) {
+    // The marker type `A` selects the barrier variant through its sealed-trait impl.
+    <A as super::sealed::Dmb>::__dmb(&arg)
+}
+
+/// Emits a DSB (data synchronization barrier) instruction, or the equivalent CP15 instruction.
+///
+/// A DSB ensures the completion of memory accesses. It behaves as the equivalent DMB and has
+/// additional properties: once a DSB instruction completes, all memory accesses of the specified
+/// type issued before the DSB are guaranteed to have completed.
+///
+/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type.
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub unsafe fn __dsb<A: super::sealed::Dsb>(arg: A) {
+    // The marker type `A` selects the barrier variant through its sealed-trait impl.
+    <A as super::sealed::Dsb>::__dsb(&arg)
+}
+
+/// Emits an ISB (instruction synchronization barrier) instruction, or the equivalent CP15
+/// instruction.
+///
+/// The instruction flushes the processor pipeline fetch buffers, so that following instructions
+/// are fetched from cache or memory.
+///
+/// An ISB is needed after some system maintenance operations. It is also needed before
+/// transferring control to code that has been loaded or modified in memory, for example by an
+/// overlay mechanism or just-in-time code generator.  (Note that if instruction and data caches are
+/// separate, privileged cache maintenance operations would be needed in order to unify the caches.)
+///
+/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full
+/// system) scope of the ISB instruction.
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub unsafe fn __isb<A: super::sealed::Isb>(arg: A) {
+    // Dispatches through the sealed trait; the accepted marker types are those with an `Isb` impl.
+    <A as super::sealed::Isb>::__isb(&arg)
+}
+
+// Bindings to the LLVM barrier intrinsics. `link_name` selects the `llvm.aarch64.*` symbol on
+// aarch64 / arm64ec and the `llvm.arm.*` symbol on arm. Each function takes the barrier option
+// as an `i32`; the values passed by this module come from `arg` below.
+unsafe extern "unadjusted" {
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.dmb"
+    )]
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
+    fn dmb(_: i32);
+
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.dsb"
+    )]
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")]
+    fn dsb(_: i32);
+
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.isb"
+    )]
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")]
+    fn isb(_: i32);
+}
+
+// we put these in a module to prevent weirdness with glob re-exports
+//
+// Each constant shares its name with a marker struct in `common`, `not_mclass` or `v8`; the
+// trailing notes repeat the shareability domain / access type from that struct's docs.
+mod arg {
+    // See Section 7.3  Memory barriers of ACLE
+    pub const SY: i32 = 15; // full system, reads and writes
+    pub const ST: i32 = 14; // full system, writes
+    pub const LD: i32 = 13; // full system, reads
+    pub const ISH: i32 = 11; // Inner Shareable, reads and writes
+    pub const ISHST: i32 = 10; // Inner Shareable, writes
+    pub const ISHLD: i32 = 9; // Inner Shareable, reads
+    pub const NSH: i32 = 7; // Non-shareable, reads and writes
+    pub const NSHST: i32 = 6; // Non-shareable, writes
+    pub const NSHLD: i32 = 5; // Non-shareable, reads
+    pub const OSH: i32 = 3; // Outer Shareable, reads and writes
+    pub const OSHST: i32 = 2; // Outer Shareable, writes
+    pub const OSHLD: i32 = 1; // Outer Shareable, reads
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/barrier/not_mclass.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/not_mclass.rs
@ -0,0 +1,50 @@
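+//! Access types available on v7 and v8 but not on v7(E)-M or v8-M
+// NOTE(review): `barrier/mod.rs` gates this module on `aarch64 | arm64ec | target_feature = "v7"`
+// and does not exclude `mclass`; confirm whether M-profile targets that also set `v7` are meant
+// to be excluded, as the line above states.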
+
+/// Full system is the required shareability domain, writes are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct ST;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `ST`; both forward `arg::ST` (14).
+dmb_dsb!(ST);
+
+/// Inner Shareable is the required shareability domain, reads and writes are
+/// the required access types
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct ISH;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `ISH`; both forward `arg::ISH` (11).
+dmb_dsb!(ISH);
+
+/// Inner Shareable is the required shareability domain, writes are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct ISHST;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `ISHST`; both forward `arg::ISHST` (10).
+dmb_dsb!(ISHST);
+
+/// Non-shareable is the required shareability domain, reads and writes are the
+/// required access types
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct NSH;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `NSH`; both forward `arg::NSH` (7).
+dmb_dsb!(NSH);
+
+/// Non-shareable is the required shareability domain, writes are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct NSHST;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `NSHST`; both forward `arg::NSHST` (6).
+dmb_dsb!(NSHST);
+
+/// Outer Shareable is the required shareability domain, reads and writes are
+/// the required access types
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct OSH;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `OSH`; both forward `arg::OSH` (3).
+dmb_dsb!(OSH);
+
+/// Outer Shareable is the required shareability domain, writes are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct OSHST;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `OSHST`; both forward `arg::OSHST` (2).
+dmb_dsb!(OSHST);
--- a/library/stdarch/crates/core_arch/src/arm_shared/barrier/v8.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/barrier/v8.rs
@ -0,0 +1,27 @@
+/// Full system is the required shareability domain, reads are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct LD;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `LD`; both forward `arg::LD` (13).
+dmb_dsb!(LD);
+
+/// Inner Shareable is the required shareability domain, reads are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct ISHLD;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `ISHLD`; both forward `arg::ISHLD` (9).
+dmb_dsb!(ISHLD);
+
+/// Non-shareable is the required shareability domain, reads are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct NSHLD;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `NSHLD`; both forward `arg::NSHLD` (5).
+dmb_dsb!(NSHLD);
+
+/// Outer Shareable is the required shareability domain, reads are the required
+/// access type
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub struct OSHLD;
+
+// Implements the sealed `Dmb` and `Dsb` traits for `OSHLD`; both forward `arg::OSHLD` (1).
+dmb_dsb!(OSHLD);
--- a/library/stdarch/crates/core_arch/src/arm_shared/hints.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/hints.rs
@ -0,0 +1,125 @@
+// # References
+//
+// - Section 7.4 "Hints" of ACLE
+// - Section 7.7 "NOP" of ACLE
+
+/// Generates a WFI (wait for interrupt) hint instruction, or nothing.
+///
+/// The WFI instruction allows (but does not require) the processor to enter a
+/// low-power state until one of a number of asynchronous events occurs.
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
+// LLVM says "instruction requires: armv6k"
+// `doc` is set by rustdoc, so the item is also compiled when documentation is built.
+#[cfg(any(
+    target_feature = "v6",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    doc
+))]
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __wfi() {
+    // Passes the WFI hint code (3) to the LLVM `hint` binding declared further down.
+    hint(HINT_WFI);
+}
+
+/// Generates a WFE (wait for event) hint instruction, or nothing.
+///
+/// The WFE instruction allows (but does not require) the processor to enter a
+/// low-power state until some event occurs such as a SEV being issued by
+/// another processor.
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
+// LLVM says "instruction requires: armv6k"
+// `doc` is set by rustdoc, so the item is also compiled when documentation is built.
+#[cfg(any(
+    target_feature = "v6",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    doc
+))]
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __wfe() {
+    // Passes the WFE hint code (2) to the LLVM `hint` binding declared further down.
+    hint(HINT_WFE);
+}
+
+/// Generates a SEV (send a global event) hint instruction.
+///
+/// This causes an event to be signaled to all processors in a multiprocessor
+/// system. It is a NOP on a uniprocessor system.
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M
+// LLVM says "instruction requires: armv6k"
+// `doc` is set by rustdoc, so the item is also compiled when documentation is built.
+#[cfg(any(
+    target_feature = "v6",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    doc
+))]
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __sev() {
+    // Passes the SEV hint code (4) to the LLVM `hint` binding declared further down.
+    hint(HINT_SEV);
+}
+
+/// Generates a SEVL (send a local event) hint instruction.
+///
+/// This causes an event to be signaled to only the processor executing this
+/// instruction. In a multiprocessor system, it is not required to affect the
+/// other processors.
+// LLVM says "instruction requires: armv8"
+// `doc` is set by rustdoc, so the item is also compiled when documentation is built.
+#[cfg(any(
+    target_feature = "v8", // 32-bit ARMv8
+    target_arch = "aarch64", // AArch64
+    target_arch = "arm64ec", // Arm64EC
+    doc,
+))]
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __sevl() {
+    // Passes the SEVL hint code (5) to the LLVM `hint` binding declared further down.
+    hint(HINT_SEVL);
+}
+
+/// Generates a YIELD hint instruction.
+///
+/// This enables multithreading software to indicate to the hardware that it is
+/// performing a task, for example a spin-lock, that could be swapped out to
+/// improve overall system performance.
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
+// LLVM says "instruction requires: armv6k"
+// `doc` is set by rustdoc, so the item is also compiled when documentation is built.
+#[cfg(any(
+    target_feature = "v6",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    doc
+))]
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __yield() {
+    // Passes the YIELD hint code (1) to the LLVM `hint` binding declared further down.
+    hint(HINT_YIELD);
+}
+
+/// Generates an unspecified no-op instruction.
+///
+/// Note that not all architectures provide a distinguished NOP instruction. On
+/// those that do, it is unspecified whether this intrinsic generates it or
+/// another instruction. It is not guaranteed that inserting this instruction
+/// will increase execution time.
+#[inline(always)]
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub unsafe fn __nop() {
+    // Emitted through inline asm rather than the `hint` binding. The options declare that the
+    // block accesses no memory, does not use the stack and leaves the flags unchanged.
+    crate::arch::asm!("nop", options(nomem, nostack, preserves_flags));
+}
+
+// Binding to the LLVM hint intrinsic. `link_name` selects `llvm.aarch64.hint` on aarch64 /
+// arm64ec and `llvm.arm.hint` on arm. The `i32` argument is one of the `HINT_*` codes below.
+unsafe extern "unadjusted" {
+    #[cfg_attr(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        link_name = "llvm.aarch64.hint"
+    )]
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
+    fn hint(_: i32);
+}
+
+// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td
+// Hint codes passed to the `hint` binding by the functions in this file.
+// NOTE(review): `HINT_NOP` has no user in this file (`__nop` emits `nop` through inline asm).
+const HINT_NOP: i32 = 0;
+const HINT_YIELD: i32 = 1;
+const HINT_WFE: i32 = 2;
+const HINT_WFI: i32 = 3;
+const HINT_SEV: i32 = 4;
+const HINT_SEVL: i32 = 5;
--- a/library/stdarch/crates/core_arch/src/arm_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs
@ -0,0 +1,117 @@
+//! ARM C Language Extensions (ACLE)
+//!
+//! # Developer notes
+//!
+//! Below is a list of built-in targets that are representative of the different ARM
+//! architectures; the list includes the `target_feature`s they possess.
+//!
+//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t`
+//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te`
+//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6`
+//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass`
+//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass`
+//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass`
+//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
+//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass`
+//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
+//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass`
+//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon`
+//!
+//! Section 10.1 of ACLE says:
+//!
+//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
+//! its predecessor instruction set."
+//!
+//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
+//! its predecessor instruction set."
+//!
+//! From that info and from looking at how LLVM features work (using custom targets) we can identify
+//! features that are subsets of others:
+//!
+//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is
+//! enabled as well.
+//!
+//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8`
+//! - `v6 < v8m < v6t2`
+//! - `v7 < v8m.main`
+//!
+//! *NOTE*: Section 5.4.7 of ACLE says:
+//!
+//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
+//! intrinsics defined in Saturating intrinsics are available."
+//!
+//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
+//! targets so we have to work around this difference.
+//!
+//! # References
+//!
+//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest)
+
+#![cfg_attr(
+    all(target_arch = "aarch64", target_abi = "softfloat"),
+    // Just allow the warning: anyone soundly using the intrinsics has to enable
+    // the target feature, and that will generate a warning for them.
+    allow(aarch64_softfloat_neon)
+)]
+// Only for 'neon' submodule
+#![allow(non_camel_case_types)]
+
+// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported
+// via CP15 instructions. See Section 10.1 of ACLE
+mod barrier;
+#[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+pub use self::barrier::*;
+
+mod hints;
+#[unstable(feature = "stdarch_arm_hints", issue = "117218")]
+pub use self::hints::*;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    doc
+))]
+pub(crate) mod neon;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    doc
+))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub use self::neon::*;
+
+#[cfg(test)]
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_feature = "v7",
+    doc
+))]
+pub(crate) mod test_support;
+
+// Private module holding the traits that the public `__dmb` / `__dsb` / `__isb` functions are
+// bounded on. `sealed` itself is not `pub`, so code outside this module tree cannot name these
+// traits and therefore cannot implement them for its own types.
+mod sealed {
+    // Implemented by the marker types accepted by `__dmb`.
+    #[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+    pub trait Dmb {
+        unsafe fn __dmb(&self);
+    }
+
+    // Implemented by the marker types accepted by `__dsb`.
+    #[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+    pub trait Dsb {
+        unsafe fn __dsb(&self);
+    }
+
+    // Implemented by the marker types accepted by `__isb`.
+    #[unstable(feature = "stdarch_arm_barrier", issue = "117219")]
+    pub trait Isb {
+        unsafe fn __isb(&self);
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs
@ -0,0 +1,206 @@
+//! Tests for ARM+v7+neon load (vld1) intrinsics.
+//!
+//! These are included in `{arm, aarch64}::neon`.
+
+use super::*;
+
+#[cfg(target_arch = "arm")]
+use crate::core_arch::arm::*;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+use crate::core_arch::aarch64::*;
+
+use crate::core_arch::simd::*;
+use std::mem;
+use stdarch_test::simd_test;
+// vld1_s8: load 8 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_s8() {
+    let src: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: i8x8 = transmute(vld1_s8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_s8: load 16 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_s8() {
+    let src: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    let expected = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+    let tail = src[1..].as_ptr();
+    let loaded: i8x16 = transmute(vld1q_s8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_s16: load 4 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_s16() {
+    let src: [i16; 5] = [0, 1, 2, 3, 4];
+    let expected = i16x4::new(1, 2, 3, 4);
+    let tail = src[1..].as_ptr();
+    let loaded: i16x4 = transmute(vld1_s16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_s16: load 8 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_s16() {
+    let src: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: i16x8 = transmute(vld1q_s16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_s32: load 2 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_s32() {
+    let src: [i32; 3] = [0, 1, 2];
+    let expected = i32x2::new(1, 2);
+    let tail = src[1..].as_ptr();
+    let loaded: i32x2 = transmute(vld1_s32(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_s32: load 4 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_s32() {
+    let src: [i32; 5] = [0, 1, 2, 3, 4];
+    let expected = i32x4::new(1, 2, 3, 4);
+    let tail = src[1..].as_ptr();
+    let loaded: i32x4 = transmute(vld1q_s32(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_s64: load the single lane that follows the leading 0 in the buffer.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_s64() {
+    let src: [i64; 2] = [0, 1];
+    let expected = i64x1::new(1);
+    let tail = src[1..].as_ptr();
+    let loaded: i64x1 = transmute(vld1_s64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_s64: load 2 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_s64() {
+    let src: [i64; 3] = [0, 1, 2];
+    let expected = i64x2::new(1, 2);
+    let tail = src[1..].as_ptr();
+    let loaded: i64x2 = transmute(vld1q_s64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_u8: load 8 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_u8() {
+    let src: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: u8x8 = transmute(vld1_u8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_u8: load 16 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_u8() {
+    let src: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    let expected = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+    let tail = src[1..].as_ptr();
+    let loaded: u8x16 = transmute(vld1q_u8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_u16: load 4 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_u16() {
+    let src: [u16; 5] = [0, 1, 2, 3, 4];
+    let expected = u16x4::new(1, 2, 3, 4);
+    let tail = src[1..].as_ptr();
+    let loaded: u16x4 = transmute(vld1_u16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_u16: load 8 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_u16() {
+    let src: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: u16x8 = transmute(vld1q_u16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_u32: load 2 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_u32() {
+    let src: [u32; 3] = [0, 1, 2];
+    let expected = u32x2::new(1, 2);
+    let tail = src[1..].as_ptr();
+    let loaded: u32x2 = transmute(vld1_u32(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_u32: load 4 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_u32() {
+    let src: [u32; 5] = [0, 1, 2, 3, 4];
+    let expected = u32x4::new(1, 2, 3, 4);
+    let tail = src[1..].as_ptr();
+    let loaded: u32x4 = transmute(vld1q_u32(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_u64: load the single lane that follows the leading 0 in the buffer.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_u64() {
+    let src: [u64; 2] = [0, 1];
+    let expected = u64x1::new(1);
+    let tail = src[1..].as_ptr();
+    let loaded: u64x1 = transmute(vld1_u64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_u64: load 2 lanes starting one element into the buffer, so the leading 0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_u64() {
+    let src: [u64; 3] = [0, 1, 2];
+    let expected = u64x2::new(1, 2);
+    let tail = src[1..].as_ptr();
+    let loaded: u64x2 = transmute(vld1q_u64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_p8: load 8 lanes starting one element into the buffer; the result is compared as `u8x8`.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_p8() {
+    let src: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: u8x8 = transmute(vld1_p8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_p8: load 16 lanes starting one element into the buffer; the result is compared as `u8x16`.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_p8() {
+    let src: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+    let expected = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+    let tail = src[1..].as_ptr();
+    let loaded: u8x16 = transmute(vld1q_p8(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_p16: load 4 lanes starting one element into the buffer; the result is compared as `u16x4`.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_p16() {
+    let src: [p16; 5] = [0, 1, 2, 3, 4];
+    let expected = u16x4::new(1, 2, 3, 4);
+    let tail = src[1..].as_ptr();
+    let loaded: u16x4 = transmute(vld1_p16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_p16: load 8 lanes starting one element into the buffer; the result is compared as `u16x8`.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_p16() {
+    let src: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    let expected = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+    let tail = src[1..].as_ptr();
+    let loaded: u16x8 = transmute(vld1q_p16(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_p64: load the single lane that follows the leading 0; the test enables `aes` with `neon`.
+#[simd_test(enable = "neon,aes")]
+unsafe fn test_vld1_p64() {
+    let src: [p64; 2] = [0, 1];
+    let expected = u64x1::new(1);
+    let tail = src[1..].as_ptr();
+    let loaded: u64x1 = transmute(vld1_p64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_p64: load 2 lanes starting one element in; the test enables `aes` together with `neon`.
+#[simd_test(enable = "neon,aes")]
+unsafe fn test_vld1q_p64() {
+    let src: [p64; 3] = [0, 1, 2];
+    let expected = u64x2::new(1, 2);
+    let tail = src[1..].as_ptr();
+    let loaded: u64x2 = transmute(vld1q_p64(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1_f32: load 2 lanes starting one element into the buffer, so the leading 0.0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1_f32() {
+    let src: [f32; 3] = [0.0, 1.0, 2.0];
+    let expected = f32x2::new(1.0, 2.0);
+    let tail = src[1..].as_ptr();
+    let loaded: f32x2 = transmute(vld1_f32(tail));
+    assert_eq!(loaded, expected);
+}
+
+// vld1q_f32: load 4 lanes starting one element into the buffer, so the leading 0.0 is skipped.
+#[simd_test(enable = "neon")]
+unsafe fn test_vld1q_f32() {
+    let src: [f32; 5] = [0.0, 1.0, 2.0, 3.0, 4.0];
+    let expected = f32x4::new(1.0, 2.0, 3.0, 4.0);
+    let tail = src[1..].as_ptr();
+    let loaded: f32x4 = transmute(vld1q_f32(tail));
+    assert_eq!(loaded, expected);
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs
@ -0,0 +1,93 @@
+//! Tests for ARM+v7+neon shift and insert (vsli[q]_n, vsri[q]_n) intrinsics.
+//!
+//! These are included in `{arm, aarch64}::neon`.
+
+use super::*;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+use crate::core_arch::aarch64::*;
+
+#[cfg(target_arch = "arm")]
+use crate::core_arch::arm::*;
+
+use crate::core_arch::simd::*;
+use std::mem::transmute;
+use stdarch_test::simd_test;
+
+// Generates a `#[simd_test]` for one shift-left-and-insert intrinsic. The expected lanes are
+// computed in scalar code: the low `$n` bits are taken from `$a`, everything above them from
+// `$b` shifted left by `$n`.
+macro_rules! test_vsli {
+    ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
+        #[simd_test(enable = "neon")]
+        #[allow(unused_assignments)]
+        unsafe fn $test_id() {
+            let lhs = [$($a as $t),*];
+            let rhs = [$($b as $t),*];
+            let keep_low: $t = (1 << $n) - 1;
+            let expected = [$(($a as $t & keep_low) | (($b as $t) << $n)),*];
+            // Seed `actual` from `expected` so the transmute target type is inferred from it.
+            let mut actual = expected;
+            actual = transmute($fn_id::<$n>(transmute(lhs), transmute(rhs)));
+            assert_eq!(actual, expected);
+        }
+    }
+}
+test_vsli!(test_vsli_n_s8, i8 => vsli_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
+test_vsli!(test_vsliq_n_s8, i8 => vsliq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
+test_vsli!(test_vsli_n_s16, i16 => vsli_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
+test_vsli!(test_vsliq_n_s16, i16 => vsliq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
+test_vsli!(test_vsli_n_s32, i32 => vsli_n_s32([125683, -78901], [-128, -112944], 23));
+test_vsli!(test_vsliq_n_s32, i32 => vsliq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
+test_vsli!(test_vsli_n_s64, i64 => vsli_n_s64([-333333], [1028], 45));
+test_vsli!(test_vsliq_n_s64, i64 => vsliq_n_s64([-333333, -52023], [1028, -99814], 33));
+test_vsli!(test_vsli_n_u8, u8 => vsli_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
+test_vsli!(test_vsliq_n_u8, u8 => vsliq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
+test_vsli!(test_vsli_n_u16, u16 => vsli_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
+test_vsli!(test_vsliq_n_u16, u16 => vsliq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
+test_vsli!(test_vsli_n_u32, u32 => vsli_n_u32([125683, 78901], [128, 112944], 23));
+test_vsli!(test_vsliq_n_u32, u32 => vsliq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
+test_vsli!(test_vsli_n_u64, u64 => vsli_n_u64([333333], [1028], 45));
+test_vsli!(test_vsliq_n_u64, u64 => vsliq_n_u64([333333, 52023], [1028, 99814], 33));
+test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
+test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
+test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
+test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
+//test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45));
+//test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33));
+
+// Generates a `#[simd_test]` for one shift-right-and-insert intrinsic. The expected lanes are
+// computed in scalar code: the top `$n` bits are taken from `$a`, the remaining bits from `$b`
+// shifted right by `$n` (any bits shifted into the top `$n` positions are masked off).
+macro_rules! test_vsri {
+    ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
+        #[simd_test(enable = "neon")]
+        #[allow(unused_assignments)]
+        unsafe fn $test_id() {
+            let lhs = [$($a as $t),*];
+            let rhs = [$($b as $t),*];
+            // `$n` low ones rotated right by `$n` become the `$n` highest bits.
+            let keep_high = (((1 as $t) << $n) - 1).rotate_right($n);
+            let expected = [$(($a as $t & keep_high) | (($b as $t >> $n) & !keep_high)),*];
+            // Seed `actual` from `expected` so the transmute target type is inferred from it.
+            let mut actual = expected;
+            actual = transmute($fn_id::<$n>(transmute(lhs), transmute(rhs)));
+            assert_eq!(actual, expected);
+        }
+    }
+}
+test_vsri!(test_vsri_n_s8, i8 => vsri_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5));
+test_vsri!(test_vsriq_n_s8, i8 => vsriq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2));
+test_vsri!(test_vsri_n_s16, i16 => vsri_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7));
+test_vsri!(test_vsriq_n_s16, i16 => vsriq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14));
+test_vsri!(test_vsri_n_s32, i32 => vsri_n_s32([125683, -78901], [-128, -112944], 23));
+test_vsri!(test_vsriq_n_s32, i32 => vsriq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15));
+test_vsri!(test_vsri_n_s64, i64 => vsri_n_s64([-333333], [1028], 45));
+test_vsri!(test_vsriq_n_s64, i64 => vsriq_n_s64([-333333, -52023], [1028, -99814], 33));
+test_vsri!(test_vsri_n_u8, u8 => vsri_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
+test_vsri!(test_vsriq_n_u8, u8 => vsriq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
+test_vsri!(test_vsri_n_u16, u16 => vsri_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
+test_vsri!(test_vsriq_n_u16, u16 => vsriq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
+test_vsri!(test_vsri_n_u32, u32 => vsri_n_u32([125683, 78901], [128, 112944], 23));
+test_vsri!(test_vsriq_n_u32, u32 => vsriq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15));
+test_vsri!(test_vsri_n_u64, u64 => vsri_n_u64([333333], [1028], 45));
+test_vsri!(test_vsriq_n_u64, u64 => vsriq_n_u64([333333, 52023], [1028, 99814], 33));
+test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5));
+test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
+test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
+test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
+//test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45));
+//test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33));
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs
@ -0,0 +1,389 @@
+//! Tests for ARM+v7+neon store (vst1) intrinsics.
+//!
+//! These are included in `{arm, aarch64}::neon`.
+
+use super::*;
+
+#[cfg(target_arch = "arm")]
+use crate::core_arch::arm::*;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+use crate::core_arch::aarch64::*;
+
+use crate::core_arch::simd::*;
+use stdarch_test::simd_test;
+
+// `vst1_s8`: write eight `i8` lanes (1..=8) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_s8() {
+    let mut buf = [0_i8; 9];
+    let lanes = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1_s8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i8);
+    }
+}
+
+// `vst1q_s8`: write sixteen `i8` lanes (1..=16) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_s8() {
+    let mut buf = [0_i8; 17];
+    let lanes = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+
+    vst1q_s8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i8);
+    }
+}
+
+// `vst1_s16`: write four `i16` lanes (1..=4) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_s16() {
+    let mut buf = [0_i16; 5];
+    let lanes = i16x4::new(1, 2, 3, 4);
+
+    vst1_s16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i16);
+    }
+}
+
+// `vst1q_s16`: write eight `i16` lanes (1..=8) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_s16() {
+    let mut buf = [0_i16; 9];
+    let lanes = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1q_s16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i16);
+    }
+}
+
+// `vst1_s32`: write two `i32` lanes (1, 2) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_s32() {
+    let mut buf = [0_i32; 3];
+    let lanes = i32x2::new(1, 2);
+
+    vst1_s32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i32);
+    }
+}
+
+// `vst1q_s32`: write four `i32` lanes (1..=4) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_s32() {
+    let mut buf = [0_i32; 5];
+    let lanes = i32x4::new(1, 2, 3, 4);
+
+    vst1q_s32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i32);
+    }
+}
+
+// `vst1_s64`: write a single `i64` lane (1) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_s64() {
+    let mut buf = [0_i64; 2];
+    let lanes = i64x1::new(1);
+
+    vst1_s64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i64);
+    }
+}
+
+// `vst1q_s64`: write two `i64` lanes (1, 2) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_s64() {
+    let mut buf = [0_i64; 3];
+    let lanes = i64x2::new(1, 2);
+
+    vst1q_s64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as i64);
+    }
+}
+
+// `vst1_u8`: write eight `u8` lanes (1..=8) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_u8() {
+    let mut buf = [0_u8; 9];
+    let lanes = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1_u8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u8);
+    }
+}
+
+// `vst1q_u8`: write sixteen `u8` lanes (1..=16) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_u8() {
+    let mut buf = [0_u8; 17];
+    let lanes = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+
+    vst1q_u8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u8);
+    }
+}
+
+// `vst1_u16`: write four `u16` lanes (1..=4) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_u16() {
+    let mut buf = [0_u16; 5];
+    let lanes = u16x4::new(1, 2, 3, 4);
+
+    vst1_u16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u16);
+    }
+}
+
+// `vst1q_u16`: write eight `u16` lanes (1..=8) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_u16() {
+    let mut buf = [0_u16; 9];
+    let lanes = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1q_u16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u16);
+    }
+}
+
+// `vst1_u32`: write two `u32` lanes (1, 2) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_u32() {
+    let mut buf = [0_u32; 3];
+    let lanes = u32x2::new(1, 2);
+
+    vst1_u32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u32);
+    }
+}
+
+// `vst1q_u32`: write four `u32` lanes (1..=4) at `buf[1..]`; afterwards
+// every slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_u32() {
+    let mut buf = [0_u32; 5];
+    let lanes = u32x4::new(1, 2, 3, 4);
+
+    vst1q_u32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u32);
+    }
+}
+
+// `vst1_u64`: write a single `u64` lane (1) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_u64() {
+    let mut buf = [0_u64; 2];
+    let lanes = u64x1::new(1);
+
+    vst1_u64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u64);
+    }
+}
+
+// `vst1q_u64`: write two `u64` lanes (1, 2) at `buf[1..]`; afterwards every
+// slot `i` must hold `i`, including `buf[0]`, which has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_u64() {
+    let mut buf = [0_u64; 3];
+    let lanes = u64x2::new(1, 2);
+
+    vst1q_u64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u64);
+    }
+}
+
+// `vst1_p8`: write eight lanes (1..=8) into a `u8` buffer at `buf[1..]`;
+// afterwards every slot `i` must hold `i`, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_p8() {
+    let mut buf = [0_u8; 9];
+    let lanes = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1_p8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u8);
+    }
+}
+
+// `vst1q_p8`: write sixteen lanes (1..=16) into a `u8` buffer at `buf[1..]`;
+// afterwards every slot `i` must hold `i`, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_p8() {
+    let mut buf = [0_u8; 17];
+    let lanes = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+
+    vst1q_p8(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u8);
+    }
+}
+
+// `vst1_p16`: write four lanes (1..=4) into a `u16` buffer at `buf[1..]`;
+// afterwards every slot `i` must hold `i`, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_p16() {
+    let mut buf = [0_u16; 5];
+    let lanes = u16x4::new(1, 2, 3, 4);
+
+    vst1_p16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u16);
+    }
+}
+
+// `vst1q_p16`: write eight lanes (1..=8) into a `u16` buffer at `buf[1..]`;
+// afterwards every slot `i` must hold `i`, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_p16() {
+    let mut buf = [0_u16; 9];
+    let lanes = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+
+    vst1q_p16(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u16);
+    }
+}
+
+// `vst1_p64` (needs `aes` in addition to `neon`): write a single lane (1)
+// into a `u64` buffer at `buf[1..]`; afterwards every slot `i` must hold `i`,
+// and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon,aes")]
+unsafe fn test_vst1_p64() {
+    let mut buf = [0_u64; 2];
+    let lanes = u64x1::new(1);
+
+    vst1_p64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u64);
+    }
+}
+
+// `vst1q_p64` (needs `aes` in addition to `neon`): write two lanes (1, 2)
+// into a `u64` buffer at `buf[1..]`; afterwards every slot `i` must hold `i`,
+// and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon,aes")]
+unsafe fn test_vst1q_p64() {
+    let mut buf = [0_u64; 3];
+    let lanes = u64x2::new(1, 2);
+
+    vst1q_p64(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as u64);
+    }
+}
+
+// `vst1_f32`: write two `f32` lanes (1.0, 2.0) at `buf[1..]`; afterwards
+// every slot `i` must hold `i` as a float, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1_f32() {
+    let mut buf = [0_f32; 3];
+    let lanes = f32x2::new(1., 2.);
+
+    vst1_f32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as f32);
+    }
+}
+
+// `vst1q_f32`: write four `f32` lanes (1.0..=4.0) at `buf[1..]`; afterwards
+// every slot `i` must hold `i` as a float, and `buf[0]` has to stay zero.
+#[simd_test(enable = "neon")]
+unsafe fn test_vst1q_f32() {
+    let mut buf = [0_f32; 5];
+    let lanes = f32x4::new(1., 2., 3., 4.);
+
+    vst1q_f32(buf[1..].as_mut_ptr(), transmute(lanes));
+
+    for (slot, &stored) in buf.iter().enumerate() {
+        assert_eq!(stored, slot as f32);
+    }
+}
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
@ -0,0 +1,836 @@
+#[cfg(target_arch = "arm")]
+use crate::core_arch::arm::*;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+use crate::core_arch::aarch64::*;
+
+use crate::core_arch::simd::*;
+use std::{mem::transmute, vec::Vec};
+
+// Representative `u8` inputs for the lane-wise tests: zero, two small values,
+// the low-nibble mask, the top bit alone, the high-nibble mask and all-ones.
+macro_rules! V_u8 {
+    () => {
+        vec![0x00u8, 0x01u8, 0x02u8, 0x0Fu8, 0x80u8, 0xF0u8, 0xFFu8]
+    };
+}
+// Representative `u16` inputs: zero, byte-repeated small values, a
+// nibble-alternating mask, the top bit alone, the inverse mask and all-ones.
+macro_rules! V_u16 {
+    () => {
+        vec![
+            0x0000u16, 0x0101u16, 0x0202u16, 0x0F0Fu16, 0x8000u16, 0xF0F0u16, 0xFFFFu16,
+        ]
+    };
+}
+// Representative `u32` inputs: zero, byte-repeated small values, a
+// nibble-alternating mask, the top bit alone, the inverse mask and all-ones.
+macro_rules! V_u32 {
+    () => {
+        vec![
+            0x00000000u32,
+            0x01010101u32,
+            0x02020202u32,
+            0x0F0F0F0Fu32,
+            0x80000000u32,
+            0xF0F0F0F0u32,
+            0xFFFFFFFFu32,
+        ]
+    };
+}
+// Representative `u64` inputs: zero, byte-repeated small values, a
+// nibble-alternating mask, the top bit of every byte, the inverse mask and
+// all-ones. Note that the fifth entry sets the top bit of *each* byte
+// (0x8080...), unlike `V_u16`/`V_u32`, which set only the single top bit.
+macro_rules! V_u64 {
+    () => {
+        vec![
+            0x0000000000000000u64,
+            0x0101010101010101u64,
+            0x0202020202020202u64,
+            0x0F0F0F0F0F0F0F0Fu64,
+            0x8080808080808080u64,
+            0xF0F0F0F0F0F0F0F0u64,
+            0xFFFFFFFFFFFFFFFFu64,
+        ]
+    };
+}
+
+// Representative `i8` inputs. The negative entries are written in decimal
+// because their bit patterns do not fit a positive `i8` literal; the trailing
+// comments give the corresponding two's-complement bit pattern.
+macro_rules! V_i8 {
+    () => {
+        vec![
+            0x00i8, 0x01i8, 0x02i8, 0x0Fi8, -128i8, /* 0x80 */
+            -16i8,  /* 0xF0 */
+            -1i8,   /* 0xFF */
+        ]
+    };
+}
+// Representative `i16` inputs. The negative entries are written in decimal;
+// the trailing comments give the corresponding two's-complement bit pattern.
+macro_rules! V_i16 {
+    () => {
+        vec![
+            0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */
+            -3856i16,  /* 0xF0F0 */
+            -1i16,     /* 0xFFFF */
+        ]
+    };
+}
+// Representative `i32` inputs. The negative entries are written in decimal;
+// the trailing comments give the corresponding two's-complement bit pattern.
+//
+// NOTE(review): the fifth entry is 0x80808080 (top bit of every byte), not
+// `i32::MIN` (0x80000000) as in `V_u32`; confirm which one was intended.
+macro_rules! V_i32 {
+    () => {
+        vec![
+            0x00000000i32,
+            0x01010101i32,
+            0x02020202i32,
+            0x0F0F0F0Fi32,
+            -2139062144i32, /* 0x80808080 */
+            -252645136i32,  /* 0xF0F0F0F0 */
+            -1i32,          /* 0xFFFFFFFF */
+        ]
+    };
+}
+
+// Representative `i64` inputs. The negative entries are written in decimal;
+// the trailing comments give the corresponding two's-complement bit pattern.
+//
+// NOTE(review): the fifth and sixth entries (0x8000... and 0xF000...) do not
+// mirror the `V_u64` patterns (0x8080... and 0xF0F0...); confirm whether the
+// difference is deliberate.
+macro_rules! V_i64 {
+    () => {
+        vec![
+            0x0000000000000000i64,
+            0x0101010101010101i64,
+            0x0202020202020202i64,
+            0x0F0F0F0F0F0F0F0Fi64,
+            -9223372036854775808i64, /* 0x8000000000000000 */
+            -1152921504606846976i64, /* 0xF000000000000000 */
+            -1i64,                   /* 0xFFFFFFFFFFFFFFFF */
+        ]
+    };
+}
+
+// Representative `f32` inputs: zero, +/-1, two ordinary fractions, the finite
+// extremes, both infinities and NaN.
+macro_rules! V_f32 {
+    () => {
+        vec![
+            0.0f32,
+            1.0f32,
+            -1.0f32,
+            1.2f32,
+            2.4f32,
+            f32::MAX,
+            f32::MIN,
+            f32::INFINITY,
+            f32::NEG_INFINITY,
+            f32::NAN,
+        ]
+    };
+}
+
+// Expands to a closure that reinterprets a value of type `$t` as a `u64` via
+// `transmute`. The expansion calls `transmute` without its own `unsafe` block,
+// so it has to be used inside an unsafe context.
+macro_rules! to64 {
+    ($t : ident) => {
+        |v: $t| -> u64 { transmute(v) }
+    };
+}
+
+// Expands to a closure that reinterprets a value of type `$t` as a `u128` via
+// `transmute`. The expansion calls `transmute` without its own `unsafe` block,
+// so it has to be used inside an unsafe context.
+macro_rules! to128 {
+    ($t : ident) => {
+        |v: $t| -> u128 { transmute(v) }
+    };
+}
+
+/// Checks a two-operand function against a scalar reference implementation.
+///
+/// For every value `x` in `vals`, both operands are built from `x` with
+/// `fill1`, `test_fun` is applied to them, and the outcome is compared with
+/// `verify_fun(x, x)` converted through `fill2`. Both sides go through `cast`
+/// before the comparison, so `W` itself does not need to be `PartialEq`.
+///
+/// NOTE: `vals` is zipped with itself, so the two operands are always the
+/// same element; mixed pairs such as `(vals[0], vals[1])` are never tried.
+///
+/// # Panics
+///
+/// Panics on the first value for which the two cast results differ. The
+/// message shows the scalar inputs, the filled operands and both un-cast
+/// results.
+pub(crate) fn test<T, U, V, W, X>(
+    vals: Vec<T>,
+    fill1: fn(T) -> V,
+    fill2: fn(U) -> W,
+    cast: fn(W) -> X,
+    test_fun: fn(V, V) -> W,
+    verify_fun: fn(T, T) -> U,
+) where
+    T: Copy + core::fmt::Debug + std::cmp::PartialEq,
+    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
+    V: Copy + core::fmt::Debug,
+    W: Copy + core::fmt::Debug,
+    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
+{
+    vals.iter().zip(&vals).for_each(|(&lhs, &rhs)| {
+        let a = fill1(lhs);
+        let b = fill1(rhs);
+
+        // Keep the un-cast results around: they are what the failure message shows.
+        let actual_pre = test_fun(a, b);
+        let expected_pre = fill2(verify_fun(lhs, rhs));
+
+        assert_eq!(
+            cast(actual_pre),
+            cast(expected_pre),
+            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
+            lhs,
+            rhs,
+            a,
+            b,
+            actual_pre,
+            a,
+            b,
+            expected_pre
+        );
+    });
+}
+
+// Generates a `pub(crate)` wrapper named `$n` around `test`, pinning its five
+// type parameters to `$t, $u, $v, $w, $x` and supplying the input values
+// (`$vals`), the two fill functions (`$fill1`, `$fill2`) and the cast (`$cast`).
+//
+// The generated function only takes the function under test (`test_fun`) and
+// its scalar reference (`verify_fun`).
+macro_rules! gen_test_fn {
+    ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
+        pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
+            // The macro arguments are expanded inside this `unsafe` block; the
+            // `to64!`/`to128!` casts passed as `$cast` expand to closures that
+            // call `transmute`.
+            unsafe {
+                test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
+            };
+        }
+    };
+}
+
+// Generates `$id`, a function that turns one scalar `$in_t` into an `$out_t`
+// made of `$num_els` copies of that scalar.
+//
+// The copies are placed in an array, reinterpreted as `$cmp_t`, and then
+// reinterpreted again as `$out_t`. `$el_width` is only referenced by the
+// commented-out debug print in the body.
+macro_rules! gen_fill_fn {
+    ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
+        pub(crate) fn $id(val: $in_t) -> $out_t {
+            let initial: [$in_t; $num_els] = [val; $num_els];
+            // Two-step reinterpretation: array -> `$cmp_t` -> `$out_t`.
+            let result: $cmp_t = unsafe { transmute(initial) };
+            let result_out: $out_t = unsafe { transmute(result) };
+
+            // Debug aid, left disabled:
+            // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
+
+            result_out
+        }
+    };
+}
+
+// Fill helpers, one per scalar type and vector width. Arguments are: function
+// name, element width in bits, number of elements, scalar type, vector type
+// and the integer type used for the intermediate reinterpretation (`u64` for
+// the `fill_*` helpers, `u128` for the `fillq_*` helpers).
+gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
+gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
+gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
+gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
+
+gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
+gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
+gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
+gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
+
+gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
+gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
+gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
+gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
+
+gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
+gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
+gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
+gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
+
+gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
+gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
+
+// `u8` test drivers (`ari`, `bit` and `cmp` flavours, plus `testq_*` variants
+// for the 16-lane vectors). All six use the `V_u8!()` inputs; within each
+// width the three flavours are generated with identical arguments.
+gen_test_fn!(
+    test_ari_u8,
+    u8,
+    u8,
+    uint8x8_t,
+    uint8x8_t,
+    u64,
+    V_u8!(),
+    fill_u8,
+    fill_u8,
+    to64!(uint8x8_t)
+);
+gen_test_fn!(
+    test_bit_u8,
+    u8,
+    u8,
+    uint8x8_t,
+    uint8x8_t,
+    u64,
+    V_u8!(),
+    fill_u8,
+    fill_u8,
+    to64!(uint8x8_t)
+);
+gen_test_fn!(
+    test_cmp_u8,
+    u8,
+    u8,
+    uint8x8_t,
+    uint8x8_t,
+    u64,
+    V_u8!(),
+    fill_u8,
+    fill_u8,
+    to64!(uint8x8_t)
+);
+gen_test_fn!(
+    testq_ari_u8,
+    u8,
+    u8,
+    uint8x16_t,
+    uint8x16_t,
+    u128,
+    V_u8!(),
+    fillq_u8,
+    fillq_u8,
+    to128!(uint8x16_t)
+);
+gen_test_fn!(
+    testq_bit_u8,
+    u8,
+    u8,
+    uint8x16_t,
+    uint8x16_t,
+    u128,
+    V_u8!(),
+    fillq_u8,
+    fillq_u8,
+    to128!(uint8x16_t)
+);
+gen_test_fn!(
+    testq_cmp_u8,
+    u8,
+    u8,
+    uint8x16_t,
+    uint8x16_t,
+    u128,
+    V_u8!(),
+    fillq_u8,
+    fillq_u8,
+    to128!(uint8x16_t)
+);
+
+// `i8` test drivers using the `V_i8!()` inputs. The `ari` and `bit` flavours
+// produce signed vectors; the `cmp` flavours take signed operands but produce
+// unsigned results (`u8` reference value, `uint8x*_t` vector, unsigned fill).
+gen_test_fn!(
+    test_ari_s8,
+    i8,
+    i8,
+    int8x8_t,
+    int8x8_t,
+    u64,
+    V_i8!(),
+    fill_s8,
+    fill_s8,
+    to64!(int8x8_t)
+);
+gen_test_fn!(
+    test_bit_s8,
+    i8,
+    i8,
+    int8x8_t,
+    int8x8_t,
+    u64,
+    V_i8!(),
+    fill_s8,
+    fill_s8,
+    to64!(int8x8_t)
+);
+gen_test_fn!(
+    test_cmp_s8,
+    i8,
+    u8,
+    int8x8_t,
+    uint8x8_t,
+    u64,
+    V_i8!(),
+    fill_s8,
+    fill_u8,
+    to64!(uint8x8_t)
+);
+gen_test_fn!(
+    testq_ari_s8,
+    i8,
+    i8,
+    int8x16_t,
+    int8x16_t,
+    u128,
+    V_i8!(),
+    fillq_s8,
+    fillq_s8,
+    to128!(int8x16_t)
+);
+gen_test_fn!(
+    testq_bit_s8,
+    i8,
+    i8,
+    int8x16_t,
+    int8x16_t,
+    u128,
+    V_i8!(),
+    fillq_s8,
+    fillq_s8,
+    to128!(int8x16_t)
+);
+gen_test_fn!(
+    testq_cmp_s8,
+    i8,
+    u8,
+    int8x16_t,
+    uint8x16_t,
+    u128,
+    V_i8!(),
+    fillq_s8,
+    fillq_u8,
+    to128!(uint8x16_t)
+);
+
+// `u16` test drivers (`ari`, `bit` and `cmp` flavours, plus `testq_*` variants
+// for the 8-lane vectors). All six use the `V_u16!()` inputs; within each
+// width the three flavours are generated with identical arguments.
+gen_test_fn!(
+    test_ari_u16,
+    u16,
+    u16,
+    uint16x4_t,
+    uint16x4_t,
+    u64,
+    V_u16!(),
+    fill_u16,
+    fill_u16,
+    to64!(uint16x4_t)
+);
+gen_test_fn!(
+    test_bit_u16,
+    u16,
+    u16,
+    uint16x4_t,
+    uint16x4_t,
+    u64,
+    V_u16!(),
+    fill_u16,
+    fill_u16,
+    to64!(uint16x4_t)
+);
+gen_test_fn!(
+    test_cmp_u16,
+    u16,
+    u16,
+    uint16x4_t,
+    uint16x4_t,
+    u64,
+    V_u16!(),
+    fill_u16,
+    fill_u16,
+    to64!(uint16x4_t)
+);
+gen_test_fn!(
+    testq_ari_u16,
+    u16,
+    u16,
+    uint16x8_t,
+    uint16x8_t,
+    u128,
+    V_u16!(),
+    fillq_u16,
+    fillq_u16,
+    to128!(uint16x8_t)
+);
+gen_test_fn!(
+    testq_bit_u16,
+    u16,
+    u16,
+    uint16x8_t,
+    uint16x8_t,
+    u128,
+    V_u16!(),
+    fillq_u16,
+    fillq_u16,
+    to128!(uint16x8_t)
+);
+gen_test_fn!(
+    testq_cmp_u16,
+    u16,
+    u16,
+    uint16x8_t,
+    uint16x8_t,
+    u128,
+    V_u16!(),
+    fillq_u16,
+    fillq_u16,
+    to128!(uint16x8_t)
+);
+
+// `i16` test drivers using the `V_i16!()` inputs. The `ari` and `bit` flavours
+// produce signed vectors; the `cmp` flavours take signed operands but produce
+// unsigned results (`u16` reference value, `uint16x*_t` vector, unsigned fill).
+gen_test_fn!(
+    test_ari_s16,
+    i16,
+    i16,
+    int16x4_t,
+    int16x4_t,
+    u64,
+    V_i16!(),
+    fill_s16,
+    fill_s16,
+    to64!(int16x4_t)
+);
+gen_test_fn!(
+    test_bit_s16,
+    i16,
+    i16,
+    int16x4_t,
+    int16x4_t,
+    u64,
+    V_i16!(),
+    fill_s16,
+    fill_s16,
+    to64!(int16x4_t)
+);
+gen_test_fn!(
+    test_cmp_s16,
+    i16,
+    u16,
+    int16x4_t,
+    uint16x4_t,
+    u64,
+    V_i16!(),
+    fill_s16,
+    fill_u16,
+    to64!(uint16x4_t)
+);
+gen_test_fn!(
+    testq_ari_s16,
+    i16,
+    i16,
+    int16x8_t,
+    int16x8_t,
+    u128,
+    V_i16!(),
+    fillq_s16,
+    fillq_s16,
+    to128!(int16x8_t)
+);
+gen_test_fn!(
+    testq_bit_s16,
+    i16,
+    i16,
+    int16x8_t,
+    int16x8_t,
+    u128,
+    V_i16!(),
+    fillq_s16,
+    fillq_s16,
+    to128!(int16x8_t)
+);
+gen_test_fn!(
+    testq_cmp_s16,
+    i16,
+    u16,
+    int16x8_t,
+    uint16x8_t,
+    u128,
+    V_i16!(),
+    fillq_s16,
+    fillq_u16,
+    to128!(uint16x8_t)
+);
+
+// `u32` test drivers (`ari`, `bit` and `cmp` flavours, plus `testq_*` variants
+// for the 4-lane vectors). All six use the `V_u32!()` inputs; within each
+// width the three flavours are generated with identical arguments.
+gen_test_fn!(
+    test_ari_u32,
+    u32,
+    u32,
+    uint32x2_t,
+    uint32x2_t,
+    u64,
+    V_u32!(),
+    fill_u32,
+    fill_u32,
+    to64!(uint32x2_t)
+);
+gen_test_fn!(
+    test_bit_u32,
+    u32,
+    u32,
+    uint32x2_t,
+    uint32x2_t,
+    u64,
+    V_u32!(),
+    fill_u32,
+    fill_u32,
+    to64!(uint32x2_t)
+);
+gen_test_fn!(
+    test_cmp_u32,
+    u32,
+    u32,
+    uint32x2_t,
+    uint32x2_t,
+    u64,
+    V_u32!(),
+    fill_u32,
+    fill_u32,
+    to64!(uint32x2_t)
+);
+gen_test_fn!(
+    testq_ari_u32,
+    u32,
+    u32,
+    uint32x4_t,
+    uint32x4_t,
+    u128,
+    V_u32!(),
+    fillq_u32,
+    fillq_u32,
+    to128!(uint32x4_t)
+);
+gen_test_fn!(
+    testq_bit_u32,
+    u32,
+    u32,
+    uint32x4_t,
+    uint32x4_t,
+    u128,
+    V_u32!(),
+    fillq_u32,
+    fillq_u32,
+    to128!(uint32x4_t)
+);
+gen_test_fn!(
+    testq_cmp_u32,
+    u32,
+    u32,
+    uint32x4_t,
+    uint32x4_t,
+    u128,
+    V_u32!(),
+    fillq_u32,
+    fillq_u32,
+    to128!(uint32x4_t)
+);
+
+// `i32` test drivers using the `V_i32!()` inputs. The `ari` and `bit` flavours
+// produce signed vectors; the `cmp` flavours take signed operands but produce
+// unsigned results (`u32` reference value, `uint32x*_t` vector, unsigned fill).
+gen_test_fn!(
+    test_ari_s32,
+    i32,
+    i32,
+    int32x2_t,
+    int32x2_t,
+    u64,
+    V_i32!(),
+    fill_s32,
+    fill_s32,
+    to64!(int32x2_t)
+);
+gen_test_fn!(
+    test_bit_s32,
+    i32,
+    i32,
+    int32x2_t,
+    int32x2_t,
+    u64,
+    V_i32!(),
+    fill_s32,
+    fill_s32,
+    to64!(int32x2_t)
+);
+gen_test_fn!(
+    test_cmp_s32,
+    i32,
+    u32,
+    int32x2_t,
+    uint32x2_t,
+    u64,
+    V_i32!(),
+    fill_s32,
+    fill_u32,
+    to64!(uint32x2_t)
+);
+gen_test_fn!(
+    testq_ari_s32,
+    i32,
+    i32,
+    int32x4_t,
+    int32x4_t,
+    u128,
+    V_i32!(),
+    fillq_s32,
+    fillq_s32,
+    to128!(int32x4_t)
+);
+gen_test_fn!(
+    testq_bit_s32,
+    i32,
+    i32,
+    int32x4_t,
+    int32x4_t,
+    u128,
+    V_i32!(),
+    fillq_s32,
+    fillq_s32,
+    to128!(int32x4_t)
+);
+gen_test_fn!(
+    testq_cmp_s32,
+    i32,
+    u32,
+    int32x4_t,
+    uint32x4_t,
+    u128,
+    V_i32!(),
+    fillq_s32,
+    fillq_u32,
+    to128!(uint32x4_t)
+);
+
+// `u64` test drivers (`ari`, `bit` and `cmp` flavours, plus `testq_*` variants
+// for the 2-lane vectors). All six use the `V_u64!()` inputs; within each
+// width the three flavours are generated with identical arguments.
+gen_test_fn!(
+    test_ari_u64,
+    u64,
+    u64,
+    uint64x1_t,
+    uint64x1_t,
+    u64,
+    V_u64!(),
+    fill_u64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    test_bit_u64,
+    u64,
+    u64,
+    uint64x1_t,
+    uint64x1_t,
+    u64,
+    V_u64!(),
+    fill_u64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    test_cmp_u64,
+    u64,
+    u64,
+    uint64x1_t,
+    uint64x1_t,
+    u64,
+    V_u64!(),
+    fill_u64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    testq_ari_u64,
+    u64,
+    u64,
+    uint64x2_t,
+    uint64x2_t,
+    u128,
+    V_u64!(),
+    fillq_u64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
+gen_test_fn!(
+    testq_bit_u64,
+    u64,
+    u64,
+    uint64x2_t,
+    uint64x2_t,
+    u128,
+    V_u64!(),
+    fillq_u64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
+gen_test_fn!(
+    testq_cmp_u64,
+    u64,
+    u64,
+    uint64x2_t,
+    uint64x2_t,
+    u128,
+    V_u64!(),
+    fillq_u64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
+
+// `i64` test drivers using the `V_i64!()` inputs. The `ari` and `bit` flavours
+// produce signed vectors; the `cmp` flavours take signed operands but produce
+// unsigned results (`u64` reference value, `uint64x*_t` vector, unsigned fill).
+gen_test_fn!(
+    test_ari_s64,
+    i64,
+    i64,
+    int64x1_t,
+    int64x1_t,
+    u64,
+    V_i64!(),
+    fill_s64,
+    fill_s64,
+    to64!(int64x1_t)
+);
+gen_test_fn!(
+    test_bit_s64,
+    i64,
+    i64,
+    int64x1_t,
+    int64x1_t,
+    u64,
+    V_i64!(),
+    fill_s64,
+    fill_s64,
+    to64!(int64x1_t)
+);
+gen_test_fn!(
+    test_cmp_s64,
+    i64,
+    u64,
+    int64x1_t,
+    uint64x1_t,
+    u64,
+    V_i64!(),
+    fill_s64,
+    fill_u64,
+    to64!(uint64x1_t)
+);
+gen_test_fn!(
+    testq_ari_s64,
+    i64,
+    i64,
+    int64x2_t,
+    int64x2_t,
+    u128,
+    V_i64!(),
+    fillq_s64,
+    fillq_s64,
+    to128!(int64x2_t)
+);
+gen_test_fn!(
+    testq_bit_s64,
+    i64,
+    i64,
+    int64x2_t,
+    int64x2_t,
+    u128,
+    V_i64!(),
+    fillq_s64,
+    fillq_s64,
+    to128!(int64x2_t)
+);
+gen_test_fn!(
+    testq_cmp_s64,
+    i64,
+    u64,
+    int64x2_t,
+    uint64x2_t,
+    u128,
+    V_i64!(),
+    fillq_s64,
+    fillq_u64,
+    to128!(uint64x2_t)
+);
+
+// `f32` test drivers using the `V_f32!()` inputs. Only `ari` and `cmp`
+// flavours are generated (no `bit` flavour). The `cmp` flavours produce
+// unsigned results (`u32` reference value, `uint32x*_t` vector, `fill_u32`).
+// Results are compared after reinterpretation as `u64`/`u128`, i.e. by bit
+// pattern rather than by floating-point equality.
+gen_test_fn!(
+    test_ari_f32,
+    f32,
+    f32,
+    float32x2_t,
+    float32x2_t,
+    u64,
+    V_f32!(),
+    fill_f32,
+    fill_f32,
+    to64!(float32x2_t)
+);
+gen_test_fn!(
+    test_cmp_f32,
+    f32,
+    u32,
+    float32x2_t,
+    uint32x2_t,
+    u64,
+    V_f32!(),
+    fill_f32,
+    fill_u32,
+    to64!(uint32x2_t)
+);
+gen_test_fn!(
+    testq_ari_f32,
+    f32,
+    f32,
+    float32x4_t,
+    float32x4_t,
+    u128,
+    V_f32!(),
+    fillq_f32,
+    fillq_f32,
+    to128!(float32x4_t)
+);
+gen_test_fn!(
+    testq_cmp_f32,
+    f32,
+    u32,
+    float32x4_t,
+    uint32x4_t,
+    u128,
+    V_f32!(),
+    fillq_f32,
+    fillq_u32,
+    to128!(uint32x4_t)
+);
--- a/library/stdarch/crates/core_arch/src/core_arch_docs.md
+++ b/library/stdarch/crates/core_arch/src/core_arch_docs.md
@ -0,0 +1,350 @@
+SIMD and vendor intrinsics module.
+
+This module is intended to be the gateway to architecture-specific
+intrinsic functions, typically related to SIMD (but not always!). Each
+architecture that Rust compiles to may contain a submodule here, which
+means that this is not a portable module! If you're writing a portable
+library take care when using these APIs!
+
+Under this module you'll find an architecture-named module, such as
+`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a
+module entry here, only present on that particular target. For example the
+`i686-pc-windows-msvc` target will have an `x86` module here, whereas
+`x86_64-pc-windows-msvc` has `x86_64`.
+
+[rfc]: https://github.com/rust-lang/rfcs/pull/2325
+[tracked]: https://github.com/rust-lang/rust/issues/48556
+
+# Overview
+
+This module exposes vendor-specific intrinsics that typically correspond to
+a single machine instruction. These intrinsics are not portable: their
+availability is architecture-dependent, and not all machines of that
+architecture might provide the intrinsic.
+
+The `arch` module is intended to be a low-level implementation detail for
+higher-level APIs. Using it correctly can be quite tricky as you need to
+ensure at least a few guarantees are upheld:
+
+* The correct architecture's module is used. For example the `arm` module
+  isn't available on the `x86_64-unknown-linux-gnu` target. This is
+  typically done by ensuring that `#[cfg]` is used appropriately when using
+  this module.
+* The CPU the program is currently running on supports the function being
+  called. For example it is unsafe to call an AVX2 function on a CPU that
+  doesn't actually support AVX2.
+
+As a result of the latter of these guarantees all intrinsics in this module
+are `unsafe` and extra care needs to be taken when calling them!
+
+# CPU Feature Detection
+
+In order to call these APIs in a safe fashion there are a number of
+mechanisms available to ensure that the correct CPU feature is available
+to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64`
+intrinsic on the `x86` and `x86_64` architectures. This function requires
+the AVX2 feature as [documented by Intel][intel-dox], so to correctly call
+this function we need to (a) guarantee we only call it on `x86`/`x86_64`
+and (b) ensure that the CPU feature is available.
+
+[intel-dox]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64&expand=100
+
+## Static CPU Feature Detection
+
+The first option available to us is to conditionally compile code via the
+`#[cfg]` attribute. CPU features correspond to the `target_feature` cfg
+available, and can be used like so:
+
+```ignore
+#[cfg(
+    all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx2"
+    )
+)]
+fn foo() {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::_mm256_add_epi64;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::_mm256_add_epi64;
+
+    unsafe {
+        _mm256_add_epi64(...);
+    }
+}
+```
+
+Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile
+this function into our module. This means that if the `avx2` feature is
+*enabled statically* then we'll use the `_mm256_add_epi64` function at
+runtime. The `unsafe` block here can be justified through the usage of
+`#[cfg]` to only compile the code in situations where the safety guarantees
+are upheld.
+
+Statically enabling a feature is typically done with the `-C
+target-feature` or `-C target-cpu` flags to the compiler. For example if
+your local CPU supports AVX2 then you can compile the above function with:
+
+```sh
+$ RUSTFLAGS='-C target-cpu=native' cargo build
+```
+
+Or otherwise you can specifically enable just the AVX2 feature:
+
+```sh
+$ RUSTFLAGS='-C target-feature=+avx2' cargo build
+```
+
+Note that when you compile a binary with a particular feature enabled it's
+important to ensure that you only run the binary on systems which satisfy
+the required feature set.
+
+## Dynamic CPU Feature Detection
+
+Sometimes statically dispatching isn't quite what you want. Instead you
+might want to build a portable binary that runs across a variety of CPUs,
+but at runtime it selects the most optimized implementation available. This
+allows you to build a "least common denominator" binary which has certain
+sections more optimized for different CPUs.
+
+Taking our previous example, we're going to compile our binary
+*without* AVX2 support, but we'd like to enable it for just one function.
+We can do that in a manner like:
+
+```ignore
+fn foo() {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("avx2") {
+            return unsafe { foo_avx2() };
+        }
+    }
+
+    // fallback implementation without using AVX2
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn foo_avx2() {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::_mm256_add_epi64;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::_mm256_add_epi64;
+
+    unsafe { _mm256_add_epi64(...); }
+}
+```
+
+There are a couple of components in play here, so let's go through them in
+detail!
+
+* First up we notice the `is_x86_feature_detected!` macro. Provided by
+  the standard library, this macro will perform necessary runtime detection
+  to determine whether the CPU the program is running on supports the
+  specified feature. In this case the macro will expand to a boolean
+  expression evaluating to whether the local CPU has the AVX2 feature or
+  not.
+
+  Note that this macro, like the `arch` module, is platform-specific. For
+  example calling `is_x86_feature_detected!("avx2")` on ARM will be a
+  compile time error. To ensure we don't hit this error a statement level
+  `#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`.
+
+* Next up we see our AVX2-enabled function, `foo_avx2`. This function is
+  decorated with the `#[target_feature]` attribute which enables a CPU
+  feature for just this one function. Using a compiler flag like `-C
+  target-feature=+avx2` will enable AVX2 for the entire program, but using
+  an attribute will only enable it for the one function. Usage of the
+  `#[target_feature]` attribute currently requires the function to also be
+  `unsafe`, as we see here. This is because the function can only be
+  correctly called on systems which have the AVX2 feature (like the
+  intrinsics themselves).
+
+And with all that we should have a working program! This program will run
+across all machines and it'll use the optimized AVX2 implementation on
+machines where support is detected.
+
+# Ergonomics
+
+It's important to note that using the `arch` module is not the easiest
+thing in the world, so if you're curious to try it out you may want to
+brace yourself for some wordiness!
+
+The primary purpose of this module is to enable stable crates on crates.io
+to build up much more ergonomic abstractions which end up using SIMD under
+the hood. Over time these abstractions may also move into the standard
+library itself, but for now this module is tasked with providing the bare
+minimum necessary to use vendor intrinsics on stable Rust.
+
+# Other architectures
+
+This documentation is only for one particular architecture; you can find
+others at:
+
+* [`x86`]
+* [`x86_64`]
+* [`arm`]
+* [`aarch64`]
+* [`riscv32`]
+* [`riscv64`]
+* [`mips`]
+* [`mips64`]
+* [`powerpc`]
+* [`powerpc64`]
+* [`nvptx`]
+* [`wasm32`]
+* [`loongarch64`]
+* [`s390x`]
+
+[`x86`]: ../../core/arch/x86/index.html
+[`x86_64`]: ../../core/arch/x86_64/index.html
+[`arm`]: ../../core/arch/arm/index.html
+[`aarch64`]: ../../core/arch/aarch64/index.html
+[`riscv32`]: ../../core/arch/riscv32/index.html
+[`riscv64`]: ../../core/arch/riscv64/index.html
+[`mips`]: ../../core/arch/mips/index.html
+[`mips64`]: ../../core/arch/mips64/index.html
+[`powerpc`]: ../../core/arch/powerpc/index.html
+[`powerpc64`]: ../../core/arch/powerpc64/index.html
+[`nvptx`]: ../../core/arch/nvptx/index.html
+[`wasm32`]: ../../core/arch/wasm32/index.html
+[`loongarch64`]: ../../core/arch/loongarch64/index.html
+[`s390x`]: ../../core/arch/s390x/index.html
+
+# Examples
+
+First let's take a look at not actually using any intrinsics but instead
+using LLVM's auto-vectorization to produce optimized vectorized code for
+AVX2 and also for the default platform.
+
+```rust
+fn main() {
+    let mut dst = [0];
+    add_quickly(&[1], &[2], &mut dst);
+    assert_eq!(dst[0], 3);
+}
+
+fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        // Note that this `unsafe` block is safe because we're testing
+        // that the `avx2` feature is indeed available on our CPU.
+        if is_x86_feature_detected!("avx2") {
+            return unsafe { add_quickly_avx2(a, b, c) };
+        }
+    }
+
+    add_quickly_fallback(a, b, c)
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) {
+    add_quickly_fallback(a, b, c) // the function below is inlined here
+}
+
+fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) {
+    for ((a, b), c) in a.iter().zip(b).zip(c) {
+        *c = *a + *b;
+    }
+}
+```
+
+Next up let's take a look at an example of manually using intrinsics. Here
+we'll be using SSE4.1 features to implement hex encoding.
+
+```
+fn main() {
+    let mut dst = [0; 32];
+    hex_encode(b"\x01\x02\x03", &mut dst);
+    assert_eq!(&dst[..6], b"010203");
+
+    let mut src = [0; 16];
+    for i in 0..16 {
+        src[i] = (i + 1) as u8;
+    }
+    hex_encode(&src, &mut dst);
+    assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10");
+}
+
+// Hex-encodes `src` into the first `2 * src.len()` bytes of `dst`.
+// Panics if `dst` is too short or if `2 * src.len()` overflows `usize`.
+pub fn hex_encode(src: &[u8], dst: &mut [u8]) {
+    let len = src.len().checked_mul(2).unwrap();
+    assert!(dst.len() >= len);
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        // Note that this `unsafe` block is safe because we've just checked
+        // at runtime that `sse4.1` is available, and the assertion above
+        // guarantees `dst` has room for two output bytes per input byte.
+        if is_x86_feature_detected!("sse4.1") {
+            return unsafe { hex_encode_sse41(src, dst) };
+        }
+    }
+
+    hex_encode_fallback(src, dst)
+}
+
+// translated from
+// <https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp>
+//
+// Encodes 16 input bytes per loop iteration into 32 output bytes, then hands
+// the remaining (< 16) bytes to `hex_encode_fallback`.
+//
+// This function does not check `dst`'s length before the raw stores; it
+// relies on the caller (`hex_encode`) having asserted that `dst` holds at
+// least `2 * src.len()` bytes, and on `sse4.1` being available.
+#[target_feature(enable = "sse4.1")]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    unsafe {
+        let ascii_zero = _mm_set1_epi8(b'0' as i8);
+        let nines = _mm_set1_epi8(9);
+        // `b'a' - 10`: adding a nibble in 10..=15 to this yields b'a'..=b'f'
+        let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
+        let and4bits = _mm_set1_epi8(0xf);
+
+        // number of input bytes consumed so far; the output offset is `i * 2`
+        let mut i = 0_isize;
+        while src.len() >= 16 {
+            let invec = _mm_loadu_si128(src.as_ptr() as *const _);
+
+            // masked1 holds the low nibble of every byte, masked2 the high
+            // nibble (bits shifted in from neighbouring bytes are masked off)
+            let masked1 = _mm_and_si128(invec, and4bits);
+            let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
+
+            // return 0xff corresponding to the elements > 9, or 0x00 otherwise
+            let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
+            let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
+
+            // add '0' or the offset depending on the masks
+            let masked1 = _mm_add_epi8(
+                masked1,
+                _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
+            );
+            let masked2 = _mm_add_epi8(
+                masked2,
+                _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
+            );
+
+            // interleave masked1 and masked2 bytes
+            // (high-nibble digit first, then low-nibble digit, per input byte)
+            let res1 = _mm_unpacklo_epi8(masked2, masked1);
+            let res2 = _mm_unpackhi_epi8(masked2, masked1);
+
+            // two unaligned 16-byte stores cover output bytes
+            // `i * 2 .. i * 2 + 32`
+            _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
+            _mm_storeu_si128(
+                dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
+                res2,
+            );
+            src = &src[16..];
+            i += 16;
+        }
+
+        // fewer than 16 bytes remain: finish with the scalar implementation
+        let i = i as usize;
+        hex_encode_fallback(src, &mut dst[i * 2..]);
+    }
+}
+
+// Portable scalar implementation: for each input byte, writes two lowercase
+// hex digits into the next 2-byte chunk of `dst`, high nibble first.
+fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
+    // Maps a nibble value (0..=15) to its ASCII hex digit.
+    fn hex(byte: u8) -> u8 {
+        static TABLE: &[u8] = b"0123456789abcdef";
+        TABLE[byte as usize]
+    }
+
+    // `zip` stops at the shorter side, so surplus space in `dst` is untouched.
+    for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
+        slots[0] = hex((*byte >> 4) & 0xf);
+        slots[1] = hex(*byte & 0xf);
+    }
+}
+```
--- a/library/stdarch/crates/core_arch/src/lib.rs
+++ b/library/stdarch/crates/core_arch/src/lib.rs
@ -0,0 +1,94 @@
+#![doc = include_str!("core_arch_docs.md")]
+#![allow(improper_ctypes_definitions)]
+#![allow(dead_code)]
+#![allow(unused_features)]
+#![allow(internal_features)]
+#![allow(unsafe_op_in_unsafe_fn)]
+#![deny(rust_2018_idioms)]
+#![feature(
+    custom_inner_attributes,
+    link_llvm_intrinsics,
+    repr_simd,
+    simd_ffi,
+    proc_macro_hygiene,
+    stmt_expr_attributes,
+    core_intrinsics,
+    no_core,
+    fmt_helpers_for_derive,
+    rustc_attrs,
+    staged_api,
+    doc_cfg,
+    tbm_target_feature,
+    sse4a_target_feature,
+    riscv_target_feature,
+    arm_target_feature,
+    mips_target_feature,
+    powerpc_target_feature,
+    s390x_target_feature,
+    loongarch_target_feature,
+    wasm_target_feature,
+    abi_unadjusted,
+    rtm_target_feature,
+    allow_internal_unstable,
+    decl_macro,
+    asm_experimental_arch,
+    x86_amx_intrinsics,
+    f16,
+    aarch64_unstable_target_feature,
+    bigint_helper_methods
+)]
+#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
+#![deny(clippy::missing_inline_in_public_items)]
+#![allow(
+    clippy::identity_op,
+    clippy::inline_always,
+    clippy::too_many_arguments,
+    clippy::cast_sign_loss,
+    clippy::cast_lossless,
+    clippy::cast_possible_wrap,
+    clippy::cast_possible_truncation,
+    clippy::cast_precision_loss,
+    clippy::cognitive_complexity,
+    clippy::many_single_char_names,
+    clippy::missing_safety_doc,
+    clippy::shadow_reuse,
+    clippy::similar_names,
+    clippy::unusual_byte_groupings,
+    clippy::wrong_self_convention
+)]
+#![cfg_attr(test, allow(unused_imports))]
+#![no_std]
+#![stable(feature = "stdsimd", since = "1.27.0")]
+#![doc(
+    test(attr(deny(warnings))),
+    test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
+)]
+#![cfg_attr(
+    test,
+    feature(
+        stdarch_arm_feature_detection,
+        stdarch_powerpc_feature_detection,
+        stdarch_s390x_feature_detection
+    )
+)]
+
+#[cfg(test)]
+#[macro_use]
+extern crate std;
+#[cfg(test)]
+#[macro_use]
+extern crate std_detect;
+#[path = "mod.rs"]
+mod core_arch;
+
+// Public facade of the crate: re-exports everything from the private
+// `core_arch::arch` module (declared via `mod.rs`) together with the
+// `core::arch::asm` macro.
+#[stable(feature = "stdsimd", since = "1.27.0")]
+pub mod arch {
+    #[stable(feature = "stdsimd", since = "1.27.0")]
+    // presumably the glob can be empty on targets without an arch module,
+    // hence the `unused_imports` allowance -- TODO confirm
+    #[allow(unused_imports)]
+    pub use crate::core_arch::arch::*;
+    #[stable(feature = "stdsimd", since = "1.27.0")]
+    pub use core::arch::asm;
+}
+
+#[allow(unused_imports)]
+use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/mod.rs
@ -0,0 +1,21 @@
+//! LoongArch64 LASX intrinsics
+
+#![allow(non_camel_case_types)]
+
+#[rustfmt::skip]
+mod types;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::types::*;
+
+#[rustfmt::skip]
+mod generated;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::generated::*;
+
+#[rustfmt::skip]
+#[cfg(test)]
+mod tests;
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/types.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/types.rs
@ -0,0 +1,33 @@
+types! {
+    #![unstable(feature = "stdarch_loongarch", issue = "117427")]
+
+    /// LOONGARCH-specific 256-bit wide vector of 32 packed `i8`.
+    pub struct v32i8(32 x pub(crate) i8);
+
+    /// LOONGARCH-specific 256-bit wide vector of 16 packed `i16`.
+    pub struct v16i16(16 x pub(crate) i16);
+
+    /// LOONGARCH-specific 256-bit wide vector of 8 packed `i32`.
+    pub struct v8i32(8 x pub(crate) i32);
+
+    /// LOONGARCH-specific 256-bit wide vector of 4 packed `i64`.
+    pub struct v4i64(4 x pub(crate) i64);
+
+    /// LOONGARCH-specific 256-bit wide vector of 32 packed `u8`.
+    pub struct v32u8(32 x pub(crate) u8);
+
+    /// LOONGARCH-specific 256-bit wide vector of 16 packed `u16`.
+    pub struct v16u16(16 x pub(crate) u16);
+
+    /// LOONGARCH-specific 256-bit wide vector of 8 packed `u32`.
+    pub struct v8u32(8 x pub(crate) u32);
+
+    /// LOONGARCH-specific 256-bit wide vector of 4 packed `u64`.
+    pub struct v4u64(4 x pub(crate) u64);
+
+    /// LOONGARCH-specific 256-bit wide vector of 8 packed `f32`.
+    pub struct v8f32(8 x pub(crate) f32);
+
+    /// LOONGARCH-specific 256-bit wide vector of 4 packed `f64`.
+    pub struct v4f64(4 x pub(crate) f64);
+}
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/mod.rs
@ -0,0 +1,21 @@
+//! LoongArch64 LSX intrinsics
+
+#![allow(non_camel_case_types)]
+
+#[rustfmt::skip]
+mod types;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::types::*;
+
+#[rustfmt::skip]
+mod generated;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::generated::*;
+
+#[rustfmt::skip]
+#[cfg(test)]
+mod tests;
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/types.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/types.rs
@ -0,0 +1,33 @@
+types! {
+    #![unstable(feature = "stdarch_loongarch", issue = "117427")]
+
+    /// LOONGARCH-specific 128-bit wide vector of 16 packed `i8`.
+    pub struct v16i8(16 x pub(crate) i8);
+
+    /// LOONGARCH-specific 128-bit wide vector of 8 packed `i16`.
+    pub struct v8i16(8 x pub(crate) i16);
+
+    /// LOONGARCH-specific 128-bit wide vector of 4 packed `i32`.
+    pub struct v4i32(4 x pub(crate) i32);
+
+    /// LOONGARCH-specific 128-bit wide vector of 2 packed `i64`.
+    pub struct v2i64(2 x pub(crate) i64);
+
+    /// LOONGARCH-specific 128-bit wide vector of 16 packed `u8`.
+    pub struct v16u8(16 x pub(crate) u8);
+
+    /// LOONGARCH-specific 128-bit wide vector of 8 packed `u16`.
+    pub struct v8u16(8 x pub(crate) u16);
+
+    /// LOONGARCH-specific 128-bit wide vector of 4 packed `u32`.
+    pub struct v4u32(4 x pub(crate) u32);
+
+    /// LOONGARCH-specific 128-bit wide vector of 2 packed `u64`.
+    pub struct v2u64(2 x pub(crate) u64);
+
+    /// LOONGARCH-specific 128-bit wide vector of 4 packed `f32`.
+    pub struct v4f32(4 x pub(crate) f32);
+
+    /// LOONGARCH-specific 128-bit wide vector of 2 packed `f64`.
+    pub struct v2f64(2 x pub(crate) f64);
+}
--- a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
@ -0,0 +1,376 @@
+//! `LoongArch` intrinsics
+
+mod lasx;
+mod lsx;
+
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::lasx::*;
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::lsx::*;
+
+use crate::arch::asm;
+
+/// Reads the 64-bit stable counter value and the counter ID
+///
+/// Executes `rdtime.d` and returns its two output operands as
+/// `(counter value, counter ID)`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn rdtime_d() -> (i64, isize) {
+    let val: i64;
+    let tid: isize;
+    asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
+    (val, tid)
+}
+
+/// Reads the lower 32-bit stable counter value and the counter ID
+///
+/// Executes `rdtimel.w` and returns its two output operands as
+/// `(counter value, counter ID)`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn rdtimel_w() -> (i32, isize) {
+    let val: i32;
+    let tid: isize;
+    asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
+    (val, tid)
+}
+
+/// Reads the upper 32-bit stable counter value and the counter ID
+///
+/// Executes `rdtimeh.w` and returns its two output operands as
+/// `(counter value, counter ID)`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn rdtimeh_w() -> (i32, isize) {
+    let val: i32;
+    let tid: isize;
+    asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
+    (val, tid)
+}
+
+// Raw declarations of the LLVM LoongArch intrinsics, bound by `link_name`.
+// These are private; the public `unsafe fn` wrappers further down this file
+// forward to them (supplying immediates from const generics where needed).
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.loongarch.crc.w.b.w"]
+    fn __crc_w_b_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crc.w.h.w"]
+    fn __crc_w_h_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crc.w.w.w"]
+    fn __crc_w_w_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crc.w.d.w"]
+    fn __crc_w_d_w(a: i64, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.b.w"]
+    fn __crcc_w_b_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.h.w"]
+    fn __crcc_w_h_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.w.w"]
+    fn __crcc_w_w_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.d.w"]
+    fn __crcc_w_d_w(a: i64, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.cacop.d"]
+    fn __cacop(a: i64, b: i64, c: i64);
+    #[link_name = "llvm.loongarch.dbar"]
+    fn __dbar(a: i32);
+    #[link_name = "llvm.loongarch.ibar"]
+    fn __ibar(a: i32);
+    #[link_name = "llvm.loongarch.movgr2fcsr"]
+    fn __movgr2fcsr(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.movfcsr2gr"]
+    fn __movfcsr2gr(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.csrrd.d"]
+    fn __csrrd(a: i32) -> i64;
+    #[link_name = "llvm.loongarch.csrwr.d"]
+    fn __csrwr(a: i64, b: i32) -> i64;
+    #[link_name = "llvm.loongarch.csrxchg.d"]
+    fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
+    #[link_name = "llvm.loongarch.iocsrrd.b"]
+    fn __iocsrrd_b(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.h"]
+    fn __iocsrrd_h(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.w"]
+    fn __iocsrrd_w(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.d"]
+    fn __iocsrrd_d(a: i32) -> i64;
+    #[link_name = "llvm.loongarch.iocsrwr.b"]
+    fn __iocsrwr_b(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.iocsrwr.h"]
+    fn __iocsrwr_h(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.iocsrwr.w"]
+    fn __iocsrwr_w(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.iocsrwr.d"]
+    fn __iocsrwr_d(a: i64, b: i32);
+    #[link_name = "llvm.loongarch.break"]
+    fn __break(a: i32);
+    #[link_name = "llvm.loongarch.cpucfg"]
+    fn __cpucfg(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.syscall"]
+    fn __syscall(a: i32);
+    #[link_name = "llvm.loongarch.asrtle.d"]
+    fn __asrtle(a: i64, b: i64);
+    #[link_name = "llvm.loongarch.asrtgt.d"]
+    fn __asrtgt(a: i64, b: i64);
+    #[link_name = "llvm.loongarch.lddir.d"]
+    fn __lddir(a: i64, b: i64) -> i64;
+    #[link_name = "llvm.loongarch.ldpte.d"]
+    fn __ldpte(a: i64, b: i64);
+    #[link_name = "llvm.loongarch.frecipe.s"]
+    fn __frecipe_s(a: f32) -> f32;
+    #[link_name = "llvm.loongarch.frecipe.d"]
+    fn __frecipe_d(a: f64) -> f64;
+    #[link_name = "llvm.loongarch.frsqrte.s"]
+    fn __frsqrte_s(a: f32) -> f32;
+    #[link_name = "llvm.loongarch.frsqrte.d"]
+    fn __frsqrte_d(a: f64) -> f64;
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.b.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
+    __crc_w_b_w(a, b)
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.h.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
+    __crc_w_h_w(a, b)
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.w.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
+    __crc_w_w_w(a, b)
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crc.w.d.w` intrinsic.
+/// Unlike the `b`/`h`/`w` variants, `a` is 64 bits wide here.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
+    __crc_w_d_w(a, b)
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.b.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
+    __crcc_w_b_w(a, b)
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.h.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
+    __crcc_w_h_w(a, b)
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.w.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
+    __crcc_w_w_w(a, b)
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.crcc.w.d.w` intrinsic.
+/// Unlike the `b`/`h`/`w` variants, `a` is 64 bits wide here.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
+    __crcc_w_d_w(a, b)
+}
+
+/// Generates the cache operation instruction
+///
+/// `IMM12` is validated at compile time by `static_assert_simm_bits!` as a
+/// signed 12-bit immediate and passed as the last operand of the
+/// `llvm.loongarch.cacop.d` intrinsic, after `a` and `b`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
+    static_assert_simm_bits!(IMM12, 12);
+    __cacop(a, b, IMM12);
+}
+
+/// Generates the memory barrier instruction
+///
+/// `IMM15` is checked at compile time to fit in 15 unsigned bits
+/// (`0..=32767`) and passed to the `llvm.loongarch.dbar` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn dbar<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __dbar(IMM15);
+}
+
+/// Generates the instruction-fetch barrier instruction
+///
+/// `IMM15` is checked at compile time to fit in 15 unsigned bits
+/// (`0..=32767`) and passed to the `llvm.loongarch.ibar` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn ibar<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __ibar(IMM15);
+}
+
+/// Moves data from a GPR to the FCSR
+///
+/// `IMM5` is passed as the first operand of the `llvm.loongarch.movgr2fcsr`
+/// intrinsic and `a` as the second. `IMM5` is checked at compile time to fit
+/// in 5 unsigned bits (`0..=31`).
+// NOTE(review): the architecture appears to define only FCSR0-FCSR3, so a
+// 2-bit bound may be what the backend actually accepts -- confirm.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
+    static_assert_uimm_bits!(IMM5, 5);
+    __movgr2fcsr(IMM5, a);
+}
+
+/// Moves data from a FCSR to the GPR
+///
+/// `IMM5` is checked at compile time to fit in 5 unsigned bits (`0..=31`)
+/// and passed to the `llvm.loongarch.movfcsr2gr` intrinsic, whose result is
+/// returned.
+// NOTE(review): the architecture appears to define only FCSR0-FCSR3, so a
+// 2-bit bound may be what the backend actually accepts -- confirm.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
+    static_assert_uimm_bits!(IMM5, 5);
+    __movfcsr2gr(IMM5)
+}
+
+/// Reads the CSR
+///
+/// `IMM14` is checked at compile time to fit in 14 unsigned bits
+/// (`0..=16383`) and passed to the `llvm.loongarch.csrrd.d` intrinsic, whose
+/// result is returned.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrrd<const IMM14: i32>() -> i64 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrrd(IMM14)
+}
+
+/// Writes the CSR
+///
+/// `IMM14` is checked at compile time to fit in 14 unsigned bits
+/// (`0..=16383`). `a` and `IMM14` are passed, in that order, to the
+/// `llvm.loongarch.csrwr.d` intrinsic, whose result is returned.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrwr<const IMM14: i32>(a: i64) -> i64 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrwr(a, IMM14)
+}
+
+/// Exchanges the CSR
+///
+/// `IMM14` is checked at compile time to fit in 14 unsigned bits
+/// (`0..=16383`). `a`, `b` and `IMM14` are passed, in that order, to the
+/// `llvm.loongarch.csrxchg.d` intrinsic, whose result is returned.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrxchg(a, b, IMM14)
+}
+
+/// Reads the 8-bit IO-CSR
+///
+/// Forwards `a` to the `llvm.loongarch.iocsrrd.b` intrinsic and returns its
+/// result as an `i32`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_b(a: i32) -> i32 {
+    __iocsrrd_b(a)
+}
+
+/// Reads the 16-bit IO-CSR
+///
+/// Forwards `a` to the `llvm.loongarch.iocsrrd.h` intrinsic and returns its
+/// result as an `i32`.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_h(a: i32) -> i32 {
+    __iocsrrd_h(a)
+}
+
+/// Reads the 32-bit IO-CSR
+///
+/// Forwards `a` to the `llvm.loongarch.iocsrrd.w` intrinsic and returns its
+/// result.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_w(a: i32) -> i32 {
+    __iocsrrd_w(a)
+}
+
+/// Reads the 64-bit IO-CSR
+///
+/// Forwards `a` to the `llvm.loongarch.iocsrrd.d` intrinsic and returns its
+/// 64-bit result.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_d(a: i32) -> i64 {
+    __iocsrrd_d(a)
+}
+
+/// Writes the 8-bit IO-CSR
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.b` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_b(a: i32, b: i32) {
+    __iocsrwr_b(a, b)
+}
+
+/// Writes the 16-bit IO-CSR
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.h` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_h(a: i32, b: i32) {
+    __iocsrwr_h(a, b)
+}
+
+/// Writes the 32-bit IO-CSR
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.w` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_w(a: i32, b: i32) {
+    __iocsrwr_w(a, b)
+}
+
+/// Writes the 64-bit IO-CSR
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.iocsrwr.d` intrinsic.
+/// `a` is 64 bits wide here, unlike in the `b`/`h`/`w` variants.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_d(a: i64, b: i32) {
+    __iocsrwr_d(a, b)
+}
+
+/// Generates the breakpoint instruction
+///
+/// `IMM15` is checked at compile time to fit in 15 unsigned bits
+/// (`0..=32767`) and passed to the `llvm.loongarch.break` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn brk<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __break(IMM15);
+}
+
+/// Reads the CPU configuration register
+///
+/// Forwards `a` to the `llvm.loongarch.cpucfg` intrinsic and returns its
+/// result.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn cpucfg(a: i32) -> i32 {
+    __cpucfg(a)
+}
+
+/// Generates the syscall instruction
+///
+/// `IMM15` is checked at compile time to fit in 15 unsigned bits
+/// (`0..=32767`) and passed to the `llvm.loongarch.syscall` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn syscall<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __syscall(IMM15);
+}
+
+/// Generates the less-than-or-equal assertion instruction
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.asrtle.d` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn asrtle(a: i64, b: i64) {
+    __asrtle(a, b);
+}
+
+/// Generates the greater-than assertion instruction
+///
+/// Forwards `a` and `b` unchanged to the `llvm.loongarch.asrtgt.d` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn asrtgt(a: i64, b: i64) {
+    __asrtgt(a, b);
+}
+
+/// Loads the page table directory entry
+///
+/// `B` is a const generic that, via `rustc_legacy_const_generics(1)`, may
+/// also be written as the second positional argument. `a` and `B` are passed,
+/// in that order, to the `llvm.loongarch.lddir.d` intrinsic.
+// NOTE(review): unlike the other immediates in this file, `B` has no
+// compile-time range check here -- confirm whether one is needed.
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
+    __lddir(a, B)
+}
+
+/// Loads the page table entry
+///
+/// `B` is a const generic that, via `rustc_legacy_const_generics(1)`, may
+/// also be written as the second positional argument. `a` and `B` are passed,
+/// in that order, to the `llvm.loongarch.ldpte.d` intrinsic.
+// NOTE(review): unlike the other immediates in this file, `B` has no
+// compile-time range check here -- confirm whether one is needed.
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn ldpte<const B: i64>(a: i64) {
+    __ldpte(a, B)
+}
+
+/// Calculate the approximate single-precision result of 1.0 divided by `a`
+///
+/// Requires the `frecipe` target feature; forwards to the
+/// `llvm.loongarch.frecipe.s` intrinsic.
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn frecipe_s(a: f32) -> f32 {
+    __frecipe_s(a)
+}
+
+/// Calculate the approximate double-precision result of 1.0 divided by `a`
+///
+/// Requires the `frecipe` target feature; forwards to the
+/// `llvm.loongarch.frecipe.d` intrinsic.
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn frecipe_d(a: f64) -> f64 {
+    __frecipe_d(a)
+}
+
+/// Calculate the approximate single-precision result of dividing 1.0 by the
+/// square root of `a`
+///
+/// Requires the `frecipe` target feature; forwards to the
+/// `llvm.loongarch.frsqrte.s` intrinsic.
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn frsqrte_s(a: f32) -> f32 {
+    __frsqrte_s(a)
+}
+
+/// Calculate the approximate double-precision result of dividing 1.0 by the
+/// square root of `a`
+///
+/// Requires the `frecipe` target feature; forwards to the
+/// `llvm.loongarch.frsqrte.d` intrinsic.
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn frsqrte_d(a: f64) -> f64 {
+    __frsqrte_d(a)
+}
--- a/library/stdarch/crates/core_arch/src/macros.rs
+++ b/library/stdarch/crates/core_arch/src/macros.rs
@ -0,0 +1,165 @@
+//! Utility macros.
+
+#[allow(unused)]
+macro_rules! static_assert {
+    // Evaluates `$e` inside an inline `const` block, so a false condition is
+    // reported while compiling rather than at run time. The optional `$msg`
+    // is forwarded to `assert!` as its panic message.
+    ($e:expr $(, $msg:expr)?) => {
+        const {
+            assert!($e $(, $msg)?);
+        }
+    };
+}
+
+// Compile-time check that the const `$imm` fits in `$bits` unsigned bits,
+// i.e. `0 <= $imm < 2^$bits`. On failure the message reads
+// "<imm> doesn't fit in <bits> bits".
+#[allow(unused_macros)]
+macro_rules! static_assert_uimm_bits {
+    ($imm:ident, $bits:expr) => {
+        // `0 <= $imm` produces a warning if the immediate has an unsigned type
+        #[allow(unused_comparisons)]
+        {
+            static_assert!(
+                0 <= $imm && $imm < (1 << $bits),
+                concat!(
+                    stringify!($imm),
+                    " doesn't fit in ",
+                    stringify!($bits),
+                    " bits",
+                )
+            )
+        }
+    };
+}
+
+// Compile-time check that the const `$imm` fits in a `$bits`-bit signed
+// (two's-complement) immediate, i.e. `-2^($bits - 1) <= $imm < 2^($bits - 1)`.
+// On failure the message reads "<imm> doesn't fit in <bits> bits".
+#[allow(unused_macros)]
+macro_rules! static_assert_simm_bits {
+    ($imm:ident, $bits:expr) => {
+        static_assert!(
+            // The lower bound is inclusive: `-1 << ($bits - 1)` is the most
+            // negative representable value. Subtracting one more would
+            // wrongly admit a value that needs `$bits + 1` bits.
+            (-1 << ($bits - 1)) <= $imm && $imm < (1 << ($bits - 1)),
+            concat!(
+                stringify!($imm),
+                " doesn't fit in ",
+                stringify!($bits),
+                " bits",
+            )
+        )
+    };
+}
+
+// Declares `#[repr(simd)]` vector types from a compact
+// `pub struct Name(LEN x VIS ELEM);` syntax, preceded by one or more inner
+// `#![...]` stability attributes that are applied to every declared type.
+#[allow(unused)]
+macro_rules! types {
+    // Arm 1: at least one inner `#![...]` attribute remains. Peel off the
+    // first one, append it as a `stability: [...]` entry on every struct, and
+    // recurse with the remaining inner attributes.
+    (
+        #![$stability_first:meta]
+        $(
+            #![$stability_more:meta]
+        )*
+
+        $(
+            $(#[$doc:meta])*
+            $(stability: [$stability_already: meta])*
+            pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
+        )*
+    ) => (types! {
+        $(
+            #![$stability_more]
+        )*
+
+        $(
+            $(#[$doc])*
+            $(stability: [$stability_already])*
+            stability: [$stability_first]
+            pub struct $name($len x $v $elem_type);
+        )*
+    });
+
+    // Arm 2: no inner attributes are left and every struct carries one or
+    // more `stability: [...]` entries. Emit the struct, its crate-internal
+    // helpers, and a `Debug` impl, each tagged with those stability attributes
+    // where applicable.
+    (
+        $(
+            $(#[$doc:meta])*
+            $(stability: [$stability: meta])+
+            pub struct $name:ident($len:literal x $v:vis $elem_type:ty);
+        )*
+    ) => ($(
+        $(#[$doc])*
+        $(#[$stability])+
+        #[derive(Copy, Clone)]
+        #[allow(non_camel_case_types)]
+        #[repr(simd)]
+        #[allow(clippy::missing_inline_in_public_items)]
+        pub struct $name($v [$elem_type; $len]);
+
+        impl $name {
+            /// Using `my_simd([x; N])` seemingly fails tests,
+            /// so use this internal helper for it instead.
+            #[inline(always)]
+            $v fn splat(value: $elem_type) -> $name {
+                #[derive(Copy, Clone)]
+                #[repr(simd)]
+                struct JustOne([$elem_type; 1]);
+                let one = JustOne([value]);
+                // SAFETY: 0 is always in-bounds because we're shuffling
+                // a simd type with exactly one element.
+                unsafe { simd_shuffle!(one, one, [0; $len]) }
+            }
+
+            /// Returns an array reference containing the entire SIMD vector.
+            #[inline]
+            $v const fn as_array(&self) -> &[$elem_type; $len] {
+                // SAFETY: this type is just an overaligned `[T; N]` with
+                // potential padding at the end, so pointer casting to a
+                // `&[T; N]` is safe.
+                //
+                // NOTE: This deliberately doesn't just use `&self.0` because it may soon be banned
+                // see https://github.com/rust-lang/compiler-team/issues/838
+                unsafe { &*(self as *const Self as *const [$elem_type; $len]) }
+            }
+
+            /// Returns a mutable array reference containing the entire SIMD vector.
+            #[inline]
+            $v fn as_mut_array(&mut self) -> &mut [$elem_type; $len] {
+                // SAFETY: this type is just an overaligned `[T; N]` with
+                // potential padding at the end, so pointer casting to a
+                // `&mut [T; N]` is safe.
+                //
+                // NOTE: This deliberately doesn't just use `&mut self.0` because it may soon be banned
+                // see https://github.com/rust-lang/compiler-team/issues/838
+                unsafe { &mut *(self as *mut Self as *mut [$elem_type; $len]) }
+            }
+        }
+
+        $(#[$stability])+
+        impl crate::fmt::Debug for $name {
+            #[inline]
+            fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+                crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
+            }
+        }
+    )*);
+}
+
+// SIMD-represented wrapper around the `LEN` `u32` lane indices that the
+// `simd_shuffle!` macro passes to the shuffle intrinsic.
+#[allow(unused)]
+#[repr(simd)]
+pub(crate) struct SimdShuffleIdx<const LEN: usize>(pub(crate) [u32; LEN]);
+
+// Calls `intrinsics::simd::simd_shuffle` on `$x` and `$y`. The index array
+// `$idx` is wrapped in `SimdShuffleIdx` inside a `const` block, so it must be
+// a constant expression. A trailing comma is accepted.
+#[allow(unused)]
+macro_rules! simd_shuffle {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        $crate::intrinsics::simd::simd_shuffle(
+            $x,
+            $y,
+            const { $crate::core_arch::macros::SimdShuffleIdx($idx) },
+        )
+    }};
+}
+
+// Calls `intrinsics::simd::simd_insert($x, $idx, $val)`. `$idx` is evaluated
+// in a `const` block, so it must be a constant expression. A trailing comma
+// is accepted.
+#[allow(unused)]
+macro_rules! simd_insert {
+    ($x:expr, $idx:expr, $val:expr $(,)?) => {{ $crate::intrinsics::simd::simd_insert($x, const { $idx }, $val) }};
+}
+
+// Calls `intrinsics::simd::simd_extract($x, $idx)`. `$idx` is evaluated in a
+// `const` block, so it must be a constant expression. The three-argument
+// form additionally pins the intrinsic's second type parameter to `$ty`.
+#[allow(unused)]
+macro_rules! simd_extract {
+    ($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
+    ($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
+}
--- a/library/stdarch/crates/core_arch/src/mips/mod.rs
+++ b/library/stdarch/crates/core_arch/src/mips/mod.rs
@ -0,0 +1,20 @@
+//! MIPS
+
+// Building this module (even if unused) for non-fp64 targets fails with an LLVM
+// error.
+#[cfg(target_feature = "fp64")]
+mod msa;
+#[cfg(target_feature = "fp64")]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub use self::msa::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Generates the trap instruction `BREAK`
+///
+/// Implemented by calling `intrinsics::abort`, so this function never
+/// returns. In test builds, `assert_instr` checks that it compiles to `break`.
+#[cfg_attr(test, assert_instr(break))]
+#[inline]
+#[unstable(feature = "stdarch_mips", issue = "111198")]
+pub unsafe fn break_() -> ! {
+    crate::intrinsics::abort()
+}
--- a/library/stdarch/crates/core_arch/src/mips/msa.rs
+++ b/library/stdarch/crates/core_arch/src/mips/msa.rs
--- a/library/stdarch/crates/core_arch/src/mod.rs
+++ b/library/stdarch/crates/core_arch/src/mod.rs
@ -0,0 +1,343 @@
+//! `core_arch`
+
+#![allow(unknown_lints, unnecessary_transmutes)]
+
+#[macro_use]
+mod macros;
+
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
+mod riscv_shared;
+
+#[cfg(any(
+    target_arch = "arm",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    doc
+))]
+mod arm_shared;
+
+mod simd;
+
+// Public `arch` module: one sub-module per target architecture, each of which
+// re-exports the matching private implementation module(s) of `core_arch`.
+// Every sub-module is also compiled under `cfg(doc)` so that documentation
+// covers all platforms.
+#[doc = include_str!("core_arch_docs.md")]
+#[stable(feature = "simd_arch", since = "1.27.0")]
+pub mod arch {
+    /// Platform-specific intrinsics for the `x86` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "x86", doc))]
+    #[doc(cfg(target_arch = "x86"))]
+    #[stable(feature = "simd_x86", since = "1.27.0")]
+    pub mod x86 {
+        #[stable(feature = "simd_x86", since = "1.27.0")]
+        pub use crate::core_arch::x86::*;
+    }
+
+    /// Platform-specific intrinsics for the `x86_64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "x86_64", doc))]
+    #[doc(cfg(target_arch = "x86_64"))]
+    #[stable(feature = "simd_x86", since = "1.27.0")]
+    pub mod x86_64 {
+        // `x86_64` exposes everything from the shared `x86` module in addition
+        // to the contents of the `x86_64` module.
+        #[stable(feature = "simd_x86", since = "1.27.0")]
+        pub use crate::core_arch::x86::*;
+        #[stable(feature = "simd_x86", since = "1.27.0")]
+        pub use crate::core_arch::x86_64::*;
+    }
+
+    /// Platform-specific intrinsics for the `arm` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "arm", doc))]
+    #[doc(cfg(target_arch = "arm"))]
+    #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+    pub mod arm {
+        #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+        pub use crate::core_arch::arm::*;
+    }
+
+    /// Platform-specific intrinsics for the `aarch64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
+    #[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
+    #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+    pub mod aarch64 {
+        #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+        pub use crate::core_arch::aarch64::*;
+    }
+
+    /// Platform-specific intrinsics for the `riscv32` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "riscv32", doc))]
+    #[doc(cfg(any(target_arch = "riscv32")))]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+    pub mod riscv32 {
+        pub use crate::core_arch::riscv_shared::*;
+        pub use crate::core_arch::riscv32::*;
+    }
+
+    /// Platform-specific intrinsics for the `riscv64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "riscv64", doc))]
+    #[doc(cfg(any(target_arch = "riscv64")))]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+    pub mod riscv64 {
+        pub use crate::core_arch::riscv64::*;
+        // RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05).
+        // Module `riscv_shared` includes instructions available under all RISC-V platforms,
+        // i.e. RISC-V RV32 instructions.
+        pub use crate::core_arch::riscv_shared::*;
+    }
+
+    /// Platform-specific intrinsics for the `wasm32` platform.
+    ///
+    /// This module provides intrinsics specific to the WebAssembly
+    /// architecture. Here you'll find intrinsics specific to WebAssembly that
+    /// aren't otherwise surfaced somewhere in a cross-platform abstraction of
+    /// `std`, and you'll also find functions for leveraging WebAssembly
+    /// proposals such as [atomics] and [simd].
+    ///
+    /// Intrinsics in the `wasm32` module are modeled after the WebAssembly
+    /// instructions that they represent. Most functions are named after the
+    /// instruction they intend to correspond to, and the arguments/results
+    /// correspond to the type signature of the instruction itself. Stable
+    /// WebAssembly instructions are [documented online][instrdoc].
+    ///
+    /// [instrdoc]: https://webassembly.github.io/spec/core/valid/instructions.html
+    ///
+    /// If a proposal is not yet stable in WebAssembly itself then the functions
+    /// within this module may be unstable and require the nightly channel of
+    /// Rust to use. As the proposal itself stabilizes the intrinsics in this
+    /// module should stabilize as well.
+    ///
+    /// [atomics]: https://github.com/webassembly/threads
+    /// [simd]: https://github.com/webassembly/simd
+    ///
+    /// See the [module documentation](../index.html) for general information
+    /// about the `arch` module and platform intrinsics.
+    ///
+    /// ## Atomics
+    ///
+    /// The [threads proposal][atomics] for WebAssembly adds a number of
+    /// instructions for dealing with multithreaded programs. Most instructions
+    /// added in the [atomics] proposal are exposed in Rust through the
+    /// `std::sync::atomic` module. Some instructions, however, don't have
+    /// direct equivalents in Rust so they're exposed here instead.
+    ///
+    /// Note that the instructions added in the [atomics] proposal can work in
+    /// either a context with a shared wasm memory and without. These intrinsics
+    /// are always available in the standard library, but you likely won't be
+    /// able to use them too productively unless you recompile the standard
+    /// library (and all your code) with `-Ctarget-feature=+atomics`.
+    ///
+    /// It's also worth pointing out that multi-threaded WebAssembly and its
+    /// story in Rust is still in a somewhat "early days" phase as of the time
+    /// of this writing. Pieces should mostly work but it generally requires a
+    /// good deal of manual setup. At this time it's not as simple as "just call
+    /// `std::thread::spawn`", but it will hopefully get there one day!
+    ///
+    /// ## SIMD
+    ///
+    /// The [simd proposal][simd] for WebAssembly added a new `v128` type for a
+    /// 128-bit SIMD register. It also added a large array of instructions to
+    /// operate on the `v128` type to perform data processing. Using SIMD on
+    /// wasm is intended to be similar to as you would on `x86_64`, for example.
+    /// You'd write a function such as:
+    ///
+    /// ```rust,ignore
+    /// #[cfg(target_arch = "wasm32")]
+    /// #[target_feature(enable = "simd128")]
+    /// unsafe fn uses_simd() {
+    ///     use std::arch::wasm32::*;
+    ///     // ...
+    /// }
+    /// ```
+    ///
+    /// Unlike `x86_64`, however, WebAssembly does not currently have dynamic
+    /// detection at runtime as to whether SIMD is supported (this is one of the
+    /// motivators for the [conditional sections][condsections] and [feature
+    /// detection] proposals, but that is still pretty early days). This means
+    /// that your binary will either have SIMD and can only run on engines
+    /// which support SIMD, or it will not have SIMD at all. For compatibility
+    /// the standard library itself does not use any SIMD internally.
+    /// Determining how best to ship your WebAssembly binary with SIMD is
+    /// largely left up to you as it can be pretty nuanced depending on
+    /// your situation.
+    ///
+    /// [condsections]: https://github.com/webassembly/conditional-sections
+    /// [feature detection]: https://github.com/WebAssembly/feature-detection
+    ///
+    /// To enable SIMD support at compile time you need to do one of two things:
+    ///
+    /// * First you can annotate functions with `#[target_feature(enable =
+    ///   "simd128")]`. This causes just that one function to have SIMD support
+    ///   available to it, and intrinsics will get inlined as usual in this
+    ///   situation.
+    ///
+    /// * Second you can compile your program with `-Ctarget-feature=+simd128`.
+    ///   This compilation flag blanket enables SIMD support for your entire
+    ///   compilation. Note that this does not include the standard library
+    ///   unless you [recompile the standard library][buildstd].
+    ///
+    /// [buildstd]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#build-std
+    ///
+    /// If you enable SIMD via either of these routes then you'll have a
+    /// WebAssembly binary that uses SIMD instructions, and you'll need to ship
+    /// that accordingly. Also note that if you call SIMD intrinsics but don't
+    /// enable SIMD via either of these mechanisms, you'll still have SIMD
+    /// generated in your program. This means to generate a binary without SIMD
+    /// you'll need to avoid both options above plus calling into any intrinsics
+    /// in this module.
+    #[cfg(any(target_arch = "wasm32", doc))]
+    #[doc(cfg(target_arch = "wasm32"))]
+    #[stable(feature = "simd_wasm32", since = "1.33.0")]
+    pub mod wasm32 {
+        #[stable(feature = "simd_wasm32", since = "1.33.0")]
+        pub use crate::core_arch::wasm32::*;
+    }
+
+    /// Platform-specific intrinsics for the `wasm64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "wasm64", doc))]
+    #[doc(cfg(target_arch = "wasm64"))]
+    #[unstable(feature = "simd_wasm64", issue = "90599")]
+    pub mod wasm64 {
+        // Re-exports the `wasm32` implementation module, which is compiled for
+        // the whole `wasm` target family.
+        #[unstable(feature = "simd_wasm64", issue = "90599")]
+        pub use crate::core_arch::wasm32::*;
+    }
+
+    /// Platform-specific intrinsics for the `wasm` target family.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_family = "wasm", doc))]
+    #[doc(cfg(target_family = "wasm"))]
+    #[unstable(feature = "simd_wasm64", issue = "90599")]
+    pub mod wasm {
+        // Re-exports the `wasm32` implementation module, which is compiled for
+        // the whole `wasm` target family.
+        #[unstable(feature = "simd_wasm64", issue = "90599")]
+        pub use crate::core_arch::wasm32::*;
+    }
+
+    /// Platform-specific intrinsics for the `mips` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "mips", doc))]
+    #[doc(cfg(target_arch = "mips"))]
+    #[unstable(feature = "stdarch_mips", issue = "111198")]
+    pub mod mips {
+        pub use crate::core_arch::mips::*;
+    }
+
+    /// Platform-specific intrinsics for the `mips64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "mips64", doc))]
+    #[doc(cfg(target_arch = "mips64"))]
+    #[unstable(feature = "stdarch_mips", issue = "111198")]
+    pub mod mips64 {
+        // `mips64` re-exports the `mips` implementation module, which is
+        // compiled for both `mips` and `mips64`.
+        pub use crate::core_arch::mips::*;
+    }
+
+    /// Platform-specific intrinsics for the `PowerPC` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "powerpc", doc))]
+    #[doc(cfg(target_arch = "powerpc"))]
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub mod powerpc {
+        pub use crate::core_arch::powerpc::*;
+    }
+
+    /// Platform-specific intrinsics for the `PowerPC64` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "powerpc64", doc))]
+    #[doc(cfg(target_arch = "powerpc64"))]
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub mod powerpc64 {
+        pub use crate::core_arch::powerpc64::*;
+    }
+
+    /// Platform-specific intrinsics for the `NVPTX` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "nvptx64", doc))]
+    #[doc(cfg(target_arch = "nvptx64"))]
+    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
+    pub mod nvptx {
+        pub use crate::core_arch::nvptx::*;
+    }
+
+    /// Platform-specific intrinsics for the `loongarch` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "loongarch64", doc))]
+    #[doc(cfg(target_arch = "loongarch64"))]
+    #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+    pub mod loongarch64 {
+        pub use crate::core_arch::loongarch64::*;
+    }
+
+    /// Platform-specific intrinsics for the `s390x` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "s390x", doc))]
+    #[doc(cfg(target_arch = "s390x"))]
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub mod s390x {
+        pub use crate::core_arch::s390x::*;
+    }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
+#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))]
+mod x86;
+#[cfg(any(target_arch = "x86_64", doc))]
+#[doc(cfg(target_arch = "x86_64"))]
+mod x86_64;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", doc))]
+#[doc(cfg(any(target_arch = "aarch64", target_arch = "arm64ec")))]
+mod aarch64;
+#[cfg(any(target_arch = "arm", doc))]
+#[doc(cfg(any(target_arch = "arm")))]
+mod arm;
+
+#[cfg(any(target_arch = "riscv32", doc))]
+#[doc(cfg(any(target_arch = "riscv32")))]
+mod riscv32;
+
+#[cfg(any(target_arch = "riscv64", doc))]
+#[doc(cfg(any(target_arch = "riscv64")))]
+mod riscv64;
+
+#[cfg(any(target_family = "wasm", doc))]
+#[doc(cfg(target_family = "wasm"))]
+mod wasm32;
+
+#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))]
+#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))]
+mod mips;
+
+#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))]
+#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))]
+mod powerpc;
+
+#[cfg(any(target_arch = "powerpc64", doc))]
+#[doc(cfg(target_arch = "powerpc64"))]
+mod powerpc64;
+
+#[cfg(any(target_arch = "nvptx64", doc))]
+#[doc(cfg(target_arch = "nvptx64"))]
+mod nvptx;
+
+#[cfg(any(target_arch = "loongarch64", doc))]
+#[doc(cfg(target_arch = "loongarch64"))]
+mod loongarch64;
+
+#[cfg(any(target_arch = "s390x", doc))]
+#[doc(cfg(target_arch = "s390x"))]
+mod s390x;
--- a/library/stdarch/crates/core_arch/src/nvptx/mod.rs
+++ b/library/stdarch/crates/core_arch/src/nvptx/mod.rs
@ -0,0 +1,236 @@
+//! NVPTX intrinsics (experimental)
+//!
+//! These intrinsics form the foundation of the CUDA
+//! programming model.
+//!
+//! The reference is the [CUDA C Programming Guide][cuda_c]. The
+//! [LLVM NVPTX Backend documentation][llvm_docs] is also relevant.
+//!
+//! [cuda_c]:
+//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
+//! [llvm_docs]:
+//! https://llvm.org/docs/NVPTXUsage.html
+
+use crate::ffi::c_void;
+
+mod packed;
+
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub use packed::*;
+
+// Private declarations of the LLVM NVVM intrinsics (selected through
+// `link_name`) that the public `_`-prefixed wrappers in this module call.
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    // Barrier.
+    #[link_name = "llvm.nvvm.barrier0"]
+    fn syncthreads() -> ();
+    // `ntid.{x,y,z}` register reads.
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
+    fn block_dim_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
+    fn block_dim_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
+    fn block_dim_z() -> i32;
+    // `ctaid.{x,y,z}` register reads.
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
+    fn block_idx_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
+    fn block_idx_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
+    fn block_idx_z() -> i32;
+    // `nctaid.{x,y,z}` register reads.
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
+    fn grid_dim_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
+    fn grid_dim_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
+    fn grid_dim_z() -> i32;
+    // `tid.{x,y,z}` register reads.
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
+    fn thread_idx_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
+    fn thread_idx_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
+    fn thread_idx_z() -> i32;
+}
+
+/// Synchronizes all threads in the block.
+///
+/// Thin wrapper around the `llvm.nvvm.barrier0` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _syncthreads() -> () {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { syncthreads() }
+}
+
+/// x component of the thread-block dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ntid.x` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_dim_x() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_dim_x() }
+}
+
+/// y component of the thread-block dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ntid.y` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_dim_y() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_dim_y() }
+}
+
+/// z component of the thread-block dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ntid.z` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_dim_z() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_dim_z() }
+}
+
+/// x component of the thread-block index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ctaid.x` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_idx_x() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_idx_x() }
+}
+
+/// y component of the thread-block index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ctaid.y` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_idx_y() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_idx_y() }
+}
+
+/// z component of the thread-block index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.ctaid.z` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _block_idx_z() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { block_idx_z() }
+}
+
+/// x component of the block-grid dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.nctaid.x` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _grid_dim_x() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { grid_dim_x() }
+}
+
+/// y component of the block-grid dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.nctaid.y` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _grid_dim_y() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { grid_dim_y() }
+}
+
+/// z component of the block-grid dimension.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.nctaid.z` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _grid_dim_z() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { grid_dim_z() }
+}
+
+/// x component of the thread index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.tid.x` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _thread_idx_x() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { thread_idx_x() }
+}
+
+/// y component of the thread index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.tid.y` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _thread_idx_y() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { thread_idx_y() }
+}
+
+/// z component of the thread index.
+///
+/// Thin wrapper around the `llvm.nvvm.read.ptx.sreg.tid.z` intrinsic.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn _thread_idx_z() -> i32 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { thread_idx_z() }
+}
+
+/// Generates the trap instruction `TRAP`
+///
+/// This function never returns; it is implemented as a call to
+/// `crate::intrinsics::abort()`.
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn trap() -> ! {
+    crate::intrinsics::abort()
+}
+
+// Basic CUDA syscall declarations.
+unsafe extern "C" {
+    /// Print formatted output from a kernel to a host-side output stream.
+    ///
+    /// Syscall arguments:
+    /// * `format`: A pointer to the format specifier input (uses common `printf` format).
+    /// * `valist`: A pointer to the valist input.
+    ///
+    /// Returns the status value reported by `vprintf`.
+    ///
+    /// ```
+    /// #[repr(C)]
+    /// struct PrintArgs(f32, f32, f32, i32);
+    ///
+    /// vprintf(
+    ///     "int(%f + %f) = int(%f) = %d\n".as_ptr(),
+    ///     transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
+    /// );
+    /// ```
+    ///
+    /// Sources:
+    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
+    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
+    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
+    pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
+
+    /// Allocate memory dynamically from a fixed-size heap in global memory.
+    ///
+    /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
+    /// from the device heap and returns a pointer to the allocated memory
+    /// or `NULL` if insufficient memory exists to fulfill the request.
+    ///
+    /// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
+    ///
+    /// The memory allocated by a given CUDA thread via `malloc()` remains allocated
+    /// for the lifetime of the CUDA context, or until it is explicitly released
+    /// by a call to `free()`. It can be used by any other CUDA threads
+    /// even from subsequent kernel launches.
+    ///
+    /// Sources:
+    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
+    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
+    // FIXME(denzp): assign `malloc` and `nothrow` attributes.
+    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
+    pub fn malloc(size: usize) -> *mut c_void;
+
+    /// Free previously dynamically allocated memory.
+    ///
+    /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
+    /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
+    /// the call to `free()` is ignored.
+    ///
+    /// Any CUDA thread may free memory allocated by another thread, but care should be taken
+    /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
+    /// with the same `ptr` have undefined behavior.
+    ///
+    /// Sources:
+    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
+    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
+    // FIXME(denzp): assign `nothrow` attribute.
+    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
+    pub fn free(ptr: *mut c_void);
+
+    // Internal declaration of the syscall. Exported variant has
+    // the `char_size` parameter set to `1` (single char size in bytes).
+    // The exported variant is the `__assert_fail` wrapper defined after this block.
+    fn __assertfail(
+        message: *const u8,
+        file: *const u8,
+        line: u32,
+        function: *const u8,
+        char_size: usize,
+    );
+}
+
+/// Syscall to be used whenever the *assert expression produces a `false` value*.
+///
+/// Forwards to the internal `__assertfail` declaration, passing `1` as its
+/// `char_size` argument.
+///
+/// Syscall arguments:
+/// * `message`: The pointer to the string that should be output.
+/// * `file`: The pointer to the file name string associated with the assert.
+/// * `line`: The line number associated with the assert.
+/// * `function`: The pointer to the function name string associated with the assert.
+///
+/// Source:
+/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
+#[inline]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
+    // Size of a single character in bytes, as expected by `__assertfail`.
+    const CHAR_SIZE: usize = 1;
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { __assertfail(message, file, line, function, CHAR_SIZE) }
+}
--- a/library/stdarch/crates/core_arch/src/nvptx/packed.rs
+++ b/library/stdarch/crates/core_arch/src/nvptx/packed.rs
@ -0,0 +1,139 @@
+//! NVPTX Packed data types (SIMD)
+//!
+//! Packed Data Types is what PTX calls SIMD types. See [PTX ISA (Packed Data Types)](https://docs.nvidia.com/cuda/parallel-thread-execution/#packed-data-types) for a full reference.
+
+// Note: #[assert_instr] tests are not actually being run on nvptx due to being a `no_std` target incapable of running tests. Something like FileCheck would be appropriate for verifying the correct instruction is used.
+
+use crate::intrinsics::simd::*;
+
+// Private declarations of the `llvm.minimum.v2f16` / `llvm.maximum.v2f16`
+// intrinsics, called by `f16x2_min_nan` and `f16x2_max_nan` in this file.
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    #[link_name = "llvm.minimum.v2f16"]
+    fn llvm_f16x2_minimum(a: f16x2, b: f16x2) -> f16x2;
+    #[link_name = "llvm.maximum.v2f16"]
+    fn llvm_f16x2_maximum(a: f16x2, b: f16x2) -> f16x2;
+}
+
+// Defines the `f16x2` vector type through the crate's `types!` macro.
+types! {
+    #![unstable(feature = "stdarch_nvptx", issue = "111199")]
+
+    /// PTX-specific 32-bit wide floating point (f16 x 2) vector type
+    pub struct f16x2(2 x f16);
+
+}
+
+/// Add two values, round to nearest even
+///
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add>
+///
+/// Corresponds to the CUDA C intrinsics:
+///  - [`__hadd2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g921c795176eaa31265bd80ef4fe4b8e6)
+///  - [`__hadd2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g6cd8ddb2c3d670e1a10c3eb2e7644f82)
+#[inline]
+// The expected instruction is `add.rn.f16x2` (same `.f16x2` suffix as the
+// `sub`/`mul`/`fma` functions in this file); it was previously misspelled `f16x22`.
+#[cfg_attr(test, assert_instr(add.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_add(a: f16x2, b: f16x2) -> f16x2 {
+    simd_add(a, b)
+}
+
+/// Subtracts `b` from `a`, rounding to nearest even.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub>
+///
+/// Corresponds to the CUDA C intrinsics:
+///  - [`__hsub2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1ga5536c9c3d853d8c8b9de60e18b41e54)
+///  - [`__hsub2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g8adc164c68d553354f749f0f0645a874)
+#[inline]
+#[cfg_attr(test, assert_instr(sub.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_sub(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { simd_sub(a, b) }
+}
+
+/// Multiplies `a` by `b`, rounding to nearest even.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-mul>
+///
+/// Corresponds to the CUDA C intrinsics:
+///  - [`__hmul2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g70de3f2ee48babe4e0969397ac17708e)
+///  - [`__hmul2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g99f8fe23a4b4c6898d6faf999afaa76e)
+#[inline]
+#[cfg_attr(test, assert_instr(mul.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_mul(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { simd_mul(a, b) }
+}
+
+/// Fused multiply-add, round to nearest even
+///
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-fma>
+///
+/// Corresponds to the CUDA C intrinsics:
+///  - [`__hfma2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
+///  - [`__fma2_rn`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html#group__CUDA__MATH____HALF2__ARITHMETIC_1g43628ba21ded8b1e188a367348008dab)
+// NOTE(review): both list entries link to the same anchor and the `__fma2_rn`
+// name could not be confirmed against the CUDA Math API — TODO verify.
+#[inline]
+#[cfg_attr(test, assert_instr(fma.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_fma(a: f16x2, b: f16x2, c: f16x2) -> f16x2 {
+    simd_fma(a, b, c)
+}
+
+/// Arithmetic negate
+///
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-neg>
+///
+/// Corresponds to the CUDA C intrinsic [`__hneg2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__ARITHMETIC.html)
+#[inline]
+#[cfg_attr(test, assert_instr(neg.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 {
+    simd_neg(a)
+}
+
+/// Computes the minimum of `a` and `b`.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
+///
+/// Corresponds to the CUDA C intrinsic [`__hmin2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g9e17a33f96061804166f3fbd395422b6)
+#[inline]
+#[cfg_attr(test, assert_instr(min.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { simd_fmin(a, b) }
+}
+
+/// Computes the minimum of `a` and `b`; NaNs pass through.
+///
+/// Implemented with the `llvm.minimum.v2f16` intrinsic.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-min>
+///
+/// Corresponds to the CUDA C intrinsic [`__hmin2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g8bb8f58e9294cc261d2f42c4d5aecd6b)
+#[inline]
+#[cfg_attr(test, assert_instr(min.NaN.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_min_nan(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { llvm_f16x2_minimum(a, b) }
+}
+
+/// Computes the maximum of `a` and `b`.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
+///
+/// Corresponds to the CUDA C intrinsic [`__hmax2`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g59fc7fc7975d8127b202444a05e57e3d)
+#[inline]
+#[cfg_attr(test, assert_instr(max.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { simd_fmax(a, b) }
+}
+
+/// Computes the maximum of `a` and `b`; NaNs pass through.
+///
+/// Implemented with the `llvm.maximum.v2f16` intrinsic.
+///
+/// PTX reference:
+/// <https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-max>
+///
+/// Corresponds to the CUDA C intrinsic [`__hmax2_nan`](https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH____HALF2__COMPARISON.html#group__CUDA__MATH____HALF2__COMPARISON_1g41623db7850e3074fd9daa80a14c3897)
+#[inline]
+#[cfg_attr(test, assert_instr(max.NaN.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_max_nan(a: f16x2, b: f16x2) -> f16x2 {
+    // SAFETY: the call is covered by this function's own `unsafe` contract,
+    // which the caller must uphold.
+    unsafe { llvm_f16x2_maximum(a, b) }
+}
--- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
--- a/library/stdarch/crates/core_arch/src/powerpc/macros.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/macros.rs
@ -0,0 +1,315 @@
+// Generates a `pub unsafe fn $fun(args...) -> $r` that simply calls `$call(args...)`,
+// compiled with the `altivec` target feature and annotated so that test builds check
+// the emitted instruction with `assert_instr`.
+//
+// Accepted forms for the bracketed tail:
+// * `[$call, $instr]`                                    - one expected instruction.
+// * `[$call, $instr_altivec / $instr_vsx]`               - forwards to the three-way form,
+//                                                          reusing `$instr_vsx` as the
+//                                                          power9 instruction.
+// * `[$call, $instr_altivec / $instr_vsx / $instr_pwr9]` - expected instruction selected by
+//                                                          the enabled target features.
+macro_rules! test_impl {
+    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr:ident]) => {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        #[cfg_attr(test, assert_instr($instr))]
+        pub unsafe fn $fun ($($v : $ty),*) -> $r {
+            $call ($($v),*)
+        }
+    };
+    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident]) => {
+        test_impl! { $fun ($($v : $ty),*) -> $r [$call, $instr_altivec / $instr_vsx / $instr_vsx] }
+    };
+    ($fun:ident ($($v:ident : $ty:ty),*) -> $r:ty [$call:ident, $instr_altivec:ident / $instr_vsx:ident / $instr_pwr9:ident]) => {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        // Neither vsx nor power9-vector: expect the plain AltiVec instruction.
+        #[cfg_attr(all(test, not(target_feature="vsx"), not(target_feature = "power9-vector")), assert_instr($instr_altivec))]
+        // vsx without power9-vector: expect the VSX instruction.
+        #[cfg_attr(all(test, target_feature="vsx", not(target_feature = "power9-vector")), assert_instr($instr_vsx))]
+        // NOTE(review): this condition requires power9-vector *without* vsx. If enabling
+        // power9-vector always enables vsx as well, this attribute can never apply and
+        // `$instr_pwr9` is never asserted - confirm the intended condition.
+        #[cfg_attr(all(test, not(target_feature="vsx"), target_feature = "power9-vector"), assert_instr($instr_pwr9))]
+        pub unsafe fn $fun ($($v : $ty),*) -> $r {
+            $call ($($v),*)
+        }
+    }
+}
+
+// Implements a vector trait `$Trait` (with method `$m`) for one or more vector types by
+// delegating to a plain function. Every form starts with `[$Trait $m]`; a trailing `+`
+// after the bracket additionally transmutes the callee's return value to `$r`.
+//
+// Leaf forms (generate exactly one `impl`):
+// * `$fun ($a)`                  - unary, returns `Self`.
+// * `$fun ($a) -> $r`            - unary with `type Result = $r` (`+` variant available).
+// * `$fun ($a, $b) -> $r`        - binary, `impl $Trait<$b> for $a` (`+` variant available).
+//
+// Fan-out forms (expand to several leaf invocations):
+// * `1 (ub, sb, uh, sh, uw, sw, sf)` - unary impls for the six integer vectors and `vector_float`.
+// * `$fun ($a, ~$b) -> $r`           - binary impls for `($a, $a)`, `($a, $b)` and `($b, $a)`.
+// * `~(ub, sb, uh, sh, uw, sw)`      - the `~` form for each integer vector paired with its
+//                                      bool vector; `~($fn)` uses one function for all six.
+// * `2 (ub, sb, uh, sh, uw, sw)`     - same-type binary impls for the six integer vectors;
+//                                      `2 ($fn)` uses one function for all six.
+// * `+ 2b (b, h, w)`                 - same-type binary impls for bool/unsigned/signed vectors
+//                                      of each width; `+ 2b ($fn)` uses one function for all.
+//
+// Not every arm is used by every including file, hence `unused_macro_rules` is allowed.
+#[allow(unknown_lints, unused_macro_rules)]
+macro_rules! impl_vec_trait {
+    ([$Trait:ident $m:ident] $fun:ident ($a:ty)) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl $Trait for $a {
+            #[inline]
+            #[target_feature(enable = "altivec")]
+            unsafe fn $m(self) -> Self {
+                $fun(transmute(self))
+            }
+        }
+    };
+    ([$Trait:ident $m:ident] $fun:ident ($a:ty) -> $r:ty) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl $Trait for $a {
+            type Result = $r;
+            #[inline]
+            #[target_feature(enable = "altivec")]
+            unsafe fn $m(self) -> Self::Result {
+                $fun(transmute(self))
+            }
+        }
+    };
+    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty) -> $r:ty) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl $Trait for $a {
+            type Result = $r;
+            #[inline]
+            #[target_feature(enable = "altivec")]
+            unsafe fn $m(self) -> Self::Result {
+                transmute($fun(transmute(self)))
+            }
+        }
+    };
+    ([$Trait:ident $m:ident] 1 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident, $sf: ident)) => {
+        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char) -> vector_unsigned_char }
+        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char) -> vector_signed_char }
+        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short) -> vector_unsigned_short }
+        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short) -> vector_signed_short }
+        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int) -> vector_unsigned_int }
+        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int) -> vector_signed_int }
+        impl_vec_trait!{ [$Trait $m] $sf (vector_float) -> vector_float }
+    };
+    ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl $Trait<$b> for $a {
+            type Result = $r;
+            #[inline]
+            #[target_feature(enable = "altivec")]
+            unsafe fn $m(self, b: $b) -> Self::Result {
+                $fun(transmute(self), transmute(b))
+            }
+        }
+    };
+    ([$Trait:ident $m:ident]+ $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl $Trait<$b> for $a {
+            type Result = $r;
+            #[inline]
+            #[target_feature(enable = "altivec")]
+            unsafe fn $m(self, b: $b) -> Self::Result {
+                transmute($fun(transmute(self), transmute(b)))
+            }
+        }
+    };
+    ([$Trait:ident $m:ident] $fun:ident ($a:ty, ~$b:ty) -> $r:ty) => {
+        impl_vec_trait!{ [$Trait $m] $fun ($a, $a) -> $r }
+        impl_vec_trait!{ [$Trait $m] $fun ($a, $b) -> $r }
+        impl_vec_trait!{ [$Trait $m] $fun ($b, $a) -> $r }
+    };
+    ([$Trait:ident $m:ident] ~($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
+        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, ~vector_bool_char) -> vector_unsigned_char }
+        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, ~vector_bool_char) -> vector_signed_char }
+        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, ~vector_bool_short) -> vector_unsigned_short }
+        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, ~vector_bool_short) -> vector_signed_short }
+        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, ~vector_bool_int) -> vector_unsigned_int }
+        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, ~vector_bool_int) -> vector_signed_int }
+    };
+    ([$Trait:ident $m:ident] ~($fn:ident)) => {
+        impl_vec_trait!{ [$Trait $m] ~($fn, $fn, $fn, $fn, $fn, $fn) }
+    };
+    ([$Trait:ident $m:ident] 2 ($ub:ident, $sb:ident, $uh:ident, $sh:ident, $uw:ident, $sw:ident)) => {
+        impl_vec_trait!{ [$Trait $m] $ub (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
+        impl_vec_trait!{ [$Trait $m] $sb (vector_signed_char, vector_signed_char) -> vector_signed_char }
+        impl_vec_trait!{ [$Trait $m] $uh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
+        impl_vec_trait!{ [$Trait $m] $sh (vector_signed_short, vector_signed_short) -> vector_signed_short }
+        impl_vec_trait!{ [$Trait $m] $uw (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
+        impl_vec_trait!{ [$Trait $m] $sw (vector_signed_int, vector_signed_int) -> vector_signed_int }
+    };
+    ([$Trait:ident $m:ident] 2 ($fn:ident)) => {
+        // The `2` tag must be repeated here: without it the expansion matches no arm of
+        // this macro and any use of the single-function shorthand fails to compile.
+        impl_vec_trait!{ [$Trait $m] 2 ($fn, $fn, $fn, $fn, $fn, $fn) }
+    };
+    ([$Trait:ident $m:ident]+ 2b ($b:ident, $h:ident, $w:ident)) => {
+        impl_vec_trait!{ [$Trait $m]+ $b (vector_bool_char, vector_bool_char) -> vector_bool_char }
+        impl_vec_trait!{ [$Trait $m]+ $b (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char }
+        impl_vec_trait!{ [$Trait $m]+ $b (vector_signed_char, vector_signed_char) -> vector_signed_char }
+        impl_vec_trait!{ [$Trait $m]+ $h (vector_bool_short, vector_bool_short) -> vector_bool_short }
+        impl_vec_trait!{ [$Trait $m]+ $h (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short }
+        impl_vec_trait!{ [$Trait $m]+ $h (vector_signed_short, vector_signed_short) -> vector_signed_short }
+        impl_vec_trait!{ [$Trait $m]+ $w (vector_bool_int, vector_bool_int) -> vector_bool_int }
+        impl_vec_trait!{ [$Trait $m]+ $w (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int }
+        impl_vec_trait!{ [$Trait $m]+ $w (vector_signed_int, vector_signed_int) -> vector_signed_int }
+    };
+    ([$Trait:ident $m:ident]+ 2b ($fn:ident)) => {
+        impl_vec_trait!{ [$Trait $m]+ 2b ($fn, $fn, $fn) }
+    };
+}
+
+// Maps a portable SIMD type name (`i32x4`, `u8x16`, `f32x4`, ...) to the PowerPC vector
+// type with the same element type and lane count (`vector_signed_int`, ...).
+// Expands in type position; any other input token is a compile error.
+macro_rules! s_t_l {
+    (i32x4) => {
+        vector_signed_int
+    };
+    (i16x8) => {
+        vector_signed_short
+    };
+    (i8x16) => {
+        vector_signed_char
+    };
+
+    (u32x4) => {
+        vector_unsigned_int
+    };
+    (u16x8) => {
+        vector_unsigned_short
+    };
+    (u8x16) => {
+        vector_unsigned_char
+    };
+
+    (f32x4) => {
+        vector_float
+    };
+}
+
+// Maps a scalar element type name (`i32`, `u8`, `f32`, ...) to the PowerPC vector type
+// holding elements of that type (`vector_signed_int`, `vector_unsigned_char`, ...).
+// Expands in type position; any other input token is a compile error.
+macro_rules! t_t_l {
+    (i32) => {
+        vector_signed_int
+    };
+    (i16) => {
+        vector_signed_short
+    };
+    (i8) => {
+        vector_signed_char
+    };
+
+    (u32) => {
+        vector_unsigned_int
+    };
+    (u16) => {
+        vector_unsigned_short
+    };
+    (u8) => {
+        vector_unsigned_char
+    };
+
+    (f32) => {
+        vector_float
+    };
+}
+
+// Maps a scalar element type name (`i32`, `u8`, `f32`, ...) to the portable SIMD type
+// name for a vector of that element type (`i32x4`, `u8x16`, `f32x4`, ...).
+// Expands in type position; any other input token is a compile error.
+macro_rules! t_t_s {
+    (i32) => {
+        i32x4
+    };
+    (i16) => {
+        i16x8
+    };
+    (i8) => {
+        i8x16
+    };
+
+    (u32) => {
+        u32x4
+    };
+    (u16) => {
+        u16x8
+    };
+    (u8) => {
+        u8x16
+    };
+
+    (f32) => {
+        f32x4
+    };
+}
+
+// Maps a PowerPC vector type to the unsigned vector type of the same element width:
+// bool, signed and unsigned `char`/`short`/`int` vectors map to the matching
+// `vector_unsigned_*` type, and `vector_float` maps to `vector_unsigned_int`.
+macro_rules! t_u {
+    (vector_bool_char) => {
+        vector_unsigned_char
+    };
+    (vector_bool_short) => {
+        vector_unsigned_short
+    };
+    (vector_bool_int) => {
+        vector_unsigned_int
+    };
+    (vector_unsigned_char) => {
+        vector_unsigned_char
+    };
+    (vector_unsigned_short) => {
+        vector_unsigned_short
+    };
+    (vector_unsigned_int) => {
+        vector_unsigned_int
+    };
+    (vector_signed_char) => {
+        vector_unsigned_char
+    };
+    (vector_signed_short) => {
+        vector_unsigned_short
+    };
+    (vector_signed_int) => {
+        vector_unsigned_int
+    };
+    (vector_float) => {
+        vector_unsigned_int
+    };
+}
+
+// Maps a PowerPC vector type to the bool (mask) vector type of the same element width:
+// bool, signed and unsigned `char`/`short`/`int` vectors map to the matching
+// `vector_bool_*` type, and `vector_float` maps to `vector_bool_int`.
+macro_rules! t_b {
+    (vector_bool_char) => {
+        vector_bool_char
+    };
+    (vector_bool_short) => {
+        vector_bool_short
+    };
+    (vector_bool_int) => {
+        vector_bool_int
+    };
+    (vector_signed_char) => {
+        vector_bool_char
+    };
+    (vector_signed_short) => {
+        vector_bool_short
+    };
+    (vector_signed_int) => {
+        vector_bool_int
+    };
+    (vector_unsigned_char) => {
+        vector_bool_char
+    };
+    (vector_unsigned_short) => {
+        vector_bool_short
+    };
+    (vector_unsigned_int) => {
+        vector_bool_int
+    };
+    (vector_float) => {
+        vector_bool_int
+    };
+}
+
+// Implements `From<$s>` for the PowerPC vector type that `s_t_l!($s)` names, converting
+// with a `transmute` of the value.
+//
+// Accepts either a single identifier or a comma-separated list; the list form expands
+// to one single-identifier invocation per entry.
+macro_rules! impl_from {
+    ($s: ident) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl From<$s> for s_t_l!($s) {
+            fn from (v: $s) -> Self {
+                unsafe {
+                    transmute(v)
+                }
+            }
+        }
+    };
+    ($($s: ident),*) => {
+        $(
+            impl_from! { $s }
+        )*
+    };
+}
+
+// Implements `crate::ops::Neg` for the PowerPC vector type that `s_t_l!($s)` names,
+// using `simd_neg` on the whole vector.
+//
+// The `$zero` expression is part of the invocation syntax but is not used anywhere in
+// the expansion.
+macro_rules! impl_neg {
+    ($s: ident : $zero: expr) => {
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        impl crate::ops::Neg for s_t_l!($s) {
+            type Output = s_t_l!($s);
+            fn neg(self) -> Self::Output {
+                unsafe { simd_neg(self) }
+            }
+        }
+    };
+}
+
+pub(crate) use impl_from;
+pub(crate) use impl_neg;
+pub(crate) use impl_vec_trait;
+pub(crate) use s_t_l;
+pub(crate) use t_b;
+pub(crate) use t_t_l;
+pub(crate) use t_t_s;
+pub(crate) use t_u;
+pub(crate) use test_impl;
--- a/library/stdarch/crates/core_arch/src/powerpc/mod.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/mod.rs
@ -0,0 +1,22 @@
+//! PowerPC intrinsics
+
+pub(crate) mod macros;
+
+mod altivec;
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub use self::altivec::*;
+
+mod vsx;
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub use self::vsx::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Generates the trap instruction `TRAP`
+///
+/// Implemented as a call to `crate::intrinsics::abort()`, so this function never
+/// returns. In test builds `assert_instr` checks that a `trap` instruction is emitted.
+#[cfg_attr(test, assert_instr(trap))]
+#[inline]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn trap() -> ! {
+    crate::intrinsics::abort()
+}
--- a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs
@ -0,0 +1,240 @@
+//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
+//!
+//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
+//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
+//!
+//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
+//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
+
+#![allow(non_camel_case_types)]
+
+use crate::core_arch::powerpc::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::mem::transmute;
+
+// Declares the 128-bit, two-lane vector types used by the VSX intrinsics in this module.
+types! {
+    #![unstable(feature = "stdarch_powerpc", issue = "111145")]
+
+    // NOTE(review): the commented-out `pub struct` lines in this block look like
+    // placeholders for vector types that are not declared here - confirm whether they
+    // are still planned before relying on them.
+    // pub struct vector_Float16 = f16x8;
+    /// PowerPC-specific 128-bit wide vector of two packed `i64`
+    pub struct vector_signed_long(2 x i64);
+    /// PowerPC-specific 128-bit wide vector of two packed `u64`
+    pub struct vector_unsigned_long(2 x u64);
+    /// PowerPC-specific 128-bit wide vector mask of two `i64`
+    pub struct vector_bool_long(2 x i64);
+    /// PowerPC-specific 128-bit wide vector of two packed `f64`
+    pub struct vector_double(2 x f64);
+    // pub struct vector_signed_long_long = vector_signed_long;
+    // pub struct vector_unsigned_long_long = vector_unsigned_long;
+    // pub struct vector_bool_long_long = vector_bool_long;
+    // pub struct vector_signed___int128 = i128x1;
+    // pub struct vector_unsigned___int128 = i128x1;
+}
+
+// Raw binding to the LLVM intrinsic `llvm.ppc.altivec.vperm`.
+// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector types in
+// the signature are not considered FFI-safe by the lint - confirm.
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    #[link_name = "llvm.ppc.altivec.vperm"]
+    fn vperm(
+        a: vector_signed_int,
+        b: vector_signed_int,
+        c: vector_unsigned_char,
+    ) -> vector_signed_int;
+}
+
+mod sealed {
+    use super::*;
+    use crate::core_arch::simd::*;
+
+    /// Implementation trait behind the public `vec_xxpermdi` function, which accepts any
+    /// `T: sealed::VectorPermDI`.
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorPermDI {
+        /// Combines `self` and `b` according to the selector `dm`.
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self;
+    }
+
+    // xxpermdi has a big-endian bias and extended mnemonics, which is why the expected
+    // instruction name below differs between little- and big-endian targets.
+    //
+    // Only the low two bits of `dm` are used. The result always has one lane taken from
+    // `a` followed by one lane taken from `b`:
+    //   dm = 0 -> [a[0], b[0]]
+    //   dm = 1 -> [a[1], b[0]]
+    //   dm = 2 -> [a[0], b[1]]
+    //   dm = 3 -> [a[1], b[1]]
+    // (shuffle indices 0-1 address `a`, 2-3 address `b`).
+    #[inline]
+    #[target_feature(enable = "vsx")]
+    #[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))]
+    #[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
+    unsafe fn xxpermdi(a: vector_signed_long, b: vector_signed_long, dm: u8) -> vector_signed_long {
+        let a: i64x2 = transmute(a);
+        let b: i64x2 = transmute(b);
+        // Each arm uses literal indices; `dm` only selects which arm runs.
+        let r: i64x2 = match dm & 0b11 {
+            0 => simd_shuffle!(a, b, [0b00, 0b10]),
+            1 => simd_shuffle!(a, b, [0b01, 0b10]),
+            2 => simd_shuffle!(a, b, [0b00, 0b11]),
+            _ => simd_shuffle!(a, b, [0b01, 0b11]),
+        };
+        transmute(r)
+    }
+
+    // Implements `VectorPermDI` for `$impl` by transmuting both operands to the
+    // `vector_signed_long` signature of `xxpermdi` and transmuting the result back.
+    macro_rules! vec_xxpermdi {
+        {$impl: ident} => {
+            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+            impl VectorPermDI for $impl {
+                #[inline]
+                #[target_feature(enable = "vsx")]
+                unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self {
+                    transmute(xxpermdi(transmute(self), transmute(b), dm))
+                }
+            }
+        }
+    }
+
+    // All four two-lane vector types declared in this module get an implementation.
+    vec_xxpermdi! { vector_unsigned_long }
+    vec_xxpermdi! { vector_signed_long }
+    vec_xxpermdi! { vector_bool_long }
+    vec_xxpermdi! { vector_double }
+
+    /// Implementation trait behind the public `vec_mergee` and `vec_mergeo` functions,
+    /// which accept any `T: sealed::VectorMergeEo`.
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorMergeEo {
+        /// Merge-even of `self` and `b`.
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        unsafe fn vec_mergee(self, b: Self) -> Self;
+        /// Merge-odd of `self` and `b`.
+        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+        unsafe fn vec_mergeo(self, b: Self) -> Self;
+    }
+
+    // Merge-even on 32-bit elements, expressed as a byte permute through `vec_perm`.
+    // The expected instruction is only asserted when `power8-vector` is enabled, and its
+    // name is swapped between little-endian (`vmrgow`) and big-endian (`vmrgew`) targets.
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(
+        all(test, target_endian = "little", target_feature = "power8-vector"),
+        assert_instr(vmrgow)
+    )]
+    #[cfg_attr(
+        all(test, target_endian = "big", target_feature = "power8-vector"),
+        assert_instr(vmrgew)
+    )]
+    unsafe fn mergee(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        // Byte selector in groups of four: 0x00..=0x03, 0x10..=0x13, 0x08..=0x0B,
+        // 0x18..=0x1B, i.e. 32-bit words 0 and 2 of each source, interleaved.
+        // (indices 0x10 and above presumably address `b` - see `vec_perm`).
+        let p = transmute(u8x16::new(
+            0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+            0x1A, 0x1B,
+        ));
+        vec_perm(a, b, p)
+    }
+
+    // Merge-odd on 32-bit elements, expressed as a byte permute through `vec_perm`.
+    // The expected instruction is only asserted when `power8-vector` is enabled, and its
+    // name is swapped between little-endian (`vmrgew`) and big-endian (`vmrgow`) targets.
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(
+        all(test, target_endian = "little", target_feature = "power8-vector"),
+        assert_instr(vmrgew)
+    )]
+    #[cfg_attr(
+        all(test, target_endian = "big", target_feature = "power8-vector"),
+        assert_instr(vmrgow)
+    )]
+    unsafe fn mergeo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        // Byte selector in groups of four: 0x04..=0x07, 0x14..=0x17, 0x0C..=0x0F,
+        // 0x1C..=0x1F, i.e. 32-bit words 1 and 3 of each source, interleaved.
+        // (indices 0x10 and above presumably address `b` - see `vec_perm`).
+        let p = transmute(u8x16::new(
+            0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D,
+            0x1E, 0x1F,
+        ));
+        vec_perm(a, b, p)
+    }
+
+    // Implements `VectorMergeEo` for `$impl`.
+    //
+    // `$even` and `$odd` name the functions that perform the merge on
+    // `vector_signed_int`; operands are transmuted to that type for the call and the
+    // result is transmuted back to `$impl`.
+    macro_rules! vec_mergeeo {
+        { $impl: ident, $even: ident, $odd: ident } => {
+            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+            impl VectorMergeEo for $impl {
+                #[inline]
+                #[target_feature(enable = "altivec")]
+                unsafe fn vec_mergee(self, b: Self) -> Self {
+                    // Use the function supplied by the invocation rather than a
+                    // hard-coded name, so the macro parameters are actually honoured.
+                    transmute($even(transmute(self), transmute(b)))
+                }
+                #[inline]
+                #[target_feature(enable = "altivec")]
+                unsafe fn vec_mergeo(self, b: Self) -> Self {
+                    transmute($odd(transmute(self), transmute(b)))
+                }
+            }
+        }
+    }
+
+    // Every 32-bit-element vector type shares the same pair of implementations.
+    vec_mergeeo! { vector_signed_int, mergee, mergeo }
+    vec_mergeeo! { vector_unsigned_int, mergee, mergeo }
+    vec_mergeeo! { vector_bool_int, mergee, mergeo }
+    vec_mergeeo! { vector_float, mergee, mergeo }
+}
+
+/// Vector permute.
+///
+/// Builds a two-lane result from one lane of `a` followed by one lane of `b`; which
+/// lanes are taken is selected by the const parameter `DM`.
+///
+/// `DM` is checked at compile time with `static_assert_uimm_bits!(DM, 2)` and is then
+/// passed to the type's `VectorPermDI` implementation as a `u8`.
+#[inline]
+#[target_feature(enable = "vsx")]
+//#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_xxpermdi<T, const DM: i32>(a: T, b: T) -> T
+where
+    T: sealed::VectorPermDI,
+{
+    static_assert_uimm_bits!(DM, 2);
+    a.vec_xxpermdi(b, DM as u8)
+}
+
+/// Vector Merge Even
+///
+/// ## Purpose
+/// Merges the even-numbered values from two vectors.
+///
+/// ## Result value
+/// The even-numbered elements of `a` are stored into the even-numbered elements of the result.
+/// The even-numbered elements of `b` are stored into the odd-numbered elements of the result.
+///
+/// Available for every type implementing the sealed `VectorMergeEo` trait.
+#[inline]
+#[target_feature(enable = "altivec")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_mergee<T>(a: T, b: T) -> T
+where
+    T: sealed::VectorMergeEo,
+{
+    a.vec_mergee(b)
+}
+
+/// Vector Merge Odd
+///
+/// ## Purpose
+/// Merges the odd-numbered values from two vectors.
+///
+/// ## Result value
+/// The odd-numbered elements of `a` are stored into the even-numbered elements of the result.
+/// The odd-numbered elements of `b` are stored into the odd-numbered elements of the result.
+///
+/// Available for every type implementing the sealed `VectorMergeEo` trait.
+#[inline]
+#[target_feature(enable = "altivec")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_mergeo<T>(a: T, b: T) -> T
+where
+    T: sealed::VectorMergeEo,
+{
+    a.vec_mergeo(b)
+}
+
+// Runtime tests for `vec_xxpermdi` over every two-lane vector type.
+#[cfg(test)]
+mod tests {
+    #[cfg(target_arch = "powerpc")]
+    use crate::core_arch::arch::powerpc::*;
+
+    #[cfg(target_arch = "powerpc64")]
+    use crate::core_arch::arch::powerpc64::*;
+
+    use crate::core_arch::simd::*;
+    use crate::mem::transmute;
+    use stdarch_test::simd_test;
+
+    // Generates one test named `$name`.
+    //
+    // The four bracketed lists give the lane values: the first input is built from the
+    // first two lists and the second input from the last two. The test then checks the
+    // result of `vec_xxpermdi` for every selector value 0..=3 against the expected lanes.
+    macro_rules! test_vec_xxpermdi {
+        {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
+            #[simd_test(enable = "vsx")]
+            unsafe fn $name() {
+                let a: $longtype = transmute($shorttype::new($($a),+, $($b),+));
+                let b = transmute($shorttype::new($($c),+, $($d),+));
+
+                assert_eq!($shorttype::new($($a),+, $($c),+), transmute(vec_xxpermdi::<_, 0>(a, b)));
+                assert_eq!($shorttype::new($($b),+, $($c),+), transmute(vec_xxpermdi::<_, 1>(a, b)));
+                assert_eq!($shorttype::new($($a),+, $($d),+), transmute(vec_xxpermdi::<_, 2>(a, b)));
+                assert_eq!($shorttype::new($($b),+, $($d),+), transmute(vec_xxpermdi::<_, 3>(a, b)));
+            }
+        }
+    }
+
+    test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
+    test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
+    test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
+    test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
+}
--- a/library/stdarch/crates/core_arch/src/powerpc64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc64/mod.rs
@ -0,0 +1,14 @@
+//! PowerPC 64
+//!
+//! The reference is the [64-Bit ELF V2 ABI Specification - Power
+//! Architecture].
+//!
+//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+
+mod vsx;
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub use crate::core_arch::powerpc::*;
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub use self::vsx::*;
--- a/library/stdarch/crates/core_arch/src/powerpc64/vsx.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc64/vsx.rs
@ -0,0 +1,156 @@
+//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
+//!
+//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
+//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
+//!
+//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
+//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
+
+#![allow(non_camel_case_types)]
+
+use crate::core_arch::powerpc::macros::*;
+use crate::core_arch::powerpc::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::mem::transmute;
+
+// Raw bindings to the LLVM intrinsics `llvm.ppc.vsx.lxvl` and `llvm.ppc.vsx.stxvl`.
+// The length operand `l` is passed through exactly as given; the `vec_lxvl` and
+// `vec_stxvl` wrappers in this file shift it left by 56 bits before calling these.
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    #[link_name = "llvm.ppc.vsx.lxvl"]
+    fn lxvl(a: *const u8, l: usize) -> vector_signed_int;
+
+    #[link_name = "llvm.ppc.vsx.stxvl"]
+    fn stxvl(v: vector_signed_int, a: *mut u8, l: usize);
+}
+
+mod sealed {
+    use super::*;
+
+    // Load-with-length helper: calls the `lxvl` intrinsic with the byte count shifted
+    // left by 56 bits, which on a 64-bit `usize` places the low byte of `l` in the most
+    // significant byte (presumably where the instruction reads the length - confirm
+    // against the ISA).
+    #[inline]
+    #[target_feature(enable = "power9-vector")]
+    #[cfg_attr(test, assert_instr(lxvl))]
+    unsafe fn vec_lxvl(p: *const u8, l: usize) -> vector_signed_int {
+        lxvl(p, l << 56)
+    }
+
+    /// Implementation trait behind the public `vec_xl_len` function; implemented for
+    /// `*const` pointers to the supported element types.
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorXloads {
+        /// Vector type produced by the load.
+        type Result;
+        /// Loads a vector from `self` using the length `l`.
+        unsafe fn vec_xl_len(self, l: usize) -> Self::Result;
+    }
+
+    // Implements `VectorXloads` for `*const $ty`. The result type is the vector type
+    // `t_t_l!($ty)`; the pointer is cast to `*const u8` for `vec_lxvl` and the loaded
+    // `vector_signed_int` is transmuted to the result type.
+    macro_rules! impl_vsx_loads {
+        ($ty:ident) => {
+            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+            impl VectorXloads for *const $ty {
+                type Result = t_t_l!($ty);
+                #[inline]
+                #[target_feature(enable = "power9-vector")]
+                unsafe fn vec_xl_len(self, l: usize) -> Self::Result {
+                    transmute(vec_lxvl(self as *const u8, l))
+                }
+            }
+        };
+    }
+
+    impl_vsx_loads! { i8 }
+    impl_vsx_loads! { u8 }
+    impl_vsx_loads! { i16 }
+    impl_vsx_loads! { u16 }
+    impl_vsx_loads! { i32 }
+    impl_vsx_loads! { u32 }
+    impl_vsx_loads! { f32 }
+
+    // Store-with-length helper: calls the `stxvl` intrinsic with the byte count shifted
+    // left by 56 bits, mirroring `vec_lxvl` (on a 64-bit `usize` this places the low
+    // byte of `l` in the most significant byte).
+    #[inline]
+    #[target_feature(enable = "power9-vector")]
+    #[cfg_attr(test, assert_instr(stxvl))]
+    unsafe fn vec_stxvl(v: vector_signed_int, a: *mut u8, l: usize) {
+        stxvl(v, a, l << 56);
+    }
+
+    /// Implementation trait behind the public `vec_xst_len` function; implemented for
+    /// the supported vector types.
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorXstores {
+        /// Destination pointer type accepted for this vector type.
+        type Out;
+        /// Stores `self` to `p` using the length `l`.
+        unsafe fn vec_xst_len(self, p: Self::Out, l: usize);
+    }
+
+    // Implements `VectorXstores` for the vector type `t_t_l!($ty)` with `*mut $ty` as the
+    // destination pointer type. The vector is transmuted to `vector_signed_int` and the
+    // pointer is cast to `*mut u8` for the underlying store helper.
+    macro_rules! impl_stores {
+        ($ty:ident) => {
+            #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+            impl VectorXstores for t_t_l!($ty) {
+                type Out = *mut $ty;
+                #[inline]
+                #[target_feature(enable = "power9-vector")]
+                unsafe fn vec_xst_len(self, a: Self::Out, l: usize) {
+                    // Go through `vec_stxvl` so the length is shifted into the position
+                    // the instruction expects (`l << 56`), exactly as the load path does
+                    // via `vec_lxvl`. Calling the raw `stxvl` binding with an unshifted
+                    // `l` would hand the instruction a length of zero for every `l` in
+                    // the documented 0..=255 range.
+                    vec_stxvl(transmute(self), a as *mut u8, l)
+                }
+            }
+        };
+    }
+
+    impl_stores! { i8 }
+    impl_stores! { u8 }
+    impl_stores! { i16 }
+    impl_stores! { u16 }
+    impl_stores! { i32 }
+    impl_stores! { u32 }
+    impl_stores! { f32 }
+}
+
+/// Vector Load with Length
+///
+/// ## Purpose
+/// Loads a vector of a specified byte length.
+///
+/// ## Result value
+/// Loads the number of bytes specified by `len` from the address specified in `p`.
+/// Initializes elements in order from the byte stream (as defined by the endianness of the
+/// target). Any bytes of elements that cannot be initialized from the number of loaded bytes have
+/// a zero value.
+///
+/// Between 0 and 16 bytes, inclusive, will be loaded. The length is specified by the
+/// least-significant byte of `len`, as min (`len` mod 256, 16). The behavior is undefined if the
+/// length argument is outside of the range 0–255, or if it is not a multiple of the vector
+/// element size.
+///
+/// ## Notes
+/// vec_xl_len should not be used to load from cache-inhibited memory.
+#[inline]
+#[target_feature(enable = "power9-vector")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_xl_len<T>(p: T, len: usize) -> <T as sealed::VectorXloads>::Result
+where
+    T: sealed::VectorXloads,
+{
+    p.vec_xl_len(len)
+}
+
+/// Vector Store with Length
+///
+/// ## Purpose
+///
+/// Stores a vector of a specified byte length.
+///
+/// ## Operation
+///
+/// Stores the number of bytes specified by `l` of the vector `v` to the address specified
+/// in `a`. The bytes are obtained starting from the lowest-numbered byte of the lowest-numbered
+/// element (as defined by the endianness of the target). All bytes of an element are accessed
+/// before proceeding to the next higher element.
+///
+/// Between 0 and 16 bytes, inclusive, will be stored. The length is specified by the
+/// least-significant byte of `l`, as min (`l` mod 256, 16). The behavior is undefined if the
+/// length argument is outside of the range 0–255, or if it is not a multiple of the vector
+/// element size.
+///
+/// ## Notes
+/// vec_xst_len should not be used to store to cache-inhibited memory.
+#[inline]
+#[target_feature(enable = "power9-vector")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_xst_len<T>(v: T, a: <T as sealed::VectorXstores>::Out, l: usize)
+where
+    T: sealed::VectorXstores,
+{
+    v.vec_xst_len(a, l)
+}
--- a/library/stdarch/crates/core_arch/src/riscv32/mod.rs
+++ b/library/stdarch/crates/core_arch/src/riscv32/mod.rs
@ -0,0 +1,6 @@
+//! RISC-V RV32 specific intrinsics
+
+mod zk;
+
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub use zk::*;
--- a/library/stdarch/crates/core_arch/src/riscv32/zk.rs
+++ b/library/stdarch/crates/core_arch/src/riscv32/zk.rs
@ -0,0 +1,331 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" { // LLVM intrinsics wrapped by the public functions below
+    #[link_name = "llvm.riscv.aes32esi"]
+    fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32esmi"]
+    fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsi"]
+    fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsmi"]
+    fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.zip.i32"]
+    fn _zip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.unzip.i32"]
+    fn _unzip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0h"]
+    fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0l"]
+    fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1h"]
+    fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1l"]
+    fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum0r"]
+    fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum1r"]
+    fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
+}
+
+/// AES final round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from `rs2` according to `BS`. To this it applies the
+/// forward AES SBox operation, before XOR’ing the result with `rs1`. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.3
+///
+/// # Note
+///
+/// The `BS` parameter must be a constant value in the range `0..=3` (a 2-bit byte selector);
+/// larger values are rejected by a `static_assert!`.
+#[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    unsafe { _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
+}
+
+/// AES middle round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from `rs2` according to `BS`. To this it applies the
+/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with
+/// `rs1`. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.4
+///
+/// # Note
+///
+/// The `BS` parameter must be a constant value in the range `0..=3` (a 2-bit byte selector);
+/// larger values are rejected by a `static_assert!`.
+#[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    unsafe { _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
+}
+
+/// AES final round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from `rs2` according to `BS`. To this it applies the
+/// inverse AES SBox operation, and XOR’s the result with `rs1`. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.1
+///
+/// # Note
+///
+/// The `BS` parameter must be a constant value in the range `0..=3` (a 2-bit byte selector);
+/// larger values are rejected by a `static_assert!`.
+#[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    unsafe { _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
+}
+
+/// AES middle round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from `rs2` according to `BS`. To this it applies the
+/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with
+/// `rs1`. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.2
+///
+/// # Note
+///
+/// The `BS` parameter must be a constant value in the range `0..=3` (a 2-bit byte selector);
+/// larger values are rejected by a `static_assert!`.
+#[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    unsafe { _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32 }
+}
+
+/// Place upper/lower halves of the source register into odd/even bits of the destination
+/// respectively.
+///
+/// This instruction places bits in the low half of the source register into the even bit
+/// positions of the destination, and bits in the high half of the source register into the odd
+/// bit positions of the destination. It is the inverse of the `unzip` instruction. This
+/// instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.49
+#[target_feature(enable = "zbkb")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(zip))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn zip(rs: u32) -> u32 {
+    unsafe { _zip(rs as i32) as u32 }
+}
+
+/// Place odd and even bits of the source word into upper/lower halves of the destination.
+///
+/// This instruction places the even bits of the source register into the low half of the
+/// destination, and the odd bits of the source into the high half of the destination. It is
+/// the inverse of the `zip` instruction. This instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.45
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(unzip))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn unzip(rs: u32) -> u32 {
+    unsafe { _unzip(rs as i32) as u32 }
+}
+
+/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the `sha512sig0l` instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.31
+#[target_feature(enable = "zknh")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sha512sig0h))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sig0h(rs1 as i32, rs2 as i32) as u32 }
+}
+
+/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the `sha512sig0h` instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.32
+#[target_feature(enable = "zknh")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sha512sig0l))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sig0l(rs1 as i32, rs2 as i32) as u32 }
+}
+
+/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the `sha512sig1l` instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.33
+#[target_feature(enable = "zknh")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sha512sig1h))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sig1h(rs1 as i32, rs2 as i32) as u32 }
+}
+
+/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the `sha512sig1h` instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.34
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1l))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sig1l(rs1 as i32, rs2 as i32) as u32 }
+}
+
+/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.35
+#[target_feature(enable = "zknh")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sha512sum0r))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sum0r(rs1 as i32, rs2 as i32) as u32 }
+}
+
+/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.36
+#[target_feature(enable = "zknh")]
+// `assert_instr` check disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sha512sum1r))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
+    unsafe { _sha512sum1r(rs1 as i32, rs2 as i32) as u32 }
+}
--- a/library/stdarch/crates/core_arch/src/riscv64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/riscv64/mod.rs
@ -0,0 +1,57 @@
+//! RISC-V RV64 specific intrinsics
+use crate::arch::asm;
+
+mod zk;
+
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub use zk::*;
+
+/// Loads an unsigned word (32-bit) integer from virtual machine memory
+///
+/// This instruction performs an explicit memory access as though `V=1`;
+/// i.e., with the address translation and protection, and the endianness, that apply to memory
+/// accesses in either VS-mode or VU-mode.
+///
+/// This operation is not available under RV32 base instruction set.
+///
+/// This function is unsafe because it accesses the virtual supervisor or user via a `HLV.WU`
+/// instruction, which is effectively a dereference of an arbitrary memory address.
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub unsafe fn hlv_wu(src: *const u32) -> u32 {
+    let value: u32;
+    asm!(".insn i 0x73, 0x4, {}, {}, 0x681", out(reg) value, in(reg) src, options(readonly, nostack));
+    value
+}
+
+/// Loads a doubleword (64-bit) integer from virtual machine memory
+///
+/// This instruction performs an explicit memory access as though `V=1`;
+/// i.e., with the address translation and protection, and the endianness, that apply to memory
+/// accesses in either VS-mode or VU-mode.
+///
+/// This operation is not available under RV32 base instruction set.
+///
+/// This function is unsafe because it accesses the virtual supervisor or user via a `HLV.D`
+/// instruction, which is effectively a dereference of an arbitrary memory address.
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub unsafe fn hlv_d(src: *const i64) -> i64 {
+    let value: i64;
+    asm!(".insn i 0x73, 0x4, {}, {}, 0x6C0", out(reg) value, in(reg) src, options(readonly, nostack));
+    value
+}
+
+/// Stores a doubleword (64-bit) integer to virtual machine memory
+///
+/// This instruction performs an explicit memory access as though `V=1`;
+/// i.e., with the address translation and protection, and the endianness, that apply to memory
+/// accesses in either VS-mode or VU-mode.
+///
+/// This function is unsafe because it accesses the virtual supervisor or user via a `HSV.D`
+/// instruction, which is effectively a dereference of an arbitrary memory address.
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub unsafe fn hsv_d(dst: *mut i64, src: i64) {
+    asm!(".insn r 0x73, 0x4, 0x37, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
+}
--- a/library/stdarch/crates/core_arch/src/riscv64/zk.rs
+++ b/library/stdarch/crates/core_arch/src/riscv64/zk.rs
@ -0,0 +1,265 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" { // LLVM intrinsics wrapped by the public functions below
+    #[link_name = "llvm.riscv.aes64es"]
+    fn _aes64es(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64esm"]
+    fn _aes64esm(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ds"]
+    fn _aes64ds(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64dsm"]
+    fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ks1i"]
+    fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ks2"]
+    fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64im"]
+    fn _aes64im(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig0"]
+    fn _sha512sig0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig1"]
+    fn _sha512sig1(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum0"]
+    fn _sha512sum0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum1"]
+    fn _sha512sum1(rs1: i64) -> i64;
+}
+
+/// AES final round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers (`rs1`, `rs2`) to represent the entire AES state, and
+/// produces half of the next round output, applying the ShiftRows and SubBytes steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.7
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes64es))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64es(rs1: u64, rs2: u64) -> u64 {
+    unsafe { _aes64es(rs1 as i64, rs2 as i64) as u64 }
+}
+
+/// AES middle round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers (`rs1`, `rs2`) to represent the entire AES state, and
+/// produces half of the next round output, applying the ShiftRows, SubBytes and MixColumns
+/// steps. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.8
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes64esm))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64esm(rs1: u64, rs2: u64) -> u64 {
+    unsafe { _aes64esm(rs1 as i64, rs2 as i64) as u64 }
+}
+
+/// AES final round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers (`rs1`, `rs2`) to represent the entire AES state, and
+/// produces half of the next round output, applying the Inverse ShiftRows and SubBytes steps.
+/// This instruction must always be implemented such that its execution latency does not depend
+/// on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.5
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ds))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64ds(rs1: u64, rs2: u64) -> u64 {
+    unsafe { _aes64ds(rs1 as i64, rs2 as i64) as u64 }
+}
+
+/// AES middle round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers (`rs1`, `rs2`) to represent the entire AES state, and
+/// produces half of the next round output, applying the Inverse ShiftRows, SubBytes and
+/// MixColumns steps. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.6
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64dsm))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
+    unsafe { _aes64dsm(rs1 as i64, rs2 as i64) as u64 }
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES block cipher
+/// involving the SBox operation.
+///
+/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the
+/// AES block cipher Key Schedule. This instruction must always be implemented such that its
+/// execution latency does not depend on the data being operated on. Note that `RNUM` must be in
+/// the range `0x0..=0xA`. The values `0xB..=0xF` are reserved.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.10
+///
+/// # Note
+///
+/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`.
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
+    static_assert!(RNUM <= 10);
+
+    unsafe { _aes64ks1i(rs1 as i64, RNUM as i32) as u64 }
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES block cipher.
+///
+/// This instruction implements the additional XOR’ing of key words as part of the AES block
+/// cipher Key Schedule. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.11
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ks2))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
+    unsafe { _aes64ks2(rs1 as i64, rs2 as i64) as u64 }
+}
+
+/// This instruction accelerates the inverse MixColumns step of the AES block cipher, and is used to aid creation of
+/// the decryption KeySchedule.
+///
+/// The instruction applies the inverse MixColumns transformation to two columns of the state array, packed
+/// into a single 64-bit register. It is used to create the inverse cipher KeySchedule, according to the equivalent
+/// inverse cipher construction in the AES specification (Page 23, Section 5.3.5). This instruction must
+/// always be implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.9
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64im))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn aes64im(rs1: u64) -> u64 {
+    unsafe { _aes64im(rs1 as i64) as u64 }
+}
+
+/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.37
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig0(rs1: u64) -> u64 {
+    unsafe { _sha512sig0(rs1 as i64) as u64 }
+}
+
+/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.38
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sig1(rs1: u64) -> u64 {
+    unsafe { _sha512sig1(rs1 as i64) as u64 }
+}
+
+/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.39
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum0))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sum0(rs1: u64) -> u64 {
+    unsafe { _sha512sum0(rs1 as i64) as u64 }
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.40
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum1))]
+#[inline]
+#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
+pub fn sha512sum1(rs1: u64) -> u64 {
+    unsafe { _sha512sum1(rs1 as i64) as u64 }
+}
--- a/Show more
+++ b/Show more