Auto merge of #144222 - Kobzol:stdarch-push, r=folkertdev

stdarch subtree update Subtree update of `stdarch` to 5531955678. Created using https://github.com/rust-lang/josh-sync. I saw that there were non-trivial changes made to `std_detect` in `stdarch` recently. So I want to get them merged here before we move forward with https://github.com/rust-lang/rust/pull/143412. r? `@folkertdev`
2025-07-22 15:25:31 +00:00 · 2025-07-22 15:25:31 +00:00 · 2e53675668
commit 2e53675668
parent 35487a2e7c 9b7d31c851
72 changed files with 4644 additions and 5044 deletions
--- a/library/stdarch/.github/workflows/main.yml
+++ b/library/stdarch/.github/workflows/main.yml
@ -255,6 +255,28 @@ jobs:
      env:
        TARGET: ${{ matrix.target.tuple }}

+  # Check that the generated files agree with the checked-in versions.
+  check-stdarch-gen:
+    needs: [style]
+    name: Check stdarch-gen-{arm, loongarch} output 
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: rustup update nightly && rustup default nightly && rustup component add rustfmt
+    - name: Check arm spec
+      run: |
+        cargo run --bin=stdarch-gen-arm --release -- crates/stdarch-gen-arm/spec
+        git diff --exit-code
+    - name: Check lsx.spec
+      run: |
+        cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lsx.spec
+        git diff --exit-code
+    - name: Check lasx.spec
+      run: |
+        cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lasx.spec
+        git diff --exit-code
+
  build-std-detect:
    needs: [style]
    name: Build std_detect
@ -271,6 +293,7 @@ jobs:
      - verify
      - test
      - build-std-detect
+      - check-stdarch-gen
    runs-on: ubuntu-latest
    # We need to ensure this job does *not* get skipped if its dependencies fail,
    # because a skipped job is considered a success by GitHub. So we have to
--- a/library/stdarch/.github/workflows/rustc-pull.yml
+++ b/library/stdarch/.github/workflows/rustc-pull.yml
@ -0,0 +1,22 @@
+# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
+name: rustc-pull
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Run at 04:00 UTC every Monday and Thursday
+    - cron: '0 4 * * 1,4'
+
+jobs:
+  pull:
+    if: github.repository == 'rust-lang/stdarch'
+    uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
+    with:
+      # https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
+      zulip-stream-id: 208962
+      zulip-bot-email:  "stdarch-ci-bot@rust-lang.zulipchat.com"
+      pr-base-branch: master
+      branch-name: rustc-pull
+    secrets:
+      zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
+      token: ${{ secrets.GITHUB_TOKEN }}
--- a/library/stdarch/Cargo.lock
+++ b/library/stdarch/Cargo.lock
@ -73,20 +73,26 @@ version = "0.1.0"
 dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]

 [[package]]
 name = "autocfg"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "bitflags"
+version = "2.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"

 [[package]]
 name = "cc"
-version = "1.2.26"
+version = "1.2.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
+checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
 dependencies = [
 "shlex",
 ]
@ -99,9 +105,9 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"

 [[package]]
 name = "clap"
-version = "4.5.40"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
+checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
 dependencies = [
 "clap_builder",
 "clap_derive",
@ -109,9 +115,9 @@ dependencies = [

 [[package]]
 name = "clap_builder"
-version = "4.5.40"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
+checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
 dependencies = [
 "anstream",
 "anstyle",
@ -121,14 +127,14 @@ dependencies = [

 [[package]]
 name = "clap_derive"
-version = "4.5.40"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
+checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
 dependencies = [
 "heck",
 "proc-macro2",
 "quote",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]

 [[package]]
@ -338,9 +344,9 @@ dependencies = [

 [[package]]
 name = "indexmap"
-version = "2.9.0"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
+checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
 dependencies = [
 "equivalent",
 "hashbrown 0.15.4",
@ -403,9 +409,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"

 [[package]]
 name = "libc"
-version = "0.2.172"
+version = "0.2.174"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
+checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"

 [[package]]
 name = "linked-hash-map"
@ -624,7 +630,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
 dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]

 [[package]]
@ -685,7 +691,7 @@ version = "0.1.0"
 dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]

 [[package]]
@ -703,7 +709,6 @@ name = "stdarch-gen-arm"
 version = "0.1.0"
 dependencies = [
 "itertools",
- "lazy_static",
 "proc-macro2",
 "quote",
 "regex",
@ -727,7 +732,6 @@ dependencies = [
 "assert-instr-macro",
 "cc",
 "cfg-if",
- "lazy_static",
 "rustc-demangle",
 "simd-test-macro",
 "wasmprinter",
@ -742,7 +746,7 @@ dependencies = [
 "quote",
 "serde",
 "serde_json",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]

 [[package]]
@ -780,9 +784,9 @@ dependencies = [

 [[package]]
 name = "syn"
-version = "2.0.102"
+version = "2.0.104"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
+checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
 dependencies = [
 "proc-macro2",
 "quote",
@ -834,21 +838,23 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"

 [[package]]
 name = "wasmparser"
-version = "0.113.3"
+version = "0.235.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
+checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
 dependencies = [
- "indexmap 2.9.0",
+ "bitflags",
+ "indexmap 2.10.0",
 "semver",
 ]

 [[package]]
 name = "wasmprinter"
-version = "0.2.67"
+version = "0.235.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
+checksum = "75aa8e9076de6b9544e6dab4badada518cca0bf4966d35b131bbd057aed8fa0a"
 dependencies = [
 "anyhow",
+ "termcolor",
 "wasmparser",
 ]

@ -945,20 +951,20 @@ dependencies = [

 [[package]]
 name = "zerocopy"
-version = "0.8.25"
+version = "0.8.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
+checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
 dependencies = [
 "zerocopy-derive",
 ]

 [[package]]
 name = "zerocopy-derive"
-version = "0.8.25"
+version = "0.8.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
+checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
 dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.102",
+ "syn 2.0.104",
 ]
--- a/library/stdarch/Cargo.toml
+++ b/library/stdarch/Cargo.toml
@ -5,7 +5,8 @@ members = [
  "examples",
 ]
 exclude = [
-  "crates/wasm-assert-instr-tests"
+  "crates/wasm-assert-instr-tests",
+  "rust_programs",
 ]

 [profile.release]
--- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  g++ \
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang-19 \
+  clang \
  lld

 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
--- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
@ -9,15 +9,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang-19 \
+  clang \
  curl \
  xz-utils \
  lld

-ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
+ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"

 # Download the aarch64_be gcc toolchain
-RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
+RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
 RUN tar -xvf "${TOOLCHAIN}.tar.xz"
 RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains

--- a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  ca-certificates \
--- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang-19 \
+  clang \
  lld
 ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
--- a/library/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc-multilib \
  libc6-dev \
--- a/library/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc-multilib \
  libc6-dev \
--- a/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@ -1,9 +1,9 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev qemu-user-static ca-certificates \
-    gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
+    gcc-loongarch64-linux-gnu libc6-dev-loong64-cross


 ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
--- a/library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/library/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/library/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile
+++ b/library/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
--- a/library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
+++ b/library/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  libc6-dev \
--- a/library/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/riscv32gc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/riscv32gc-unknown-linux-gnu/Dockerfile
@ -1,10 +1,10 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
        wget xz-utils make file llvm

-ENV VERSION=2025.01.20
+ENV VERSION=2025.07.03

 RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
    -O riscv-toolchain.tar.xz
--- a/library/stdarch/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
--- a/library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 RUN apt-get update && apt-get install -y --no-install-recommends \
        curl ca-certificates \
--- a/library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
+++ b/library/stdarch/ci/docker/wasm32-wasip1/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10

 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update -y && apt-get install -y --no-install-recommends \
@ -7,7 +7,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \
  xz-utils \
  clang

-RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz | tar xJf -
-ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux
+ENV VERSION=v34.0.1
+
+RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf -
+ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux

 ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@ -1,4 +1,4 @@
-FROM ubuntu:25.04
+FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
  gcc \
  libc6-dev \
@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  wget \
  xz-utils

-RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
+RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
 RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
 ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
@ -1,41 +1,35 @@
-# Copyright (C) 2024-2024 Intel Corporation.
-#
+# Copyright (C) 2017-2025 Intel Corporation.
+# 
 # This software and the related documents are Intel copyrighted materials, and your
 # use of them is governed by the express license under which they were provided to
 # you ("License"). Unless the License provides otherwise, you may not use, modify,
 # copy, publish, distribute, disclose or transmit this software or the related
 # documents without Intel's prior written permission.
-#
+# 
 # This software and the related documents are provided as is, with no express or
 # implied warranties, other than those that are expressly stated in the License.
 #
-# The CPUID information in this file is for software enabling purposes only and
-# it is not a full and accurate representation of the CPU under development which
-# it represents.
-# The CPUID information in this file is not a guarantee of the availability of
-# features or characteristics in the final released CPU.
-#
 # CPUID_VERSION = 1.0
 #      Input      =>               Output
 # EAX      ECX    =>   EAX      EBX      ECX      EDX
 00000000 ******** => 00000024 68747541 444d4163 69746e65
-00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
+00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff
 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
 00000003 ******** => 00000000 00000000 00000000 00000000
-00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
+00000004 00000000 => 7c004121 01c0003f 0000003f 00000000 #Deterministic Cache
 00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
-00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
-00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
+00000004 00000002 => 7c004143 03c0003f 000003ff 00000000
+00000004 00000003 => 7c0fc163 0280003f 0000dfff 00000004
 00000004 00000004 => 00000000 00000000 00000000 00000000
 00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
 00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
-00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
-00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
+00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features
+00000007 00000001 => 98ee00bf 00000002 00000020 1d29cd3e
 00000008 ******** => 00000000 00000000 00000000 00000000
 00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
 0000000a ******** => 07300403 00000000 00000000 00000603
-0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
-0000000b 00000001 => 00000004 00000002 00000201 0000001e
+0000000b 00000000 => 00000001 00000002 00000100 00000000 #Extended Topology
+0000000b 00000001 => 00000004 00000002 00000201 00000000
 0000000c ******** => 00000000 00000000 00000000 00000000
 0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
 0000000d 00000001 => 0000001f 00000240 00000100 00000000
@ -52,10 +46,8 @@
 0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
 0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
 0000001e 00000001 => 000001ff 00000000 00000000 00000000
-0000001f 00000000 => 00000001 00000002 00000100 0000001e
-0000001f 00000001 => 00000007 00000070 00000201 0000001e
-0000001f 00000002 => 00000000 00000000 00000002 0000001e
-00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
+00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10
+00000024 00000001 => 00000000 00000000 00000004 00000000
 80000000 ******** => 80000008 00000000 00000000 00000000
 80000001 ******** => 00000000 00000000 00200961 2c100000
 80000002 ******** => 00000000 00000000 00000000 00000000
@ -66,6 +58,6 @@
 80000007 ******** => 00000000 00000000 00000000 00000100
 80000008 ******** => 00003028 00000200 00000200 00000000

-# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
+# This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to
 # use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
 # `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.
--- a/library/stdarch/ci/run.sh
+++ b/library/stdarch/ci/run.sh
@ -144,21 +144,21 @@ case ${TARGET} in
    aarch64-unknown-linux-gnu*)
        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
-        TEST_CXX_COMPILER="clang++-19"
+        TEST_CXX_COMPILER="clang++"
        TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
        ;;

    aarch64_be-unknown-linux-gnu*)
        TEST_CPPFLAGS="-fuse-ld=lld"
        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
-        TEST_CXX_COMPILER="clang++-19"
+        TEST_CXX_COMPILER="clang++"
        TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
        ;;

    armv7-unknown-linux-gnueabihf*)
        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
-        TEST_CXX_COMPILER="clang++-19"
+        TEST_CXX_COMPILER="clang++"
        TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
        ;;
    *)
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/core_arch_docs.md
+++ b/library/stdarch/crates/core_arch/src/core_arch_docs.md
@ -193,6 +193,7 @@ others at:
 * [`powerpc64`]
 * [`nvptx`]
 * [`wasm32`]
+* [`loongarch32`]
 * [`loongarch64`]
 * [`s390x`]

@ -208,6 +209,7 @@ others at:
 [`powerpc64`]: ../../core/arch/powerpc64/index.html
 [`nvptx`]: ../../core/arch/nvptx/index.html
 [`wasm32`]: ../../core/arch/wasm32/index.html
+[`loongarch32`]: ../../core/arch/loongarch32/index.html
 [`loongarch64`]: ../../core/arch/loongarch64/index.html
 [`s390x`]: ../../core/arch/s390x/index.html

--- a/library/stdarch/crates/core_arch/src/loongarch32/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch32/mod.rs
@ -0,0 +1,47 @@
+//! `LoongArch32` intrinsics
+
+use crate::arch::asm;
+
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.loongarch.cacop.w"]
+    fn __cacop(a: i32, b: i32, c: i32);
+    #[link_name = "llvm.loongarch.csrrd.w"]
+    fn __csrrd(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.csrwr.w"]
+    fn __csrwr(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.csrxchg.w"]
+    fn __csrxchg(a: i32, b: i32, c: i32) -> i32;
+}
+
+/// Generates the cache operation instruction
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn cacop<const IMM12: i32>(a: i32, b: i32) {
+    static_assert_simm_bits!(IMM12, 12);
+    __cacop(a, b, IMM12);
+}
+
+/// Reads the CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrrd<const IMM14: i32>() -> i32 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrrd(IMM14)
+}
+
+/// Writes the CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrwr<const IMM14: i32>(a: i32) -> i32 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrwr(a, IMM14)
+}
+
+/// Exchanges the CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn csrxchg<const IMM14: i32>(a: i32, b: i32) -> i32 {
+    static_assert_uimm_bits!(IMM14, 14);
+    __csrxchg(a, b, IMM14)
+}
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
--- a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs
@ -1,4 +1,4 @@
-//! `LoongArch` intrinsics
+//! `LoongArch64` intrinsics

 mod lasx;
 mod lsx;
@ -13,89 +13,30 @@ use crate::arch::asm;
 /// Reads the 64-bit stable counter value and the counter ID
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn rdtime_d() -> (i64, isize) {
-    let val: i64;
-    let tid: isize;
-    asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
-    (val, tid)
-}
-
-/// Reads the lower 32-bit stable counter value and the counter ID
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn rdtimel_w() -> (i32, isize) {
-    let val: i32;
-    let tid: isize;
-    asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
-    (val, tid)
-}
-
-/// Reads the upper 32-bit stable counter value and the counter ID
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn rdtimeh_w() -> (i32, isize) {
-    let val: i32;
-    let tid: isize;
-    asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
+pub fn rdtime_d() -> (i64, isize) {
+    let (val, tid): (i64, isize);
+    unsafe { asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
    (val, tid)
 }

 #[allow(improper_ctypes)]
 unsafe extern "unadjusted" {
-    #[link_name = "llvm.loongarch.crc.w.b.w"]
-    fn __crc_w_b_w(a: i32, b: i32) -> i32;
-    #[link_name = "llvm.loongarch.crc.w.h.w"]
-    fn __crc_w_h_w(a: i32, b: i32) -> i32;
-    #[link_name = "llvm.loongarch.crc.w.w.w"]
-    fn __crc_w_w_w(a: i32, b: i32) -> i32;
    #[link_name = "llvm.loongarch.crc.w.d.w"]
    fn __crc_w_d_w(a: i64, b: i32) -> i32;
-    #[link_name = "llvm.loongarch.crcc.w.b.w"]
-    fn __crcc_w_b_w(a: i32, b: i32) -> i32;
-    #[link_name = "llvm.loongarch.crcc.w.h.w"]
-    fn __crcc_w_h_w(a: i32, b: i32) -> i32;
-    #[link_name = "llvm.loongarch.crcc.w.w.w"]
-    fn __crcc_w_w_w(a: i32, b: i32) -> i32;
    #[link_name = "llvm.loongarch.crcc.w.d.w"]
    fn __crcc_w_d_w(a: i64, b: i32) -> i32;
    #[link_name = "llvm.loongarch.cacop.d"]
    fn __cacop(a: i64, b: i64, c: i64);
-    #[link_name = "llvm.loongarch.dbar"]
-    fn __dbar(a: i32);
-    #[link_name = "llvm.loongarch.ibar"]
-    fn __ibar(a: i32);
-    #[link_name = "llvm.loongarch.movgr2fcsr"]
-    fn __movgr2fcsr(a: i32, b: i32);
-    #[link_name = "llvm.loongarch.movfcsr2gr"]
-    fn __movfcsr2gr(a: i32) -> i32;
    #[link_name = "llvm.loongarch.csrrd.d"]
    fn __csrrd(a: i32) -> i64;
    #[link_name = "llvm.loongarch.csrwr.d"]
    fn __csrwr(a: i64, b: i32) -> i64;
    #[link_name = "llvm.loongarch.csrxchg.d"]
    fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
-    #[link_name = "llvm.loongarch.iocsrrd.b"]
-    fn __iocsrrd_b(a: i32) -> i32;
-    #[link_name = "llvm.loongarch.iocsrrd.h"]
-    fn __iocsrrd_h(a: i32) -> i32;
-    #[link_name = "llvm.loongarch.iocsrrd.w"]
-    fn __iocsrrd_w(a: i32) -> i32;
    #[link_name = "llvm.loongarch.iocsrrd.d"]
    fn __iocsrrd_d(a: i32) -> i64;
-    #[link_name = "llvm.loongarch.iocsrwr.b"]
-    fn __iocsrwr_b(a: i32, b: i32);
-    #[link_name = "llvm.loongarch.iocsrwr.h"]
-    fn __iocsrwr_h(a: i32, b: i32);
-    #[link_name = "llvm.loongarch.iocsrwr.w"]
-    fn __iocsrwr_w(a: i32, b: i32);
    #[link_name = "llvm.loongarch.iocsrwr.d"]
    fn __iocsrwr_d(a: i64, b: i32);
-    #[link_name = "llvm.loongarch.break"]
-    fn __break(a: i32);
-    #[link_name = "llvm.loongarch.cpucfg"]
-    fn __cpucfg(a: i32) -> i32;
-    #[link_name = "llvm.loongarch.syscall"]
-    fn __syscall(a: i32);
    #[link_name = "llvm.loongarch.asrtle.d"]
    fn __asrtle(a: i64, b: i64);
    #[link_name = "llvm.loongarch.asrtgt.d"]
@ -104,70 +45,20 @@ unsafe extern "unadjusted" {
    fn __lddir(a: i64, b: i64) -> i64;
    #[link_name = "llvm.loongarch.ldpte.d"]
    fn __ldpte(a: i64, b: i64);
-    #[link_name = "llvm.loongarch.frecipe.s"]
-    fn __frecipe_s(a: f32) -> f32;
-    #[link_name = "llvm.loongarch.frecipe.d"]
-    fn __frecipe_d(a: f64) -> f64;
-    #[link_name = "llvm.loongarch.frsqrte.s"]
-    fn __frsqrte_s(a: f32) -> f32;
-    #[link_name = "llvm.loongarch.frsqrte.d"]
-    fn __frsqrte_d(a: f64) -> f64;
 }

 /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
-    __crc_w_b_w(a, b)
-}
-
-/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
-    __crc_w_h_w(a, b)
-}
-
-/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
-    __crc_w_w_w(a, b)
-}
-
-/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
-    __crc_w_d_w(a, b)
+pub fn crc_w_d_w(a: i64, b: i32) -> i32 {
+    unsafe { __crc_w_d_w(a, b) }
 }

 /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
-    __crcc_w_b_w(a, b)
-}
-
-/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
-    __crcc_w_h_w(a, b)
-}
-
-/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
-    __crcc_w_w_w(a, b)
-}
-
-/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
-    __crcc_w_d_w(a, b)
+pub fn crcc_w_d_w(a: i64, b: i32) -> i32 {
+    unsafe { __crcc_w_d_w(a, b) }
 }

 /// Generates the cache operation instruction
@ -178,38 +69,6 @@ pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
    __cacop(a, b, IMM12);
 }

-/// Generates the memory barrier instruction
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn dbar<const IMM15: i32>() {
-    static_assert_uimm_bits!(IMM15, 15);
-    __dbar(IMM15);
-}
-
-/// Generates the instruction-fetch barrier instruction
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn ibar<const IMM15: i32>() {
-    static_assert_uimm_bits!(IMM15, 15);
-    __ibar(IMM15);
-}
-
-/// Moves data from a GPR to the FCSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
-    static_assert_uimm_bits!(IMM5, 5);
-    __movgr2fcsr(IMM5, a);
-}
-
-/// Moves data from a FCSR to the GPR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
-    static_assert_uimm_bits!(IMM5, 5);
-    __movfcsr2gr(IMM5)
-}
-
 /// Reads the CSR
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -234,27 +93,6 @@ pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
    __csrxchg(a, b, IMM14)
 }

-/// Reads the 8-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrrd_b(a: i32) -> i32 {
-    __iocsrrd_b(a)
-}
-
-/// Reads the 16-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrrd_h(a: i32) -> i32 {
-    __iocsrrd_h(a)
-}
-
-/// Reads the 32-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrrd_w(a: i32) -> i32 {
-    __iocsrrd_w(a)
-}
-
 /// Reads the 64-bit IO-CSR
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -262,27 +100,6 @@ pub unsafe fn iocsrrd_d(a: i32) -> i64 {
    __iocsrrd_d(a)
 }

-/// Writes the 8-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrwr_b(a: i32, b: i32) {
-    __iocsrwr_b(a, b)
-}
-
-/// Writes the 16-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrwr_h(a: i32, b: i32) {
-    __iocsrwr_h(a, b)
-}
-
-/// Writes the 32-bit IO-CSR
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn iocsrwr_w(a: i32, b: i32) {
-    __iocsrwr_w(a, b)
-}
-
 /// Writes the 64-bit IO-CSR
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -290,29 +107,6 @@ pub unsafe fn iocsrwr_d(a: i64, b: i32) {
    __iocsrwr_d(a, b)
 }

-/// Generates the breakpoint instruction
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn brk<const IMM15: i32>() {
-    static_assert_uimm_bits!(IMM15, 15);
-    __break(IMM15);
-}
-
-/// Reads the CPU configuration register
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn cpucfg(a: i32) -> i32 {
-    __cpucfg(a)
-}
-
-/// Generates the syscall instruction
-#[inline]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn syscall<const IMM15: i32>() {
-    static_assert_uimm_bits!(IMM15, 15);
-    __syscall(IMM15);
-}
-
 /// Generates the less-than-or-equal asseration instruction
 #[inline]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -342,35 +136,3 @@ pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
 pub unsafe fn ldpte<const B: i64>(a: i64) {
    __ldpte(a, B)
 }
-
-/// Calculate the approximate single-precision result of 1.0 divided
-#[inline]
-#[target_feature(enable = "frecipe")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn frecipe_s(a: f32) -> f32 {
-    __frecipe_s(a)
-}
-
-/// Calculate the approximate double-precision result of 1.0 divided
-#[inline]
-#[target_feature(enable = "frecipe")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn frecipe_d(a: f64) -> f64 {
-    __frecipe_d(a)
-}
-
-/// Calculate the approximate single-precision result of dividing 1.0 by the square root
-#[inline]
-#[target_feature(enable = "frecipe")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn frsqrte_s(a: f32) -> f32 {
-    __frsqrte_s(a)
-}
-
-/// Calculate the approximate double-precision result of dividing 1.0 by the square root
-#[inline]
-#[target_feature(enable = "frecipe")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn frsqrte_d(a: f64) -> f64 {
-    __frsqrte_d(a)
-}
--- a/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
@ -0,0 +1,242 @@
+//! `Shared LoongArch` intrinsics
+
+use crate::arch::asm;
+
+/// Reads the lower 32-bit stable counter value and the counter ID
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn rdtimel_w() -> (i32, isize) {
+    let (val, tid): (i32, isize);
+    unsafe { asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
+    (val, tid)
+}
+
+/// Reads the upper 32-bit stable counter value and the counter ID
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn rdtimeh_w() -> (i32, isize) {
+    let (val, tid): (i32, isize);
+    unsafe { asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
+    (val, tid)
+}
+
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.loongarch.crc.w.b.w"]
+    fn __crc_w_b_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crc.w.h.w"]
+    fn __crc_w_h_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crc.w.w.w"]
+    fn __crc_w_w_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.b.w"]
+    fn __crcc_w_b_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.h.w"]
+    fn __crcc_w_h_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.crcc.w.w.w"]
+    fn __crcc_w_w_w(a: i32, b: i32) -> i32;
+    #[link_name = "llvm.loongarch.dbar"]
+    fn __dbar(a: i32);
+    #[link_name = "llvm.loongarch.ibar"]
+    fn __ibar(a: i32);
+    #[link_name = "llvm.loongarch.movgr2fcsr"]
+    fn __movgr2fcsr(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.movfcsr2gr"]
+    fn __movfcsr2gr(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.b"]
+    fn __iocsrrd_b(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.h"]
+    fn __iocsrrd_h(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrrd.w"]
+    fn __iocsrrd_w(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.iocsrwr.b"]
+    fn __iocsrwr_b(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.iocsrwr.h"]
+    fn __iocsrwr_h(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.iocsrwr.w"]
+    fn __iocsrwr_w(a: i32, b: i32);
+    #[link_name = "llvm.loongarch.break"]
+    fn __break(a: i32);
+    #[link_name = "llvm.loongarch.cpucfg"]
+    fn __cpucfg(a: i32) -> i32;
+    #[link_name = "llvm.loongarch.syscall"]
+    fn __syscall(a: i32);
+    #[link_name = "llvm.loongarch.frecipe.s"]
+    fn __frecipe_s(a: f32) -> f32;
+    #[link_name = "llvm.loongarch.frecipe.d"]
+    fn __frecipe_d(a: f64) -> f64;
+    #[link_name = "llvm.loongarch.frsqrte.s"]
+    fn __frsqrte_s(a: f32) -> f32;
+    #[link_name = "llvm.loongarch.frsqrte.d"]
+    fn __frsqrte_d(a: f64) -> f64;
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crc_w_b_w(a: i32, b: i32) -> i32 {
+    unsafe { __crc_w_b_w(a, b) }
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crc_w_h_w(a: i32, b: i32) -> i32 {
+    unsafe { __crc_w_h_w(a, b) }
+}
+
+/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crc_w_w_w(a: i32, b: i32) -> i32 {
+    unsafe { __crc_w_w_w(a, b) }
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crcc_w_b_w(a: i32, b: i32) -> i32 {
+    unsafe { __crcc_w_b_w(a, b) }
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crcc_w_h_w(a: i32, b: i32) -> i32 {
+    unsafe { __crcc_w_h_w(a, b) }
+}
+
+/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn crcc_w_w_w(a: i32, b: i32) -> i32 {
+    unsafe { __crcc_w_w_w(a, b) }
+}
+
+/// Generates the memory barrier instruction
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn dbar<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    unsafe { __dbar(IMM15) };
+}
+
+/// Generates the instruction-fetch barrier instruction
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn ibar<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    unsafe { __ibar(IMM15) };
+}
+
+/// Moves data from a GPR to the FCSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
+    static_assert_uimm_bits!(IMM5, 5);
+    __movgr2fcsr(IMM5, a);
+}
+
+/// Moves data from a FCSR to the GPR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn movfcsr2gr<const IMM5: i32>() -> i32 {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { __movfcsr2gr(IMM5) }
+}
+
+/// Reads the 8-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_b(a: i32) -> i32 {
+    __iocsrrd_b(a)
+}
+
+/// Reads the 16-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_h(a: i32) -> i32 {
+    __iocsrrd_h(a)
+}
+
+/// Reads the 32-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrrd_w(a: i32) -> i32 {
+    __iocsrrd_w(a)
+}
+
+/// Writes the 8-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_b(a: i32, b: i32) {
+    __iocsrwr_b(a, b)
+}
+
+/// Writes the 16-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_h(a: i32, b: i32) {
+    __iocsrwr_h(a, b)
+}
+
+/// Writes the 32-bit IO-CSR
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn iocsrwr_w(a: i32, b: i32) {
+    __iocsrwr_w(a, b)
+}
+
+/// Generates the breakpoint instruction
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn brk<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __break(IMM15);
+}
+
+/// Reads the CPU configuration register
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn cpucfg(a: i32) -> i32 {
+    unsafe { __cpucfg(a) }
+}
+
+/// Generates the syscall instruction
+#[inline]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn syscall<const IMM15: i32>() {
+    static_assert_uimm_bits!(IMM15, 15);
+    __syscall(IMM15);
+}
+
+/// Calculate the approximate single-precision result of 1.0 divided
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn frecipe_s(a: f32) -> f32 {
+    unsafe { __frecipe_s(a) }
+}
+
+/// Calculate the approximate double-precision result of 1.0 divided
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn frecipe_d(a: f64) -> f64 {
+    unsafe { __frecipe_d(a) }
+}
+
+/// Calculate the approximate single-precision result of dividing 1.0 by the square root
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn frsqrte_s(a: f32) -> f32 {
+    unsafe { __frsqrte_s(a) }
+}
+
+/// Calculate the approximate double-precision result of dividing 1.0 by the square root
+#[inline]
+#[target_feature(enable = "frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn frsqrte_d(a: f64) -> f64 {
+    unsafe { __frsqrte_d(a) }
+}
--- a/library/stdarch/crates/core_arch/src/mod.rs
+++ b/library/stdarch/crates/core_arch/src/mod.rs
@ -16,6 +16,9 @@ mod riscv_shared;
 ))]
 mod arm_shared;

+#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64", doc))]
+mod loongarch_shared;
+
 mod simd;

 #[doc = include_str!("core_arch_docs.md")]
@ -271,13 +274,25 @@ pub mod arch {
        pub use crate::core_arch::nvptx::*;
    }

-    /// Platform-specific intrinsics for the `loongarch` platform.
+    /// Platform-specific intrinsics for the `loongarch32` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "loongarch32", doc))]
+    #[doc(cfg(target_arch = "loongarch32"))]
+    #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+    pub mod loongarch32 {
+        pub use crate::core_arch::loongarch_shared::*;
+        pub use crate::core_arch::loongarch32::*;
+    }
+
+    /// Platform-specific intrinsics for the `loongarch64` platform.
    ///
    /// See the [module documentation](../index.html) for more details.
    #[cfg(any(target_arch = "loongarch64", doc))]
    #[doc(cfg(target_arch = "loongarch64"))]
    #[unstable(feature = "stdarch_loongarch", issue = "117427")]
    pub mod loongarch64 {
+        pub use crate::core_arch::loongarch_shared::*;
        pub use crate::core_arch::loongarch64::*;
    }

@ -334,6 +349,10 @@ mod powerpc64;
 #[doc(cfg(target_arch = "nvptx64"))]
 mod nvptx;

+#[cfg(any(target_arch = "loongarch32", doc))]
+#[doc(cfg(target_arch = "loongarch32"))]
+mod loongarch32;
+
 #[cfg(any(target_arch = "loongarch64", doc))]
 #[doc(cfg(target_arch = "loongarch64"))]
 mod loongarch64;
--- a/library/stdarch/crates/core_arch/src/s390x/vector.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs
@ -1181,6 +1181,20 @@ mod sealed {

    impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }

+    // Z vector intrinsic      C23 math.h  LLVM IR         ISO/IEC 60559 operation        inexact  vfidb parameters
+    //
+    // vec_rint                rint        llvm.rint       roundToIntegralExact           yes      0, 0
+    // vec_roundc              nearbyint   llvm.nearbyint  n/a                            no       4, 0
+    // vec_floor / vec_roundm  floor       llvm.floor      roundToIntegralTowardNegative  no       4, 7
+    // vec_ceil / vec_roundp   ceil        llvm.ceil       roundToIntegralTowardPositive  no       4, 6
+    // vec_trunc / vec_roundz  trunc       llvm.trunc      roundToIntegralTowardZero      no       4, 5
+    // vec_round               roundeven   llvm.roundeven  roundToIntegralTiesToEven      no       4, 4
+    // n/a                     round       llvm.round      roundToIntegralTiesAway        no       4, 1
+
+    // `simd_round_ties_even` is implemented as `llvm.rint`.
+    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
+    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
+
    test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32,  "vector-enhancements-1" vfisb] }
    test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }

@ -1189,9 +1203,6 @@ mod sealed {
    test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
    test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }

-    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
-    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
-
    #[unstable(feature = "stdarch_s390x", issue = "135681")]
    pub trait VectorRoundc {
        unsafe fn vec_roundc(self) -> Self;
@ -2254,14 +2265,14 @@ mod sealed {

    #[inline]
    #[target_feature(enable = "vector")]
-    #[cfg_attr(test, assert_instr("vlbb"))]
+    #[cfg_attr(test, assert_instr(vlbb))]
    unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> {
        vector_signed_int::vec_load_bndry::<512>(ptr)
    }

    #[inline]
    #[target_feature(enable = "vector")]
-    #[cfg_attr(test, assert_instr(vst))]
+    #[cfg_attr(test, assert_instr(vstl))]
    unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) {
        vector.vec_store_len(ptr, byte_count)
    }
@ -2787,11 +2798,11 @@ mod sealed {
    }

    test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] }
-    test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] }
+    test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalhw ] }
    test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] }

    test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] }
-    test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] }
+    test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalhw ] }
    test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] }

    impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char );
--- a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/compile.rs
@ -1,64 +1,51 @@
-use crate::common::compile_c::CompilationCommandBuilder;
-use crate::common::gen_c::compile_c_programs;
+use crate::common::cli::ProcessedCli;
+use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
+
+pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
+    let cpp_compiler = config.cpp_compiler.as_ref()?;

-pub fn compile_c_arm(
-    intrinsics_name_list: &[String],
-    compiler: &str,
-    target: &str,
-    cxx_toolchain_dir: Option<&str>,
-) -> bool {
    // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
    let mut command = CompilationCommandBuilder::new()
        .add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
-        .set_compiler(compiler)
-        .set_target(target)
+        .set_compiler(cpp_compiler)
+        .set_target(&config.target)
        .set_opt_level("2")
-        .set_cxx_toolchain_dir(cxx_toolchain_dir)
+        .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
        .set_project_root("c_programs")
        .add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]);

-    if !target.contains("v7") {
+    if !config.target.contains("v7") {
        command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]);
    }

-    /*
-     * clang++ cannot link an aarch64_be object file, so we invoke
-     * aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we
-     * are testing the intrinsics against LLVM.
-     *
-     * Note: setting `--sysroot=<...>` which is the obvious thing to do
-     * does not work as it gets caught up with `#include_next <stdlib.h>`
-     * not existing...
-     */
-    if target.contains("aarch64_be") {
-        command = command
-            .set_linker(
-                cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++",
-            )
-            .set_include_paths(vec![
-                "/include",
-                "/aarch64_be-none-linux-gnu/include",
-                "/aarch64_be-none-linux-gnu/include/c++/14.2.1",
-                "/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu",
-                "/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward",
-                "/aarch64_be-none-linux-gnu/libc/usr/include",
-            ]);
-    }
-
-    if !compiler.contains("clang") {
+    if !cpp_compiler.contains("clang") {
        command = command.add_extra_flag("-flax-vector-conversions");
    }

-    let compiler_commands = intrinsics_name_list
-        .iter()
-        .map(|intrinsic_name| {
-            command
-                .clone()
-                .set_input_name(intrinsic_name)
-                .set_output_name(intrinsic_name)
-                .make_string()
-        })
-        .collect::<Vec<_>>();
+    let mut cpp_compiler = command.into_cpp_compilation();

-    compile_c_programs(&compiler_commands)
+    if config.target.contains("aarch64_be") {
+        let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else {
+            panic!(
+                "target `{}` must specify `cxx_toolchain_dir`",
+                config.target
+            )
+        };
+
+        cpp_compiler.command_mut().args([
+            &format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"),
+            "--include-directory",
+            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"),
+            "--include-directory",
+            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"),
+            "-L",
+            &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
+            "-L",
+            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"),
+            "-B",
+            &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
+        ]);
+    }
+
+    Some(cpp_compiler)
 }
--- a/library/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
@ -114,7 +114,6 @@ pub const AARCH_CONFIGURATIONS: &str = r#"
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
-#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
 #![feature(fmt_helpers_for_derive)]
--- a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs
@ -1,8 +1,8 @@
 use crate::common::argument::ArgumentList;
 use crate::common::indentation::Indentation;
 use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
-use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
-use std::ops::Deref;
+use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
+use std::ops::{Deref, DerefMut};

 #[derive(Debug, Clone, PartialEq)]
 pub struct ArmIntrinsicType(pub IntrinsicType);
@ -15,6 +15,12 @@ impl Deref for ArmIntrinsicType {
    }
 }

+impl DerefMut for ArmIntrinsicType {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
 impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
    fn arguments(&self) -> ArgumentList<ArmIntrinsicType> {
        self.arguments.clone()
@ -73,8 +79,9 @@ impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
                    TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(),
                    TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(),
                    TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(),
-                    TypeKind::Int => format!("int{}_t", self.results().inner_size()),
-                    TypeKind::UInt => format!("uint{}_t", self.results().inner_size()),
+                    TypeKind::Int(Sign::Signed) => format!("int{}_t", self.results().inner_size()),
+                    TypeKind::Int(Sign::Unsigned) =>
+                        format!("uint{}_t", self.results().inner_size()),
                    TypeKind::Poly => format!("poly{}_t", self.results().inner_size()),
                    ty => todo!("print_result_c - Unknown type: {:#?}", ty),
                },
--- a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs
@ -110,7 +110,7 @@ fn json_to_intrinsic(
    Ok(Intrinsic {
        name,
        arguments,
-        results: *results,
+        results: results,
        arch_tags: intr.architectures,
    })
 }
--- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
@ -4,15 +4,20 @@ mod intrinsic;
 mod json_parser;
 mod types;

+use std::fs::File;
+
+use rayon::prelude::*;
+
+use crate::arm::config::POLY128_OSTREAM_DEF;
 use crate::common::SupportedArchitectureTest;
 use crate::common::cli::ProcessedCli;
 use crate::common::compare::compare_outputs;
+use crate::common::gen_c::{write_main_cpp, write_mod_cpp};
 use crate::common::gen_rust::compile_rust_programs;
 use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
 use crate::common::intrinsic_helpers::TypeKind;
-use crate::common::write_file::{write_c_testfiles, write_rust_testfiles};
-use compile::compile_c_arm;
-use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices};
+use crate::common::write_file::write_rust_testfiles;
+use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices};
 use intrinsic::ArmIntrinsicType;
 use json_parser::get_neon_intrinsics;

@ -21,6 +26,13 @@ pub struct ArmArchitectureTest {
    cli_options: ProcessedCli,
 }

+fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
+    let available_parallelism = std::thread::available_parallelism().unwrap().get();
+    let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
+
+    (chunk_size, intrinsic_count.div_ceil(chunk_size))
+}
+
 impl SupportedArchitectureTest for ArmArchitectureTest {
    fn create(cli_options: ProcessedCli) -> Box<Self> {
        let a32 = cli_options.target.contains("v7");
@ -51,33 +63,58 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
    }

    fn build_c_file(&self) -> bool {
-        let compiler = self.cli_options.cpp_compiler.as_deref();
-        let target = &self.cli_options.target;
-        let cxx_toolchain_dir = self.cli_options.cxx_toolchain_dir.as_deref();
        let c_target = "aarch64";
+        let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];

-        let intrinsics_name_list = write_c_testfiles(
-            &self
-                .intrinsics
-                .iter()
-                .map(|i| i as &dyn IntrinsicDefinition<_>)
-                .collect::<Vec<_>>(),
-            target,
+        let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len());
+
+        let cpp_compiler = compile::build_cpp_compilation(&self.cli_options).unwrap();
+
+        let notice = &build_notices("// ");
+        self.intrinsics
+            .par_chunks(chunk_size)
+            .enumerate()
+            .map(|(i, chunk)| {
+                let c_filename = format!("c_programs/mod_{i}.cpp");
+                let mut file = File::create(&c_filename).unwrap();
+                write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap();
+
+                // compile this cpp file into a .o file
+                let output = cpp_compiler
+                    .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
+                assert!(output.status.success(), "{output:?}");
+
+                Ok(())
+            })
+            .collect::<Result<(), std::io::Error>>()
+            .unwrap();
+
+        let mut file = File::create("c_programs/main.cpp").unwrap();
+        write_main_cpp(
+            &mut file,
            c_target,
-            &["arm_neon.h", "arm_acle.h", "arm_fp16.h"],
-            &build_notices("// "),
-            &[POLY128_OSTREAM_DEF],
-        );
+            POLY128_OSTREAM_DEF,
+            self.intrinsics.iter().map(|i| i.name.as_str()),
+        )
+        .unwrap();

-        match compiler {
-            None => true,
-            Some(compiler) => compile_c_arm(
-                intrinsics_name_list.as_slice(),
-                compiler,
-                target,
-                cxx_toolchain_dir,
-            ),
-        }
+        // compile this cpp file into a .o file
+        info!("compiling main.cpp");
+        let output = cpp_compiler
+            .compile_object_file("main.cpp", "intrinsic-test-programs.o")
+            .unwrap();
+        assert!(output.status.success(), "{output:?}");
+
+        let object_files = (0..chunk_count)
+            .map(|i| format!("mod_{i}.o"))
+            .chain(["intrinsic-test-programs.o".to_owned()]);
+
+        let output = cpp_compiler
+            .link_executable(object_files, "intrinsic-test-programs")
+            .unwrap();
+        assert!(output.status.success(), "{output:?}");
+
+        true
    }

    fn build_rust_file(&self) -> bool {
@ -104,7 +141,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
    }

    fn compare_outputs(&self) -> bool {
-        if let Some(ref toolchain) = self.cli_options.toolchain {
+        if self.cli_options.toolchain.is_some() {
            let intrinsics_name_list = self
                .intrinsics
                .iter()
@ -113,8 +150,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {

            compare_outputs(
                &intrinsics_name_list,
-                toolchain,
-                &self.cli_options.c_runner,
+                &self.cli_options.runner,
                &self.cli_options.target,
            )
        } else {
--- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
@ -1,6 +1,6 @@
 use super::intrinsic::ArmIntrinsicType;
 use crate::common::cli::Language;
-use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
+use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};

 impl IntrinsicTypeDefinition for ArmIntrinsicType {
    /// Gets a string containing the typename for this type in C format.
@ -73,8 +73,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
            format!(
                "vld{len}{quad}_{type}{size}",
                type = match k {
-                    TypeKind::UInt => "u",
-                    TypeKind::Int => "s",
+                    TypeKind::Int(Sign::Unsigned) => "u",
+                    TypeKind::Int(Sign::Signed) => "s",
                    TypeKind::Float => "f",
                    // The ACLE doesn't support 64-bit polynomial loads on Armv7
                    // if armv7 and bl == 64, use "s", else "p"
@ -107,8 +107,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
            format!(
                "vget{quad}_lane_{type}{size}",
                type = match k {
-                    TypeKind::UInt => "u",
-                    TypeKind::Int => "s",
+                    TypeKind::Int(Sign::Unsigned) => "u",
+                    TypeKind::Int(Sign::Signed) => "s",
                    TypeKind::Float => "f",
                    TypeKind::Poly => "p",
                    x => todo!("get_load_function TypeKind: {:#?}", x),
@ -121,7 +121,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
        }
    }

-    fn from_c(s: &str, target: &str) -> Result<Box<Self>, String> {
+    fn from_c(s: &str, target: &str) -> Result<Self, String> {
        const CONST_STR: &str = "const";
        if let Some(s) = s.strip_suffix('*') {
            let (s, constant) = match s.trim().strip_suffix(CONST_STR) {
@ -131,9 +131,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
            let s = s.trim_end();
            let temp_return = ArmIntrinsicType::from_c(s, target);
            temp_return.map(|mut op| {
-                let edited = op.as_mut();
-                edited.0.ptr = true;
-                edited.0.ptr_constant = constant;
+                op.ptr = true;
+                op.ptr_constant = constant;
                op
            })
        } else {
@ -163,7 +162,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
                    ),
                    None => None,
                };
-                Ok(Box::new(ArmIntrinsicType(IntrinsicType {
+                Ok(ArmIntrinsicType(IntrinsicType {
                    ptr: false,
                    ptr_constant: false,
                    constant,
@ -172,14 +171,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
                    simd_len,
                    vec_len,
                    target: target.to_string(),
-                })))
+                }))
            } else {
                let kind = start.parse::<TypeKind>()?;
                let bit_len = match kind {
-                    TypeKind::Int => Some(32),
+                    TypeKind::Int(_) => Some(32),
                    _ => None,
                };
-                Ok(Box::new(ArmIntrinsicType(IntrinsicType {
+                Ok(ArmIntrinsicType(IntrinsicType {
                    ptr: false,
                    ptr_constant: false,
                    constant,
@ -188,7 +187,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
                    simd_len: None,
                    vec_len: None,
                    target: target.to_string(),
-                })))
+                }))
            }
        }
    }
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@ -76,7 +76,7 @@ where
        Argument {
            pos,
            name: String::from(var_name),
-            ty: *ty,
+            ty: ty,
            constraint,
        }
    }
@ -125,19 +125,23 @@ where
    /// Creates a line for each argument that initializes an array for C from which `loads` argument
    /// values can be loaded  as a sliding window.
    /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
-    pub fn gen_arglists_c(&self, indentation: Indentation, loads: u32) -> String {
-        self.iter()
-            .filter(|&arg| !arg.has_constraint())
-            .map(|arg| {
-                format!(
-                    "{indentation}const {ty} {name}_vals[] = {values};",
-                    ty = arg.ty.c_scalar_type(),
-                    name = arg.name,
-                    values = arg.ty.populate_random(indentation, loads, &Language::C)
-                )
-            })
-            .collect::<Vec<_>>()
-            .join("\n")
+    pub fn gen_arglists_c(
+        &self,
+        w: &mut impl std::io::Write,
+        indentation: Indentation,
+        loads: u32,
+    ) -> std::io::Result<()> {
+        for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
+            writeln!(
+                w,
+                "{indentation}const {ty} {name}_vals[] = {values};",
+                ty = arg.ty.c_scalar_type(),
+                name = arg.name,
+                values = arg.ty.populate_random(indentation, loads, &Language::C)
+            )?
+        }
+
+        Ok(())
    }

    /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
--- a/library/stdarch/crates/intrinsic-test/src/common/cli.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/cli.rs
@ -60,7 +60,7 @@ pub struct ProcessedCli {
    pub filename: PathBuf,
    pub toolchain: Option<String>,
    pub cpp_compiler: Option<String>,
-    pub c_runner: String,
+    pub runner: String,
    pub target: String,
    pub linker: Option<String>,
    pub cxx_toolchain_dir: Option<String>,
@ -70,7 +70,7 @@ pub struct ProcessedCli {
 impl ProcessedCli {
    pub fn new(cli_options: Cli) -> Self {
        let filename = cli_options.input;
-        let c_runner = cli_options.runner.unwrap_or_default();
+        let runner = cli_options.runner.unwrap_or_default();
        let target = cli_options.target;
        let linker = cli_options.linker;
        let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
@ -102,7 +102,7 @@ impl ProcessedCli {
        Self {
            toolchain,
            cpp_compiler,
-            c_runner,
+            runner,
            target,
            linker,
            cxx_toolchain_dir,
--- a/library/stdarch/crates/intrinsic-test/src/common/compare.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/compare.rs
@ -2,27 +2,25 @@ use super::cli::FailureReason;
 use rayon::prelude::*;
 use std::process::Command;

-pub fn compare_outputs(
-    intrinsic_name_list: &Vec<String>,
-    toolchain: &str,
-    runner: &str,
-    target: &str,
-) -> bool {
+pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target: &str) -> bool {
+    fn runner_command(runner: &str) -> Command {
+        let mut it = runner.split_whitespace();
+        let mut cmd = Command::new(it.next().unwrap());
+        cmd.args(it);
+
+        cmd
+    }
+
    let intrinsics = intrinsic_name_list
        .par_iter()
        .filter_map(|intrinsic_name| {
-            let c = Command::new("sh")
-                .arg("-c")
-                .arg(format!("{runner} ./c_programs/{intrinsic_name}"))
+            let c = runner_command(runner)
+                .arg("./c_programs/intrinsic-test-programs")
+                .arg(intrinsic_name)
                .output();

-            let rust = Command::new("sh")
-                .current_dir("rust_programs")
-                .arg("-c")
-                .arg(format!(
-                    "cargo {toolchain} run --target {target} --bin {intrinsic_name} --release",
-                ))
-                .env("RUSTFLAGS", "-Cdebuginfo=0")
+            let rust = runner_command(runner)
+                .arg(format!("target/{target}/release/{intrinsic_name}"))
                .output();

            let (c, rust) = match (c, rust) {
@ -42,8 +40,8 @@ pub fn compare_outputs(
            if !rust.status.success() {
                error!(
                    "Failed to run Rust program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}",
-                    stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
-                    stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
+                    stdout = String::from_utf8_lossy(&rust.stdout),
+                    stderr = String::from_utf8_lossy(&rust.stderr),
                );
                return Some(FailureReason::RunRust(intrinsic_name.clone()));
            }
--- a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs
@ -5,11 +5,7 @@ pub struct CompilationCommandBuilder {
    cxx_toolchain_dir: Option<String>,
    arch_flags: Vec<String>,
    optimization: String,
-    include_paths: Vec<String>,
    project_root: Option<String>,
-    output: String,
-    input: String,
-    linker: Option<String>,
    extra_flags: Vec<String>,
 }

@ -21,11 +17,7 @@ impl CompilationCommandBuilder {
            cxx_toolchain_dir: None,
            arch_flags: Vec::new(),
            optimization: "2".to_string(),
-            include_paths: Vec::new(),
            project_root: None,
-            output: String::new(),
-            input: String::new(),
-            linker: None,
            extra_flags: Vec::new(),
        }
    }
@ -57,37 +49,12 @@ impl CompilationCommandBuilder {
        self
    }

-    /// Sets a list of include paths for compilation.
-    /// The paths that are passed must be relative to the
-    /// "cxx_toolchain_dir" directory path.
-    pub fn set_include_paths(mut self, paths: Vec<&str>) -> Self {
-        self.include_paths = paths.into_iter().map(|path| path.to_string()).collect();
-        self
-    }
-
    /// Sets the root path of all the generated test files.
    pub fn set_project_root(mut self, path: &str) -> Self {
        self.project_root = Some(path.to_string());
        self
    }

-    /// The name of the output executable, without any suffixes
-    pub fn set_output_name(mut self, path: &str) -> Self {
-        self.output = path.to_string();
-        self
-    }
-
-    /// The name of the input C file, without any suffixes
-    pub fn set_input_name(mut self, path: &str) -> Self {
-        self.input = path.to_string();
-        self
-    }
-
-    pub fn set_linker(mut self, linker: String) -> Self {
-        self.linker = Some(linker);
-        self
-    }
-
    pub fn add_extra_flags(mut self, flags: Vec<&str>) -> Self {
        let mut flags: Vec<String> = flags.into_iter().map(|f| f.to_string()).collect();
        self.extra_flags.append(&mut flags);
@ -100,55 +67,69 @@ impl CompilationCommandBuilder {
 }

 impl CompilationCommandBuilder {
-    pub fn make_string(self) -> String {
-        let arch_flags = self.arch_flags.join("+");
-        let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
-        let project_root = self.project_root.unwrap_or_default();
-        let project_root_str = project_root.as_str();
-        let mut output = self.output.clone();
-        if self.linker.is_some() {
-            output += ".o"
-        };
-        let mut command = format!(
-            "{} {flags} -march={arch_flags} \
-            -O{} \
-            -o {project_root}/{} \
-            {project_root}/{}.cpp",
-            self.compiler, self.optimization, output, self.input,
-        );
+    pub fn into_cpp_compilation(self) -> CppCompilation {
+        let mut cpp_compiler = std::process::Command::new(self.compiler);

-        command = command + " " + self.extra_flags.join(" ").as_str();
+        if let Some(project_root) = self.project_root {
+            cpp_compiler.current_dir(project_root);
+        }
+
+        let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
+        cpp_compiler.args(flags.split_whitespace());
+
+        cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+")));
+
+        cpp_compiler.arg(format!("-O{}", self.optimization));
+
+        cpp_compiler.args(self.extra_flags);

        if let Some(target) = &self.target {
-            command = command + " --target=" + target;
+            cpp_compiler.arg(format!("--target={target}"));
        }

-        if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) {
-            let include_args = self
-                .include_paths
-                .iter()
-                .map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path)
-                .collect::<Vec<_>>()
-                .join(" ");
-
-            command = command
-                + " -c "
-                + include_args.as_str()
-                + " && "
-                + linker
-                + " "
-                + project_root_str
-                + "/"
-                + &output
-                + " -o "
-                + project_root_str
-                + "/"
-                + &self.output
-                + " && rm "
-                + project_root_str
-                + "/"
-                + &output;
-        }
-        command
+        CppCompilation(cpp_compiler)
+    }
+}
+
+pub struct CppCompilation(std::process::Command);
+
+fn clone_command(command: &std::process::Command) -> std::process::Command {
+    let mut cmd = std::process::Command::new(command.get_program());
+    if let Some(current_dir) = command.get_current_dir() {
+        cmd.current_dir(current_dir);
+    }
+    cmd.args(command.get_args());
+
+    for (key, val) in command.get_envs() {
+        cmd.env(key, val.unwrap_or_default());
+    }
+
+    cmd
+}
+
+impl CppCompilation {
+    pub fn command_mut(&mut self) -> &mut std::process::Command {
+        &mut self.0
+    }
+
+    pub fn compile_object_file(
+        &self,
+        input: &str,
+        output: &str,
+    ) -> std::io::Result<std::process::Output> {
+        let mut cmd = clone_command(&self.0);
+        cmd.args([input, "-c", "-o", output]);
+        cmd.output()
+    }
+
+    pub fn link_executable(
+        &self,
+        inputs: impl Iterator<Item = String>,
+        output: &str,
+    ) -> std::io::Result<std::process::Output> {
+        let mut cmd = clone_command(&self.0);
+        cmd.args(inputs);
+        cmd.args(["-o", output]);
+        cmd.output()
    }
 }
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@ -1,8 +1,3 @@
-use itertools::Itertools;
-use rayon::prelude::*;
-use std::collections::BTreeMap;
-use std::process::Command;
-
 use super::argument::Argument;
 use super::indentation::Indentation;
 use super::intrinsic::IntrinsicDefinition;
@ -11,104 +6,16 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition;
 // The number of times each intrinsic will be called.
 const PASSES: u32 = 20;

-// Formats the main C program template with placeholders
-pub fn format_c_main_template(
-    notices: &str,
-    header_files: &[&str],
-    arch_identifier: &str,
-    arch_specific_definitions: &[&str],
-    arglists: &str,
-    passes: &str,
-) -> String {
-    format!(
-        r#"{notices}{header_files}
-#include <iostream>
-#include <cstring>
-#include <iomanip>
-#include <sstream>
-
-template<typename T1, typename T2> T1 cast(T2 x) {{
-  static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
-  T1 ret{{}};
-  memcpy(&ret, &x, sizeof(T1));
-  return ret;
-}}
-
-std::ostream& operator<<(std::ostream& os, float16_t value) {{
-    uint16_t temp = 0;
-    memcpy(&temp, &value, sizeof(float16_t));
-    std::stringstream ss;
-    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
-    os << ss.str();
-    return os;
-}}
-
-#ifdef __{arch_identifier}__
-{arch_specific_definitions}
-#endif
-
-{arglists}
-
-int main(int argc, char **argv) {{
-{passes}
-    return 0;
-}}"#,
-        header_files = header_files
-            .iter()
-            .map(|header| format!("#include <{header}>"))
-            .collect::<Vec<_>>()
-            .join("\n"),
-        arch_specific_definitions = arch_specific_definitions.join("\n"),
-    )
-}
-
-pub fn compile_c_programs(compiler_commands: &[String]) -> bool {
-    compiler_commands
-        .par_iter()
-        .map(|compiler_command| {
-            let output = Command::new("sh").arg("-c").arg(compiler_command).output();
-            if let Ok(output) = output {
-                if output.status.success() {
-                    true
-                } else {
-                    error!(
-                        "Failed to compile code for intrinsics: \n\nstdout:\n{}\n\nstderr:\n{}",
-                        std::str::from_utf8(&output.stdout).unwrap_or(""),
-                        std::str::from_utf8(&output.stderr).unwrap_or("")
-                    );
-                    false
-                }
-            } else {
-                error!("Command failed: {output:#?}");
-                false
-            }
-        })
-        .find_any(|x| !x)
-        .is_none()
-}
-
-// Creates directory structure and file path mappings
-pub fn setup_c_file_paths(identifiers: &Vec<String>) -> BTreeMap<&String, String> {
-    let _ = std::fs::create_dir("c_programs");
-    identifiers
-        .par_iter()
-        .map(|identifier| {
-            let c_filename = format!(r#"c_programs/{identifier}.cpp"#);
-
-            (identifier, c_filename)
-        })
-        .collect::<BTreeMap<&String, String>>()
-}
-
 pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
+    w: &mut impl std::io::Write,
    intrinsic: &dyn IntrinsicDefinition<T>,
    indentation: Indentation,
    additional: &str,
    passes: u32,
-    _target: &str,
-) -> String {
+) -> std::io::Result<()> {
    let body_indentation = indentation.nested();
-    format!(
+    writeln!(
+        w,
        "{indentation}for (int i=0; i<{passes}; i++) {{\n\
            {loaded_args}\
            {body_indentation}auto __return_value = {intrinsic_call}({args});\n\
@ -121,78 +28,172 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
    )
 }

-pub fn generate_c_constraint_blocks<T: IntrinsicTypeDefinition>(
+pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
+    w: &mut impl std::io::Write,
    intrinsic: &dyn IntrinsicDefinition<T>,
    indentation: Indentation,
-    constraints: &[&Argument<T>],
+    constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
    name: String,
-    target: &str,
-) -> String {
-    if let Some((current, constraints)) = constraints.split_last() {
-        let range = current
-            .constraint
-            .iter()
-            .map(|c| c.to_range())
-            .flat_map(|r| r.into_iter());
+) -> std::io::Result<()> {
+    let Some(current) = constraints.next() else {
+        return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES);
+    };

-        let body_indentation = indentation.nested();
-        range
-            .map(|i| {
-                format!(
-                    "{indentation}{{\n\
-                        {body_indentation}{ty} {name} = {val};\n\
-                        {pass}\n\
-                    {indentation}}}",
-                    name = current.name,
-                    ty = current.ty.c_type(),
-                    val = i,
-                    pass = generate_c_constraint_blocks(
-                        intrinsic,
-                        body_indentation,
-                        constraints,
-                        format!("{name}-{i}"),
-                        target,
-                    )
-                )
-            })
-            .join("\n")
-    } else {
-        generate_c_test_loop(intrinsic, indentation, &name, PASSES, target)
+    let body_indentation = indentation.nested();
+    for i in current.constraint.iter().flat_map(|c| c.to_range()) {
+        let ty = current.ty.c_type();
+
+        writeln!(w, "{indentation}{{")?;
+        writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?;
+
+        generate_c_constraint_blocks(
+            w,
+            intrinsic,
+            body_indentation,
+            &mut constraints.clone(),
+            format!("{name}-{i}"),
+        )?;
+
+        writeln!(w, "{indentation}}}")?;
    }
+
+    Ok(())
 }

 // Compiles C test programs using specified compiler
-pub fn create_c_test_program<T: IntrinsicTypeDefinition>(
+pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
+    w: &mut impl std::io::Write,
    intrinsic: &dyn IntrinsicDefinition<T>,
-    header_files: &[&str],
-    target: &str,
-    c_target: &str,
-    notices: &str,
-    arch_specific_definitions: &[&str],
-) -> String {
-    let arguments = intrinsic.arguments();
-    let constraints = arguments
-        .iter()
-        .filter(|&i| i.has_constraint())
-        .collect_vec();
-
+) -> std::io::Result<()> {
    let indentation = Indentation::default();
-    format_c_main_template(
-        notices,
-        header_files,
-        c_target,
-        arch_specific_definitions,
-        intrinsic
-            .arguments()
-            .gen_arglists_c(indentation, PASSES)
-            .as_str(),
-        generate_c_constraint_blocks(
-            intrinsic,
-            indentation.nested(),
-            constraints.as_slice(),
-            Default::default(),
-            target,
-        )
-        .as_str(),
-    )
+
+    writeln!(w, "int run_{}() {{", intrinsic.name())?;
+
+    // Define the arrays of arguments.
+    let arguments = intrinsic.arguments();
+    arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
+
+    generate_c_constraint_blocks(
+        w,
+        intrinsic,
+        indentation.nested(),
+        &mut arguments.iter().rev().filter(|&i| i.has_constraint()),
+        Default::default(),
+    )?;
+
+    writeln!(w, "    return 0;")?;
+    writeln!(w, "}}")?;
+
+    Ok(())
+}
+
+pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
+    w: &mut impl std::io::Write,
+    notice: &str,
+    architecture: &str,
+    platform_headers: &[&str],
+    intrinsics: &[impl IntrinsicDefinition<T>],
+) -> std::io::Result<()> {
+    write!(w, "{notice}")?;
+
+    for header in platform_headers {
+        writeln!(w, "#include <{header}>")?;
+    }
+
+    writeln!(
+        w,
+        r#"
+#include <iostream>
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+
+template<typename T1, typename T2> T1 cast(T2 x) {{
+  static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
+  T1 ret{{}};
+  memcpy(&ret, &x, sizeof(T1));
+  return ret;
+}}
+
+std::ostream& operator<<(std::ostream& os, float16_t value);
+
+
+
+"#
+    )?;
+
+    writeln!(w, "#ifdef __{architecture}__")?;
+    writeln!(
+        w,
+        "std::ostream& operator<<(std::ostream& os, poly128_t value);"
+    )?;
+    writeln!(w, "#endif")?;
+
+    for intrinsic in intrinsics {
+        create_c_test_function(w, intrinsic)?;
+    }
+
+    Ok(())
+}
+
+pub fn write_main_cpp<'a>(
+    w: &mut impl std::io::Write,
+    architecture: &str,
+    arch_specific_definitions: &str,
+    intrinsics: impl Iterator<Item = &'a str> + Clone,
+) -> std::io::Result<()> {
+    writeln!(w, "#include <iostream>")?;
+    writeln!(w, "#include <string>")?;
+
+    for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] {
+        writeln!(w, "#include <{header}>")?;
+    }
+
+    writeln!(
+        w,
+        r#"
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+
+std::ostream& operator<<(std::ostream& os, float16_t value) {{
+    uint16_t temp = 0;
+    memcpy(&temp, &value, sizeof(float16_t));
+    std::stringstream ss;
+    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
+    os << ss.str();
+    return os;
+}}
+"#
+    )?;
+
+    writeln!(w, "#ifdef __{architecture}__")?;
+    writeln!(w, "{arch_specific_definitions }")?;
+    writeln!(w, "#endif")?;
+
+    for intrinsic in intrinsics.clone() {
+        writeln!(w, "extern int run_{intrinsic}(void);")?;
+    }
+
+    writeln!(w, "int main(int argc, char **argv) {{")?;
+    writeln!(w, "    std::string intrinsic_name = argv[1];")?;
+
+    writeln!(w, "    if (false) {{")?;
+
+    for intrinsic in intrinsics {
+        writeln!(w, "    }} else if (intrinsic_name == \"{intrinsic}\") {{")?;
+        writeln!(w, "        return run_{intrinsic}();")?;
+    }
+
+    writeln!(w, "    }} else {{")?;
+    writeln!(
+        w,
+        "        std::cerr << \"Unknown command: \" << intrinsic_name << \"\\n\";"
+    )?;
+    writeln!(w, "        return -1;")?;
+    writeln!(w, "    }}")?;
+
+    writeln!(w, "}}")?;
+
+    Ok(())
 }
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@ -2,7 +2,6 @@ use itertools::Itertools;
 use rayon::prelude::*;
 use std::collections::BTreeMap;
 use std::fs::File;
-use std::io::Write;
 use std::process::Command;

 use super::argument::Argument;
@ -23,8 +22,8 @@ pub fn format_rust_main_template(
 ) -> String {
    format!(
        r#"{notices}#![feature(simd_ffi)]
-#![feature(link_llvm_intrinsics)]
 #![feature(f16)]
+#![allow(unused)]
 {configurations}
 {definitions}

@ -38,6 +37,42 @@ fn main() {{
    )
 }

+fn write_cargo_toml(w: &mut impl std::io::Write, binaries: &[String]) -> std::io::Result<()> {
+    writeln!(
+        w,
+        concat!(
+            "[package]\n",
+            "name = \"intrinsic-test-programs\"\n",
+            "version = \"{version}\"\n",
+            "authors = [{authors}]\n",
+            "license = \"{license}\"\n",
+            "edition = \"2018\"\n",
+            "[workspace]\n",
+            "[dependencies]\n",
+            "core_arch = {{ path = \"../crates/core_arch\" }}",
+        ),
+        version = env!("CARGO_PKG_VERSION"),
+        authors = env!("CARGO_PKG_AUTHORS")
+            .split(":")
+            .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
+        license = env!("CARGO_PKG_LICENSE"),
+    )?;
+
+    for binary in binaries {
+        writeln!(
+            w,
+            concat!(
+                "[[bin]]\n",
+                "name = \"{binary}\"\n",
+                "path = \"{binary}/main.rs\"\n",
+            ),
+            binary = binary,
+        )?;
+    }
+
+    Ok(())
+}
+
 pub fn compile_rust_programs(
    binaries: Vec<String>,
    toolchain: Option<&str>,
@ -45,56 +80,20 @@ pub fn compile_rust_programs(
    linker: Option<&str>,
 ) -> bool {
    let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
-    cargo
-        .write_all(
-            format!(
-                r#"[package]
-name = "intrinsic-test-programs"
-version = "{version}"
-authors = [{authors}]
-license = "{license}"
-edition = "2018"
-[workspace]
-[dependencies]
-core_arch = {{ path = "../crates/core_arch" }}
-{binaries}"#,
-                version = env!("CARGO_PKG_VERSION"),
-                authors = env!("CARGO_PKG_AUTHORS")
-                    .split(":")
-                    .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
-                license = env!("CARGO_PKG_LICENSE"),
-                binaries = binaries
-                    .iter()
-                    .map(|binary| {
-                        format!(
-                            r#"[[bin]]
-name = "{binary}"
-path = "{binary}/main.rs""#,
-                        )
-                    })
-                    .collect::<Vec<_>>()
-                    .join("\n")
-            )
-            .into_bytes()
-            .as_slice(),
-        )
-        .unwrap();
-
-    let toolchain = match toolchain {
-        None => return true,
-        Some(t) => t,
-    };
+    write_cargo_toml(&mut cargo, &binaries).unwrap();

    /* If there has been a linker explicitly set from the command line then
     * we want to set it via setting it in the RUSTFLAGS*/

-    let cargo_command = format!("cargo {toolchain} build --target {target} --release");
+    let mut cargo_command = Command::new("cargo");
+    cargo_command.current_dir("rust_programs");

-    let mut command = Command::new("sh");
-    command
-        .current_dir("rust_programs")
-        .arg("-c")
-        .arg(cargo_command);
+    if let Some(toolchain) = toolchain {
+        if !toolchain.is_empty() {
+            cargo_command.arg(toolchain);
+        }
+    }
+    cargo_command.args(["build", "--target", target, "--release"]);

    let mut rust_flags = "-Cdebuginfo=0".to_string();
    if let Some(linker) = linker {
@ -102,11 +101,11 @@ path = "{binary}/main.rs""#,
        rust_flags.push_str(linker);
        rust_flags.push_str(" -C link-args=-static");

-        command.env("CPPFLAGS", "-fuse-ld=lld");
+        cargo_command.env("CPPFLAGS", "-fuse-ld=lld");
    }

-    command.env("RUSTFLAGS", rust_flags);
-    let output = command.output();
+    cargo_command.env("RUSTFLAGS", rust_flags);
+    let output = cargo_command.output();

    if let Ok(output) = output {
        if output.status.success() {
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@ -8,14 +8,22 @@ use super::cli::Language;
 use super::indentation::Indentation;
 use super::values::value_for_array;

+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum Sign {
+    Signed,
+    Unsigned,
+}
+
 #[derive(Debug, PartialEq, Copy, Clone)]
 pub enum TypeKind {
    BFloat,
    Float,
-    Int,
-    UInt,
+    Int(Sign),
+    Char(Sign),
    Poly,
    Void,
+    Mask,
+    Vector,
 }

 impl FromStr for TypeKind {
@ -23,12 +31,17 @@ impl FromStr for TypeKind {

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
-            "bfloat" => Ok(Self::BFloat),
-            "float" => Ok(Self::Float),
-            "int" => Ok(Self::Int),
+            "bfloat" | "BF16" => Ok(Self::BFloat),
+            "float" | "double" | "FP16" | "FP32" | "FP64" => Ok(Self::Float),
+            "int" | "long" | "short" | "SI8" | "SI16" | "SI32" | "SI64" => {
+                Ok(Self::Int(Sign::Signed))
+            }
            "poly" => Ok(Self::Poly),
-            "uint" | "unsigned" => Ok(Self::UInt),
+            "char" => Ok(Self::Char(Sign::Signed)),
+            "uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => Ok(Self::Int(Sign::Unsigned)),
            "void" => Ok(Self::Void),
+            "MASK" => Ok(Self::Mask),
+            "M64" | "M128" | "M256" | "M512" => Ok(Self::Vector),
            _ => Err(format!("Impossible to parse argument kind {s}")),
        }
    }
@ -42,10 +55,14 @@ impl fmt::Display for TypeKind {
            match self {
                Self::BFloat => "bfloat",
                Self::Float => "float",
-                Self::Int => "int",
-                Self::UInt => "uint",
+                Self::Int(Sign::Signed) => "int",
+                Self::Int(Sign::Unsigned) => "uint",
                Self::Poly => "poly",
                Self::Void => "void",
+                Self::Char(Sign::Signed) => "char",
+                Self::Char(Sign::Unsigned) => "unsigned char",
+                Self::Mask => "mask",
+                Self::Vector => "vector",
            }
        )
    }
@ -56,9 +73,10 @@ impl TypeKind {
    pub fn c_prefix(&self) -> &str {
        match self {
            Self::Float => "float",
-            Self::Int => "int",
-            Self::UInt => "uint",
+            Self::Int(Sign::Signed) => "int",
+            Self::Int(Sign::Unsigned) => "uint",
            Self::Poly => "poly",
+            Self::Char(Sign::Signed) => "char",
            _ => unreachable!("Not used: {:#?}", self),
        }
    }
@ -66,10 +84,13 @@ impl TypeKind {
    /// Gets the rust prefix for the type kind i.e. i, u, f.
    pub fn rust_prefix(&self) -> &str {
        match self {
+            Self::BFloat => "bf",
            Self::Float => "f",
-            Self::Int => "i",
-            Self::UInt => "u",
+            Self::Int(Sign::Signed) => "i",
+            Self::Int(Sign::Unsigned) => "u",
            Self::Poly => "u",
+            Self::Char(Sign::Unsigned) => "u",
+            Self::Char(Sign::Signed) => "i",
            _ => unreachable!("Unused type kind: {:#?}", self),
        }
    }
@ -133,11 +154,14 @@ impl IntrinsicType {
    }

    pub fn c_scalar_type(&self) -> String {
-        format!(
-            "{prefix}{bits}_t",
-            prefix = self.kind().c_prefix(),
-            bits = self.inner_size()
-        )
+        match self.kind() {
+            TypeKind::Char(_) => String::from("char"),
+            _ => format!(
+                "{prefix}{bits}_t",
+                prefix = self.kind().c_prefix(),
+                bits = self.inner_size()
+            ),
+        }
    }

    pub fn rust_scalar_type(&self) -> String {
@ -155,8 +179,8 @@ impl IntrinsicType {
                bit_len: Some(8),
                ..
            } => match kind {
-                TypeKind::Int => "(int)",
-                TypeKind::UInt => "(unsigned int)",
+                TypeKind::Int(Sign::Signed) => "(int)",
+                TypeKind::Int(Sign::Unsigned) => "(unsigned int)",
                TypeKind::Poly => "(unsigned int)(uint8_t)",
                _ => "",
            },
@ -172,6 +196,21 @@ impl IntrinsicType {
                128 => "",
                _ => panic!("invalid bit_len"),
            },
+            IntrinsicType {
+                kind: TypeKind::Float,
+                bit_len: Some(bit_len),
+                ..
+            } => match bit_len {
+                16 => "(float16_t)",
+                32 => "(float)",
+                64 => "(double)",
+                128 => "",
+                _ => panic!("invalid bit_len"),
+            },
+            IntrinsicType {
+                kind: TypeKind::Char(_),
+                ..
+            } => "(char)",
            _ => "",
        }
    }
@ -185,7 +224,7 @@ impl IntrinsicType {
        match self {
            IntrinsicType {
                bit_len: Some(bit_len @ (8 | 16 | 32 | 64)),
-                kind: kind @ (TypeKind::Int | TypeKind::UInt | TypeKind::Poly),
+                kind: kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_)),
                simd_len,
                vec_len,
                ..
@ -201,7 +240,8 @@ impl IntrinsicType {
                        .format_with(",\n", |i, fmt| {
                            let src = value_for_array(*bit_len, i);
                            assert!(src == 0 || src.ilog2() < *bit_len);
-                            if *kind == TypeKind::Int && (src >> (*bit_len - 1)) != 0 {
+                            if *kind == TypeKind::Int(Sign::Signed) && (src >> (*bit_len - 1)) != 0
+                            {
                                // `src` is a two's complement representation of a negative value.
                                let mask = !0u64 >> (64 - *bit_len);
                                let ones_compl = src ^ mask;
@ -257,7 +297,7 @@ impl IntrinsicType {
                ..
            } => false,
            IntrinsicType {
-                kind: TypeKind::Int | TypeKind::UInt | TypeKind::Poly,
+                kind: TypeKind::Int(_) | TypeKind::Poly,
                ..
            } => true,
            _ => unimplemented!(),
@ -282,7 +322,9 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
    fn get_lane_function(&self) -> String;

    /// can be implemented in an `impl` block
-    fn from_c(_s: &str, _target: &str) -> Result<Box<Self>, String>;
+    fn from_c(_s: &str, _target: &str) -> Result<Self, String>
+    where
+        Self: Sized;

    /// Gets a string containing the typename for this type in C format.
    /// can be directly defined in `impl` blocks
--- a/library/stdarch/crates/intrinsic-test/src/common/write_file.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/write_file.rs
@ -1,5 +1,3 @@
-use super::gen_c::create_c_test_program;
-use super::gen_c::setup_c_file_paths;
 use super::gen_rust::{create_rust_test_program, setup_rust_file_paths};
 use super::intrinsic::IntrinsicDefinition;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
@ -11,37 +9,6 @@ pub fn write_file(filename: &String, code: String) {
    file.write_all(code.into_bytes().as_slice()).unwrap();
 }

-pub fn write_c_testfiles<T: IntrinsicTypeDefinition + Sized>(
-    intrinsics: &Vec<&dyn IntrinsicDefinition<T>>,
-    target: &str,
-    c_target: &str,
-    headers: &[&str],
-    notice: &str,
-    arch_specific_definitions: &[&str],
-) -> Vec<String> {
-    let intrinsics_name_list = intrinsics
-        .iter()
-        .map(|i| i.name().clone())
-        .collect::<Vec<_>>();
-    let filename_mapping = setup_c_file_paths(&intrinsics_name_list);
-
-    intrinsics.iter().for_each(|&i| {
-        let c_code = create_c_test_program(
-            i,
-            headers,
-            target,
-            c_target,
-            notice,
-            arch_specific_definitions,
-        );
-        if let Some(filename) = filename_mapping.get(&i.name()) {
-            write_file(filename, c_code)
-        };
-    });
-
-    intrinsics_name_list
-}
-
 pub fn write_rust_testfiles<T: IntrinsicTypeDefinition>(
    intrinsics: Vec<&dyn IntrinsicDefinition<T>>,
    rust_target: &str,
--- a/library/stdarch/crates/intrinsic-test/src/main.rs
+++ b/library/stdarch/crates/intrinsic-test/src/main.rs
@ -30,12 +30,15 @@ fn main() {

    let test_environment = test_environment_result.unwrap();

+    info!("building C binaries");
    if !test_environment.build_c_file() {
        std::process::exit(2);
    }
+    info!("building Rust binaries");
    if !test_environment.build_rust_file() {
        std::process::exit(3);
    }
+    info!("comaparing outputs");
    if !test_environment.compare_outputs() {
        std::process::exit(1);
    }
--- a/library/stdarch/crates/simd-test-macro/src/lib.rs
+++ b/library/stdarch/crates/simd-test-macro/src/lib.rs
@ -57,12 +57,12 @@ pub fn simd_test(
        .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
    {
        "i686" | "x86_64" | "i586" => "is_x86_feature_detected",
-        "arm" | "armv7" => "is_arm_feature_detected",
+        "arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
        "aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
        maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected",
        "powerpc" | "powerpcle" => "is_powerpc_feature_detected",
        "powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected",
-        "loongarch64" => "is_loongarch_feature_detected",
+        "loongarch32" | "loongarch64" => "is_loongarch_feature_detected",
        "s390x" => "is_s390x_feature_detected",
        t => panic!("unknown target: {t}"),
    };
--- a/library/stdarch/crates/std_detect/README.md
+++ b/library/stdarch/crates/std_detect/README.md
@ -55,7 +55,7 @@ crate from working on applications in which `std` is not available.
  application.

 * Linux/Android:
-  * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch64`, `s390x`:
+  * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch{32,64}`, `s390x`:
    `std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval`
    when available), and if that fails, by querying `/proc/self/auxv`.
  * `arm64`: partial support for doing run-time feature detection by directly
--- a/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs
+++ b/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs
@ -2,7 +2,7 @@

 features! {
    @TARGET: loongarch;
-    @CFG: target_arch = "loongarch64";
+    @CFG: any(target_arch = "loongarch32", target_arch = "loongarch64");
    @MACRO_NAME: is_loongarch_feature_detected;
    @MACRO_ATTRS:
    /// Checks if `loongarch` feature is enabled.
--- a/library/stdarch/crates/std_detect/src/detect/arch/mod.rs
+++ b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs
@ -49,7 +49,7 @@ cfg_if! {
    } else if #[cfg(target_arch = "mips64")] {
        #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")]
        pub use mips64::*;
-    } else if #[cfg(target_arch = "loongarch64")] {
+    } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
        #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")]
        pub use loongarch::*;
    } else if #[cfg(target_arch = "s390x")] {
--- a/library/stdarch/crates/std_detect/src/detect/mod.rs
+++ b/library/stdarch/crates/std_detect/src/detect/mod.rs
@ -103,6 +103,7 @@ pub fn features() -> impl Iterator<Item = (&'static str, bool)> {
            target_arch = "powerpc64",
            target_arch = "mips",
            target_arch = "mips64",
+            target_arch = "loongarch32",
            target_arch = "loongarch64",
            target_arch = "s390x",
        ))] {
--- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs
+++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs
@ -80,6 +80,7 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
            target_arch = "riscv64",
            target_arch = "mips",
            target_arch = "mips64",
+            target_arch = "loongarch32",
            target_arch = "loongarch64",
        ))]
        {
@ -182,6 +183,7 @@ fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> {
        target_arch = "riscv64",
        target_arch = "mips",
        target_arch = "mips64",
+        target_arch = "loongarch32",
        target_arch = "loongarch64",
    ))]
    {
--- a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs
+++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs
@ -51,7 +51,7 @@ cfg_if::cfg_if! {
    } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
        mod powerpc;
        pub(crate) use self::powerpc::detect_features;
-    } else if #[cfg(target_arch = "loongarch64")] {
+    } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
        mod loongarch;
        pub(crate) use self::loongarch::detect_features;
    } else if #[cfg(target_arch = "s390x")] {
--- a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs
+++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs
@ -11,6 +11,7 @@
        target_arch = "s390x",
        target_arch = "riscv32",
        target_arch = "riscv64",
+        target_arch = "loongarch32",
        target_arch = "loongarch64"
    ),
    feature(stdarch_internal)
@ -30,7 +31,7 @@
    feature(stdarch_riscv_feature_detection)
 )]
 #![cfg_attr(
-    target_arch = "loongarch64",
+    any(target_arch = "loongarch32", target_arch = "loongarch64"),
    feature(stdarch_loongarch_feature_detection)
 )]

@ -45,6 +46,7 @@
    target_arch = "s390x",
    target_arch = "riscv32",
    target_arch = "riscv64",
+    target_arch = "loongarch32",
    target_arch = "loongarch64"
 ))]
 #[macro_use]
@ -65,8 +67,8 @@ fn aarch64() {
 }

 #[test]
-#[cfg(target_arch = "loongarch64")]
-fn loongarch64() {
+#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))]
+fn loongarch() {
    let _ = is_loongarch_feature_detected!("lsx");
    let _ = is_loongarch_feature_detected!("lsx",);
 }
--- a/library/stdarch/crates/stdarch-gen-arm/Cargo.toml
+++ b/library/stdarch/crates/stdarch-gen-arm/Cargo.toml
@ -13,7 +13,6 @@ edition = "2024"

 [dependencies]
 itertools = "0.14.0"
-lazy_static = "1.4.0"
 proc-macro2 = "1.0"
 quote = "1.0"
 regex = "1.5"
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@ -187,7 +187,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
    return_type: "{neon_type[1]}"
    attr: [*neon-stable]
-    assert_instr: [sabdl]
+    assert_instr: [sabdl2]
    safety: safe
    types:
      - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
@ -230,7 +230,7 @@ intrinsics:
          - stable
          - - 'feature = "neon_intrinsics"'
            - 'since = "1.59.0"'
-    assert_instr: [sabdl]
+    assert_instr: [sabdl2]
    safety: safe
    types:
      - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
@ -273,7 +273,7 @@ intrinsics:
          - stable
          - - 'feature = "neon_intrinsics"'
            - 'since = "1.59.0"'
-    assert_instr: [sabdl]
+    assert_instr: [sabdl2]
    safety: safe
    types:
      - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
@ -1462,7 +1462,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}"]
    return_type: "{neon_type[1]}"
    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
    safety: safe
    types:
@ -1530,7 +1530,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
    return_type: "{neon_type[2]}"
    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
    safety: safe
    types:
@ -1582,7 +1582,7 @@ intrinsics:
    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
    return_type: "{type[2]}"
    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
    safety: safe
    types:
@ -5147,7 +5147,7 @@ intrinsics:
    attr:
      - *neon-stable
    safety: safe
-    assert_instr: [pmull]
+    assert_instr: [pmull2]
    types:
      - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
    compose:
@ -5169,7 +5169,7 @@ intrinsics:
      - *neon-aes
      - *neon-stable
    safety: safe
-    assert_instr: [pmull]
+    assert_instr: [pmull2]
    types:
      - [poly64x2_t, "p128"]
    compose:
@ -5741,7 +5741,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
    return_type: "{neon_type[0]}"
    attr: [*neon-stable]
-    assert_instr: [ssubw]
+    assert_instr: [ssubw2]
    safety: safe
    types:
      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -5762,7 +5762,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
    return_type: "{neon_type[0]}"
    attr: [*neon-stable]
-    assert_instr: [usubw]
+    assert_instr: [usubw2]
    safety: safe
    types:
      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -5783,7 +5783,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
    return_type: "{neon_type[1]}"
    attr: [*neon-stable]
-    assert_instr: [ssubl]
+    assert_instr: [ssubl2]
    safety: safe
    types:
      - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
@ -5813,7 +5813,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
    return_type: "{neon_type[1]}"
    attr: [*neon-stable]
-    assert_instr: [usubl]
+    assert_instr: [usubl2]
    safety: safe
    types:
      - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
@ -6580,7 +6580,6 @@ intrinsics:
              arch: aarch64,arm64ec


-
  - name: "vmaxnm{neon_type.no}"
    doc: Floating-point Maximum Number (vector)
    arguments: ["a: {neon_type}", "b: {neon_type}"]
@ -6592,11 +6591,7 @@ intrinsics:
      - float64x1_t
      - float64x2_t
    compose:
-      - LLVMLink:
-          name: "fmaxnm.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmax, [a, b]]


  - name: "vmaxnmh_{type}"
@ -6611,11 +6606,7 @@ intrinsics:
    types:
      - f16
    compose:
-      - LLVMLink:
-          name: "vmaxh.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.fmaxnm.{type}"
-              arch: aarch64,arm64ec
+      - FnCall: ["f16::max", [a, b]]


  - name: "vminnmh_{type}"
@ -6630,11 +6621,7 @@ intrinsics:
    types:
      - f16
    compose:
-      - LLVMLink:
-          name: "vminh.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.fminnm.{type}"
-              arch: aarch64,arm64ec
+      - FnCall: ["f16::min", [a, b]]


  - name: "vmaxnmv{neon_type[0].no}"
@ -6648,11 +6635,7 @@ intrinsics:
      - [float32x2_t, f32]
      - [float64x2_t, f64]
    compose:
-      - LLVMLink:
-          name: "fmaxnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_max, [a]]

  - name: "vmaxnmv{neon_type[0].no}"
    doc: Floating-point maximum number across vector
@ -6664,11 +6647,7 @@ intrinsics:
    types:
      - [float32x4_t, f32]
    compose:
-      - LLVMLink:
-          name: "fmaxnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_max, [a]]


  - name: "vmaxnmv{neon_type[0].no}"
@ -6684,11 +6663,7 @@ intrinsics:
      - [float16x4_t, f16]
      - [float16x8_t, f16]
    compose:
-      - LLVMLink:
-          name: "fmaxnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_max, [a]]


  - name: "vminnmv{neon_type[0].no}"
@ -6704,11 +6679,7 @@ intrinsics:
      - [float16x4_t, f16]
      - [float16x8_t, f16]
    compose:
-      - LLVMLink:
-          name: "fminnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_min, [a]]


  - name: "vmaxv{neon_type[0].no}"
@ -6814,11 +6785,7 @@ intrinsics:
      - float64x1_t
      - float64x2_t
    compose:
-      - LLVMLink:
-          name: "fminnm.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmin, [a, b]]

  - name: "vminnmv{neon_type[0].no}"
    doc: "Floating-point minimum number across vector"
@ -6832,11 +6799,7 @@ intrinsics:
      - [float32x2_t, "f32"]
      - [float64x2_t, "f64"]
    compose:
-      - LLVMLink:
-          name: "vminnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_min, [a]]

  - name: "vminnmv{neon_type[0].no}"
    doc: "Floating-point minimum number across vector"
@ -6849,11 +6812,7 @@ intrinsics:
    types:
      - [float32x4_t, "f32"]
    compose:
-      - LLVMLink:
-          name: "vminnmv.{neon_type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_min, [a]]

  - name: "vmovl_high{neon_type[0].noq}"
    doc: Vector move
@ -9950,7 +9909,7 @@ intrinsics:
    return_type: "{neon_type[0]}"
    attr:
      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
    safety: safe
    types:
      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -9977,7 +9936,7 @@ intrinsics:
    return_type: "{neon_type[0]}"
    attr:
      - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
    safety: safe
    types:
      - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
@ -11386,7 +11345,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
    return_type: "{neon_type[1]}"
    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl]]}]]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
    safety: safe
    types:
@ -13023,6 +12982,26 @@ intrinsics:
            - link: "llvm.aarch64.crc32cx"
              arch: aarch64,arm64ec

+  - name: "vabsd_s64"
+    doc: "Absolute Value (wrapping)."
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [abs]
+    safety: safe
+    types:
+      - [i64, i64]
+    compose:
+      # This is behaviorally equivalent to `i64::wrapping_abs`, but keeps the value in a SIMD
+      # register. That can be beneficial when combined with other instructions. This LLVM
+      # issue provides some extra context https://github.com/llvm/llvm-project/issues/148388.
+      - LLVMLink:
+          name: "vabsd_s64"
+          links:
+             - link: "llvm.aarch64.neon.abs.i64"
+               arch: aarch64,arm64ec
+
  - name: "{type[0]}"
    doc: "Absolute Value (wrapping)."
    arguments: ["a: {type[1]}"]
@ -13032,15 +13011,18 @@ intrinsics:
    assert_instr: [abs]
    safety: safe
    types:
-      - ['vabsd_s64', i64, i64]
      - ['vabs_s64', int64x1_t, v1i64]
      - ['vabsq_s64', int64x2_t, v2i64]
    compose:
-      - LLVMLink:
-          name: "{type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.abs.{type[2]}"
-              arch: aarch64,arm64ec
+      - Let:
+          - neg
+          - "{type[1]}"
+          - FnCall: [simd_neg, [a]]
+      - Let:
+          - mask
+          - "{type[1]}"
+          - FnCall: [simd_ge, [a, neg]]
+      - FnCall: [simd_select, [mask, a, neg]]

  - name: "vuqadd{neon_type[0].no}"
    doc: "Signed saturating Accumulate of Unsigned value."
@ -13142,11 +13124,7 @@ intrinsics:
    types:
      - [int64x2_t, i64]
    compose:
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - "vaddvq_u64"
-                - - FnCall: [transmute, [a]]
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vpaddd_u64"
    doc: "Add pairwise"
@ -13159,7 +13137,7 @@ intrinsics:
    types:
      - [uint64x2_t, u64]
    compose:
-      - FnCall: [vaddvq_u64, [a]]
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13176,11 +13154,7 @@ intrinsics:
      - [int16x8_t, i16]
      - [int32x4_t, i32]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.saddv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13193,11 +13167,7 @@ intrinsics:
    types:
      - [int32x2_t, i32]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13210,11 +13180,7 @@ intrinsics:
    types:
      - [int64x2_t, i64]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13231,11 +13197,7 @@ intrinsics:
      - [uint16x8_t, u16]
      - [uint32x4_t, u32]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.uaddv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13248,11 +13210,7 @@ intrinsics:
    types:
      - [uint32x2_t, u32, i32]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddv{neon_type[0].no}"
    doc: "Add across vector"
@ -13265,11 +13223,7 @@ intrinsics:
    types:
      - [uint64x2_t, u64, i64]
    compose:
-      - LLVMLink:
-          name: "vaddv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_add_unordered, [a]]

  - name: "vaddlv{neon_type[0].no}"
    doc: "Signed Add Long across Vector"
@ -13327,11 +13281,7 @@ intrinsics:
      - [int16x8_t, i16, 'smaxv']
      - [int32x4_t, i32, 'smaxv']
    compose:
-      - LLVMLink:
-          name: "vmaxv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.smaxv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_max, [a]]

  - name: "vmaxv{neon_type[0].no}"
    doc: "Horizontal vector max."
@ -13349,11 +13299,7 @@ intrinsics:
      - [uint16x8_t, u16, 'umaxv']
      - [uint32x4_t, u32, 'umaxv']
    compose:
-      - LLVMLink:
-          name: "vmaxv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.umaxv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_max, [a]]

  - name: "vmaxv{neon_type[0].no}"
    doc: "Horizontal vector max."
@ -13390,11 +13336,7 @@ intrinsics:
      - [int16x8_t, i16, 'sminv']
      - [int32x4_t, i32, 'sminv']
    compose:
-      - LLVMLink:
-          name: "vminv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.sminv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_min, [a]]

  - name: "vminv{neon_type[0].no}"
    doc: "Horizontal vector min."
@ -13412,11 +13354,7 @@ intrinsics:
      - [uint16x8_t, u16, 'uminv']
      - [uint32x4_t, u32, 'uminv']
    compose:
-      - LLVMLink:
-          name: "vminv{neon_type[0].no}"
-          links:
-            - link: "llvm.aarch64.neon.uminv.{type[1]}.{neon_type[0]}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_reduce_min, [a]]

  - name: "vminv{neon_type[0].no}"
    doc: "Horizontal vector min."
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@ -7135,13 +7135,8 @@ intrinsics:
      - int32x2_t
      - int32x4_t
    compose:
-      - LLVMLink:
-          name: "smax.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vmaxs.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.smax.{neon_type}"
-              arch: aarch64,arm64ec
+      - Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
+      - FnCall: [simd_select, [mask, a, b]]

  - name: "vmax{neon_type.no}"
    doc: Maximum (vector)
@ -7162,13 +7157,8 @@ intrinsics:
      - uint32x2_t
      - uint32x4_t
    compose:
-      - LLVMLink:
-          name: "smax.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vmaxu.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.umax.{neon_type}"
-              arch: aarch64,arm64ec
+      - Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
+      - FnCall: [simd_select, [mask, a, b]]

  - name: "vmax{neon_type.no}"
    doc: Maximum (vector)
@ -7233,13 +7223,7 @@ intrinsics:
      - float32x2_t
      - float32x4_t
    compose:
-      - LLVMLink:
-          name: "fmaxnm.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmax, [a, b]]


  - name: "vmaxnm{neon_type.no}"
@ -7257,13 +7241,7 @@ intrinsics:
      - float16x4_t
      - float16x8_t
    compose:
-      - LLVMLink:
-          name: "fmaxnm.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmax, [a, b]]


  - name: "vminnm{neon_type.no}"
@ -7281,13 +7259,7 @@ intrinsics:
      - float16x4_t
      - float16x8_t
    compose:
-      - LLVMLink:
-          name: "fminnm.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vminnm.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmin, [a, b]]


  - name: "vmin{neon_type.no}"
@ -7309,13 +7281,8 @@ intrinsics:
      - int32x2_t
      - int32x4_t
    compose:
-      - LLVMLink:
-          name: "smin.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vmins.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.smin.{neon_type}"
-              arch: aarch64,arm64ec
+      - Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
+      - FnCall: [simd_select, [mask, a, b]]

  - name: "vmin{neon_type.no}"
    doc: "Minimum (vector)"
@ -7336,13 +7303,8 @@ intrinsics:
      - uint32x2_t
      - uint32x4_t
    compose:
-      - LLVMLink:
-          name: "umin.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vminu.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.umin.{neon_type}"
-              arch: aarch64,arm64ec
+      - Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
+      - FnCall: [simd_select, [mask, a, b]]

  - name: "vmin{neon_type.no}"
    doc: "Minimum (vector)"
@ -7408,13 +7370,7 @@ intrinsics:
      - float32x2_t
      - float32x4_t
    compose:
-      - LLVMLink:
-          name: "fminnm.{neon_type}"
-          links:
-            - link: "llvm.arm.neon.vminnm.{neon_type}"
-              arch: arm
-            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_fmin, [a, b]]

  - name: "vpadd{neon_type.no}"
    doc: Floating-point add pairwise
@ -7874,9 +7830,9 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
-      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
-      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
+      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
+      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
+      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
    compose:
      - FnCall: [static_assert!, ["{type[2]}"]]
      - LLVMLink:
@ -7929,9 +7885,9 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
-      - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
-      - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
+      - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
+      - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
+      - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
    compose:
      - FnCall: [static_assert!, ["{type[2]}"]]
      - LLVMLink:
@ -8105,9 +8061,9 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
-      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
-      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
+      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
+      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
+      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
    compose:
      - FnCall: [static_assert!, ["{type[2]}"]]
      - LLVMLink:
@ -8215,9 +8171,9 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
-      - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
-      - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
+      - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
+      - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
+      - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
    compose:
      - FnCall: [static_assert!, ["{type[2]}"]]
      - LLVMLink:
@ -8939,9 +8895,9 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
-      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
-      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
+      - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
+      - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
+      - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
    compose:
      - FnCall: [static_assert!, ["{type[2]}"]]
      - LLVMLink:
@ -9576,7 +9532,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9617,7 +9574,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
      - *neon-fp16
      - *neon-unstable-f16
    safety: safe
@ -9645,7 +9603,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9673,7 +9632,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9707,7 +9667,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9735,7 +9696,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vzip]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9767,7 +9729,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-fp16
      - *neon-unstable-f16
    safety: safe
@ -9794,7 +9757,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -9835,7 +9799,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
      - *neon-fp16
      - *neon-unstable-f16
    safety: safe
@ -9863,7 +9828,8 @@ intrinsics:
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
      - *neon-not-arm-stable
      - *neon-cfg-arm-unstable
    safety: safe
@ -12881,13 +12847,16 @@ intrinsics:
      - int16x8_t
      - int32x4_t
    compose:
-      - LLVMLink:
-          name: "vabs{neon_type.no}"
-          links:
-            - link: "llvm.aarch64.neon.abs.{neon_type}"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vabs.{neon_type}"
-              arch: arm
+      - Let:
+          - neg
+          - "{neon_type}"
+          - FnCall: [simd_neg, [a]]
+      - Let:
+          - mask
+          - "{neon_type}"
+          - FnCall: [simd_ge, [a, neg]]
+      - FnCall: [simd_select, [mask, a, neg]]
+

  - name: "vpmin{neon_type.no}"
    doc: "Folding minimum of adjacent pairs"
@ -13862,8 +13831,8 @@ intrinsics:
      - [int8x16_t, '8',  '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
      - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
      - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
-      - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N as i32']
-      - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N as i32']
+      - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
+      - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
      - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
      - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
    compose:
@ -13891,8 +13860,8 @@ intrinsics:
      - [uint8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
      - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
      - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
-      - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N as i32']
-      - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N as i32']
+      - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
+      - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
      - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
      - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
      - [poly8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8', 'int8x8_t::splat',  'N as i8']
@ -14138,6 +14107,7 @@ intrinsics:
    doc: "Load one single-element structure and Replicate to all lanes (of one register)."
    arguments: ["ptr: {type[1]}"]
    return_type: "{neon_type[2]}"
+    big_endian_inverse: false
    attr:
      - *neon-v7
      - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] }  ]]
@ -14147,40 +14117,36 @@ intrinsics:
    safety:
      unsafe: [neon]
    types:
-      - ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_s8::<0>', 'i8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_u8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_p8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
+      - ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'i8x8::splat']
+      - ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']
+      - ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']

-      - ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_s8::<0>', 'i8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_u8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_p8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
+      - ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'i8x16::splat']
+      - ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']
+      - ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']

-      - ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_s16::<0>', 'i16x4::splat(0)', '[0, 0, 0, 0]']
-      - ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_u16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
-      - ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_p16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
+      - ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'i16x4::splat']
+      - ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']
+      - ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']

-      - ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_s16::<0>', 'i16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_u16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
-      - ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_p16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
+      - ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'i16x8::splat']
+      - ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']
+      - ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']

-      - ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_s32::<0>', 'i32x2::splat(0)', '[0, 0]']
-      - ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_u32::<0>', 'u32x2::splat(0)', '[0, 0]']
-      - ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_f32::<0>', 'f32x2::splat(0.0)', '[0, 0]']
+      - ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'i32x2::splat']
+      - ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'u32x2::splat']
+      - ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'f32x2::splat']

-      - ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_s32::<0>', 'i32x4::splat(0)', '[0, 0, 0, 0]']
-      - ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_u32::<0>', 'u32x4::splat(0)', '[0, 0, 0, 0]']
-      - ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_f32::<0>', 'f32x4::splat(0.0)', '[0, 0, 0, 0]']
+      - ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'i32x4::splat']
+      - ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'u32x4::splat']
+      - ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'f32x4::splat']

-      - ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1', 'vld1q_lane_s64::<0>', 'i64x2::splat(0)', '[0, 0]']
-      - ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1', 'vld1q_lane_u64::<0>', 'u64x2::splat(0)', '[0, 0]']
+      - ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1r', 'i64x2::splat']
+      - ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1r', 'u64x2::splat']
    compose:
-      - Let:
-          - x
-          - FnCall:
-              - '{type[5]}'
-              - - ptr
-                - FnCall: [transmute, ['{type[6]}']]
-      - FnCall: ['simd_shuffle!', [x, x, '{type[7]}']]
+      - FnCall:
+          - transmute
+          - - FnCall: ['{type[5]}', ["*ptr"]]

  - name: "{type[0]}"
    doc: "Absolute difference and accumulate (64-bit)"
--- a/library/stdarch/crates/stdarch-gen-arm/src/expression.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/expression.rs
@ -1,5 +1,4 @@
 use itertools::Itertools;
-use lazy_static::lazy_static;
 use proc_macro2::{Literal, Punct, Spacing, TokenStream};
 use quote::{ToTokens, TokenStreamExt, format_ident, quote};
 use regex::Regex;
@ -7,6 +6,7 @@ use serde::de::{self, MapAccess, Visitor};
 use serde::{Deserialize, Deserializer, Serialize};
 use std::fmt;
 use std::str::FromStr;
+use std::sync::LazyLock;

 use crate::intrinsic::Intrinsic;
 use crate::wildstring::WildStringPart;
@ -374,10 +374,8 @@ impl FromStr for Expression {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        lazy_static! {
-            static ref MACRO_RE: Regex =
-                Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap();
-        }
+        static MACRO_RE: LazyLock<Regex> =
+            LazyLock::new(|| Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap());

        if s == "SvUndef" {
            Ok(Expression::SvUndef)
--- a/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/load_store_tests.rs
@ -2,6 +2,7 @@ use std::fs::File;
 use std::io::Write;
 use std::path::PathBuf;
 use std::str::FromStr;
+use std::sync::LazyLock;

 use crate::format_code;
 use crate::input::InputType;
@ -10,7 +11,6 @@ use crate::typekinds::BaseType;
 use crate::typekinds::{ToRepr, TypeKind};

 use itertools::Itertools;
-use lazy_static::lazy_static;
 use proc_macro2::TokenStream;
 use quote::{format_ident, quote};

@ -639,8 +639,8 @@ impl LdIntrCharacteristics {
    }
 }

-lazy_static! {
-    static ref PREAMBLE: String = format!(
+static PREAMBLE: LazyLock<String> = LazyLock::new(|| {
+    format!(
        r#"#![allow(unused)]

 use super::*;
@ -801,13 +801,11 @@ fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {{
    assert!(!svptest_any(defined, cmp))
 }}
 "#
-    );
-}
+    )
+});

-lazy_static! {
-    static ref MANUAL_TESTS: String = format!(
-        "#[simd_test(enable = \"sve\")]
-unsafe fn test_ffr() {{
+const MANUAL_TESTS: &str = "#[simd_test(enable = \"sve\")]
+unsafe fn test_ffr() {
    svsetffr();
    let ffr = svrdffr();
    assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svindex_u8(1, 0));
@ -816,7 +814,5 @@ unsafe fn test_ffr() {{
    svwrffr(pred);
    let ffr = svrdffr_z(svptrue_b8());
    assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svdup_n_u8_z(pred, 1));
-}}
-"
-    );
 }
+";
--- a/library/stdarch/crates/stdarch-gen-arm/src/typekinds.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/typekinds.rs
@ -1,10 +1,10 @@
-use lazy_static::lazy_static;
 use proc_macro2::TokenStream;
 use quote::{ToTokens, TokenStreamExt, quote};
 use regex::Regex;
 use serde_with::{DeserializeFromStr, SerializeDisplay};
 use std::fmt;
 use std::str::FromStr;
+use std::sync::LazyLock;

 use crate::context;
 use crate::expression::{Expression, FnCall};
@ -496,9 +496,9 @@ impl FromStr for VectorType {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        lazy_static! {
-            static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap();
-        }
+        static RE: LazyLock<Regex> = LazyLock::new(|| {
+            Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap()
+        });

        if let Some(c) = RE.captures(s) {
            let (base_type, lanes) = Self::sanitise_lanes(
@ -698,9 +698,8 @@ impl FromStr for BaseType {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        lazy_static! {
-            static ref RE: Regex = Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap();
-        }
+        static RE: LazyLock<Regex> =
+            LazyLock::new(|| Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap());

        if let Some(c) = RE.captures(s) {
            let kind = c["kind"].parse()?;
--- a/library/stdarch/crates/stdarch-gen-arm/src/wildcards.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/wildcards.rs
@ -1,8 +1,7 @@
-use lazy_static::lazy_static;
 use regex::Regex;
 use serde_with::{DeserializeFromStr, SerializeDisplay};
-use std::fmt;
 use std::str::FromStr;
+use std::{fmt, sync::LazyLock};

 use crate::{
    fn_suffix::SuffixKind,
@ -66,9 +65,9 @@ impl FromStr for Wildcard {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        lazy_static! {
-            static ref RE: Regex = Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap();
-        }
+        static RE: LazyLock<Regex> = LazyLock::new(|| {
+            Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap()
+        });

        if let Some(c) = RE.captures(s) {
            let wildcard_name = &c["wildcard"];
--- a/library/stdarch/crates/stdarch-gen-loongarch/README.md
+++ b/library/stdarch/crates/stdarch-gen-loongarch/README.md
@ -11,7 +11,6 @@ LSX:
 # Generate bindings
 OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsxintrin.h
 OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec
-rustfmt crates/core_arch/src/loongarch64/lsx/generated.rs

 # Generate tests
 OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec test
@ -25,7 +24,6 @@ LASX:
 # Generate bindings
 OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasxintrin.h
 OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec
-rustfmt crates/core_arch/src/loongarch64/lasx/generated.rs

 # Generate tests
 OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec test
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/main.rs
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/main.rs
@ -274,13 +274,14 @@ fn gen_bind_body(
        }
    };

+    let is_mem = in_t.iter().any(|s| s.contains("POINTER"));
    let is_store = current_name.to_string().contains("vst");
    let link_function = {
        let fn_decl = {
            let fn_output = if out_t.to_lowercase() == "void" {
                String::new()
            } else {
-                format!("-> {}", type_to_rst(out_t, is_store))
+                format!(" -> {}", type_to_rst(out_t, is_store))
            };
            let fn_inputs = match para_num {
                1 => format!("(a: {})", type_to_rst(in_t[0], is_store)),
@ -304,7 +305,7 @@ fn gen_bind_body(
                ),
                _ => panic!("unsupported parameter number"),
            };
-            format!("fn __{current_name}{fn_inputs} {fn_output};")
+            format!("fn __{current_name}{fn_inputs}{fn_output};")
        };
        let function = format!(
            r#"    #[link_name = "llvm.loongarch.{}"]
@ -456,31 +457,40 @@ fn gen_bind_body(
            };
            rustc_legacy_const_generics = "rustc_legacy_const_generics(2, 3)";
        }
-        format!("pub unsafe fn {current_name}{fn_inputs} {fn_output}")
+        format!(
+            "pub {}fn {current_name}{fn_inputs} {fn_output}",
+            if is_mem { "unsafe " } else { "" }
+        )
    };
+    let unsafe_start = if !is_mem { "unsafe { " } else { "" };
+    let unsafe_end = if !is_mem { " }" } else { "" };
    let mut call_params = {
        match para_num {
-            1 => format!("__{current_name}(a)"),
-            2 => format!("__{current_name}(a, b)"),
-            3 => format!("__{current_name}(a, b, c)"),
-            4 => format!("__{current_name}(a, b, c, d)"),
+            1 => format!("{unsafe_start}__{current_name}(a){unsafe_end}"),
+            2 => format!("{unsafe_start}__{current_name}(a, b){unsafe_end}"),
+            3 => format!("{unsafe_start}__{current_name}(a, b, c){unsafe_end}"),
+            4 => format!("{unsafe_start}__{current_name}(a, b, c, d){unsafe_end}"),
            _ => panic!("unsupported parameter number"),
        }
    };
    if para_num == 1 && in_t[0] == "HI" {
        call_params = match asm_fmts[1].as_str() {
            "si10" => {
-                format!("static_assert_simm_bits!(IMM_S10, 10);\n    __{current_name}(IMM_S10)")
+                format!(
+                    "static_assert_simm_bits!(IMM_S10, 10);\n    {unsafe_start}__{current_name}(IMM_S10){unsafe_end}"
+                )
            }
            "i13" => {
-                format!("static_assert_simm_bits!(IMM_S13, 13);\n    __{current_name}(IMM_S13)")
+                format!(
+                    "static_assert_simm_bits!(IMM_S13, 13);\n    {unsafe_start}__{current_name}(IMM_S13){unsafe_end}"
+                )
            }
            _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
        }
    } else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") {
        call_params = if asm_fmts[2].starts_with("ui") {
            format!(
-                "static_assert_uimm_bits!(IMM{0}, {0});\n    __{current_name}(a, IMM{0})",
+                "static_assert_uimm_bits!(IMM{0}, {0});\n    {unsafe_start}__{current_name}(a, IMM{0}){unsafe_end}",
                asm_fmts[2].get(2..).unwrap()
            )
        } else {
@ -489,14 +499,16 @@ fn gen_bind_body(
    } else if para_num == 2 && in_t[1] == "QI" {
        call_params = match asm_fmts[2].as_str() {
            "si5" => {
-                format!("static_assert_simm_bits!(IMM_S5, 5);\n    __{current_name}(a, IMM_S5)")
+                format!(
+                    "static_assert_simm_bits!(IMM_S5, 5);\n    {unsafe_start}__{current_name}(a, IMM_S5){unsafe_end}"
+                )
            }
            _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
        };
    } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" {
        call_params = if asm_fmts[2].starts_with("si") {
            format!(
-                "static_assert_simm_bits!(IMM_S{0}, {0});\n    __{current_name}(mem_addr, IMM_S{0})",
+                "static_assert_simm_bits!(IMM_S{0}, {0});\n    {unsafe_start}__{current_name}(mem_addr, IMM_S{0}){unsafe_end}",
                asm_fmts[2].get(2..).unwrap()
            )
        } else {
@ -504,13 +516,13 @@ fn gen_bind_body(
        }
    } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" {
        call_params = match asm_fmts[2].as_str() {
-            "rk" => format!("__{current_name}(mem_addr, b)"),
+            "rk" => format!("{unsafe_start}__{current_name}(mem_addr, b){unsafe_end}"),
            _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
        };
    } else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") {
        call_params = if asm_fmts[2].starts_with("ui") {
            format!(
-                "static_assert_uimm_bits!(IMM{0}, {0});\n    __{current_name}(a, b, IMM{0})",
+                "static_assert_uimm_bits!(IMM{0}, {0});\n    {unsafe_start}__{current_name}(a, b, IMM{0}){unsafe_end}",
                asm_fmts[2].get(2..).unwrap()
            )
        } else {
@ -519,19 +531,19 @@ fn gen_bind_body(
    } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" {
        call_params = match asm_fmts[2].as_str() {
            "si12" => format!(
-                "static_assert_simm_bits!(IMM_S12, 12);\n    __{current_name}(a, mem_addr, IMM_S12)"
+                "static_assert_simm_bits!(IMM_S12, 12);\n    {unsafe_start}__{current_name}(a, mem_addr, IMM_S12){unsafe_end}"
            ),
            _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
        };
    } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" {
        call_params = match asm_fmts[2].as_str() {
-            "rk" => format!("__{current_name}(a, mem_addr, b)"),
+            "rk" => format!("{unsafe_start}__{current_name}(a, mem_addr, b){unsafe_end}"),
            _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
        };
    } else if para_num == 4 {
        call_params = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) {
            ("si8", t) => format!(
-                "static_assert_simm_bits!(IMM_S8, 8);\n    static_assert_uimm_bits!(IMM{0}, {0});\n    __{current_name}(a, mem_addr, IMM_S8, IMM{0})",
+                "static_assert_simm_bits!(IMM_S8, 8);\n    static_assert_uimm_bits!(IMM{0}, {0});\n    {unsafe_start}__{current_name}(a, mem_addr, IMM_S8, IMM{0}){unsafe_end}",
                type_to_imm(t)
            ),
            (_, _) => panic!(
--- a/library/stdarch/crates/stdarch-test/Cargo.toml
+++ b/library/stdarch/crates/stdarch-test/Cargo.toml
@ -7,7 +7,6 @@ edition = "2024"
 [dependencies]
 assert-instr-macro = { path = "../assert-instr-macro" }
 simd-test-macro = { path = "../simd-test-macro" }
-lazy_static = "1.0"
 rustc-demangle = "0.1.8"
 cfg-if = "1.0"

@ -20,7 +19,7 @@ cc = "1.0"
 # time, and we want to make updates to this explicit rather than automatically
 # picking up updates which might break CI with new instruction names.
 [target.'cfg(target_arch = "wasm32")'.dependencies]
-wasmprinter = "=0.2.67"
+wasmprinter = "=0.235"

 [features]
 default = []
--- a/library/stdarch/crates/stdarch-test/src/lib.rs
+++ b/library/stdarch/crates/stdarch-test/src/lib.rs
@ -6,14 +6,12 @@
 #![deny(rust_2018_idioms)]
 #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]

-#[macro_use]
-extern crate lazy_static;
 #[macro_use]
 extern crate cfg_if;

 pub use assert_instr_macro::*;
 pub use simd_test_macro::*;
-use std::{cmp, collections::HashSet, env, hash, hint::black_box, str};
+use std::{cmp, collections::HashSet, env, hash, hint::black_box, str, sync::LazyLock};

 cfg_if! {
    if #[cfg(target_arch = "wasm32")] {
@ -25,9 +23,7 @@ cfg_if! {
    }
 }

-lazy_static! {
-    static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
-}
+static DISASSEMBLY: LazyLock<HashSet<Function>> = LazyLock::new(disassemble_myself);

 #[derive(Debug)]
 struct Function {
@ -65,11 +61,12 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
    black_box(shim_addr);

    //eprintln!("shim name: {fnname}");
-    let function = &DISASSEMBLY
-        .get(&Function::new(fnname))
-        .unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly"));
+    let Some(function) = &DISASSEMBLY.get(&Function::new(fnname)) else {
+        panic!("function `{fnname}` not found in the disassembly")
+    };
    //eprintln!("  function: {:?}", function);

+    // Trim any filler instructions.
    let mut instrs = &function.instrs[..];
    while instrs.last().is_some_and(|s| s == "nop" || s == "int3") {
        instrs = &instrs[..instrs.len() - 1];
@ -84,12 +81,26 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
    // 2. It is a mark, indicating that the instruction will be
    // compiled into other instructions - mainly because of llvm
    // optimization.
-    let expected = if expected == "unknown" {
-        "<unknown>" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed
-    } else {
-        expected
+    let expected = match expected {
+        // `<unknown>` is what LLVM will generate for unknown instructions. We use this to fail
+        // loudly when LLVM does start supporting these instructions.
+        //
+        // This was introduced in https://github.com/rust-lang/stdarch/pull/1674 to work around the
+        // RISC-V P extension not yet being supported.
+        "unknown" => "<unknown>",
+        _ => expected,
    };
-    let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected));
+
+    // Check whether the given instruction is part of the disassemblied body.
+    let found = expected == "nop"
+        || instrs.iter().any(|instruction| {
+            instruction.starts_with(expected)
+            // Check that the next character is non-alphanumeric. This prevents false negatives
+            // when e.g. `fminnm` was used but `fmin` was expected.
+            //
+            // TODO: resolve the conflicts (x86_64 and aarch64 have a bunch, probably others)
+            // && !instruction[expected.len()..].starts_with(|c: char| c.is_ascii_alphanumeric())
+        });

    // Look for subroutine call instructions in the disassembly to detect whether
    // inlining failed: all intrinsics are `#[inline(always)]`, so calling one
--- a/library/stdarch/examples/connect5.rs
+++ b/library/stdarch/examples/connect5.rs
@ -558,7 +558,8 @@ fn search(pos: &Pos, alpha: i32, beta: i32, depth: i32, _ply: i32) -> i32 {
    assert_ne!(bm, MOVE_NONE);
    assert!(bs >= -EVAL_INF && bs <= EVAL_INF);

-    if _ply == 0 { bm } else { bs } //best move at the root node, best score elsewhere
+    // best move at the root node, best score elsewhere
+    if _ply == 0 { bm } else { bs }
 }

 /// Evaluation function: give different scores to different patterns after a fixed depth.
@ -570,15 +571,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if check_x86_avx512_features() {
-            unsafe {
-                if check_patternlive4_avx512(pos, def) {
-                    return -4096;
-                }
-            }
-        } else {
-            if check_patternlive4(pos, def) {
+            if unsafe { check_patternlive4_avx512(pos, def) } {
                return -4096;
            }
+        } else if check_patternlive4(pos, def) {
+            return -4096;
        }
    }

@ -593,15 +590,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if check_x86_avx512_features() {
-            unsafe {
-                if check_patternlive4_avx512(pos, atk) {
-                    return 2560;
-                }
-            }
-        } else {
-            if check_patternlive4(pos, atk) {
+            if unsafe { check_patternlive4_avx512(pos, atk) } {
                return 2560;
            }
+        } else if check_patternlive4(pos, atk) {
+            return 2560;
        }
    }

@ -616,15 +609,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if check_x86_avx512_features() {
-            unsafe {
-                if check_patterndead4_avx512(pos, atk) > 0 {
-                    return 2560;
-                }
-            }
-        } else {
-            if check_patterndead4(pos, atk) > 0 {
+            if unsafe { check_patterndead4_avx512(pos, atk) > 0 } {
                return 2560;
            }
+        } else if check_patterndead4(pos, atk) > 0 {
+            return 2560;
        }
    }

@ -909,9 +898,7 @@ fn pos_is_winner_avx512(pos: &Pos) -> bool {
                                        0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
    let mut count_match: i32 = 0;

-    for dir in 0..2 {
-        // direction 0 and 1
-        let mut board0 = board0org[dir];
+    for mut board0 in board0org {
        let boardf = _mm512_and_si512(answer, board0);
        let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
        count_match += _popcnt32(temp_mask as i32);
--- a/library/stdarch/triagebot.toml
+++ b/library/stdarch/triagebot.toml
@ -1,7 +1,7 @@
 [assign]

 [assign.owners]
-"*" = ["@Amanieu"]
+"*" = ["@Amanieu", "@folkertdev", "@sayantn"]

 [ping.windows]
 message = """\