Auto merge of #144222 - Kobzol:stdarch-push, r=folkertdev
stdarch subtree update
Subtree update of `stdarch` to 5531955678.
Created using https://github.com/rust-lang/josh-sync.
I saw that there were non-trivial changes made to `std_detect` in `stdarch` recently. So I want to get them merged here before we move forward with https://github.com/rust-lang/rust/pull/143412.
r? `@folkertdev`
This commit is contained in:
commit
2e53675668
72 changed files with 4644 additions and 5044 deletions
23
library/stdarch/.github/workflows/main.yml
vendored
23
library/stdarch/.github/workflows/main.yml
vendored
|
|
@ -255,6 +255,28 @@ jobs:
|
|||
env:
|
||||
TARGET: ${{ matrix.target.tuple }}
|
||||
|
||||
# Check that the generated files agree with the checked-in versions.
|
||||
check-stdarch-gen:
|
||||
needs: [style]
|
||||
name: Check stdarch-gen-{arm, loongarch} output
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly && rustup component add rustfmt
|
||||
- name: Check arm spec
|
||||
run: |
|
||||
cargo run --bin=stdarch-gen-arm --release -- crates/stdarch-gen-arm/spec
|
||||
git diff --exit-code
|
||||
- name: Check lsx.spec
|
||||
run: |
|
||||
cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lsx.spec
|
||||
git diff --exit-code
|
||||
- name: Check lasx.spec
|
||||
run: |
|
||||
cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lasx.spec
|
||||
git diff --exit-code
|
||||
|
||||
build-std-detect:
|
||||
needs: [style]
|
||||
name: Build std_detect
|
||||
|
|
@ -271,6 +293,7 @@ jobs:
|
|||
- verify
|
||||
- test
|
||||
- build-std-detect
|
||||
- check-stdarch-gen
|
||||
runs-on: ubuntu-latest
|
||||
# We need to ensure this job does *not* get skipped if its dependencies fail,
|
||||
# because a skipped job is considered a success by GitHub. So we have to
|
||||
|
|
|
|||
22
library/stdarch/.github/workflows/rustc-pull.yml
vendored
Normal file
22
library/stdarch/.github/workflows/rustc-pull.yml
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
|
||||
name: rustc-pull
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Run at 04:00 UTC every Monday and Thursday
|
||||
- cron: '0 4 * * 1,4'
|
||||
|
||||
jobs:
|
||||
pull:
|
||||
if: github.repository == 'rust-lang/stdarch'
|
||||
uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
|
||||
with:
|
||||
# https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
|
||||
zulip-stream-id: 208962
|
||||
zulip-bot-email: "stdarch-ci-bot@rust-lang.zulipchat.com"
|
||||
pr-base-branch: master
|
||||
branch-name: rustc-pull
|
||||
secrets:
|
||||
zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
@ -73,20 +73,26 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.4.0"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.26"
|
||||
version = "1.2.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
|
||||
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
|
@ -99,9 +105,9 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.40"
|
||||
version = "4.5.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
|
||||
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
|
|
@ -109,9 +115,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.40"
|
||||
version = "4.5.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
|
||||
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
|
|
@ -121,14 +127,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.40"
|
||||
version = "4.5.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
|
||||
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -338,9 +344,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.9.0"
|
||||
version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
|
||||
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.15.4",
|
||||
|
|
@ -403,9 +409,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.172"
|
||||
version = "0.2.174"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
|
||||
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
|
|
@ -624,7 +630,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -685,7 +691,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -703,7 +709,6 @@ name = "stdarch-gen-arm"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
|
|
@ -727,7 +732,6 @@ dependencies = [
|
|||
"assert-instr-macro",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"lazy_static",
|
||||
"rustc-demangle",
|
||||
"simd-test-macro",
|
||||
"wasmprinter",
|
||||
|
|
@ -742,7 +746,7 @@ dependencies = [
|
|||
"quote",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -780,9 +784,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.102"
|
||||
version = "2.0.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
|
||||
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -834,21 +838,23 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
|||
|
||||
[[package]]
|
||||
name = "wasmparser"
|
||||
version = "0.113.3"
|
||||
version = "0.235.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
|
||||
checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
|
||||
dependencies = [
|
||||
"indexmap 2.9.0",
|
||||
"bitflags",
|
||||
"indexmap 2.10.0",
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasmprinter"
|
||||
version = "0.2.67"
|
||||
version = "0.235.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
|
||||
checksum = "75aa8e9076de6b9544e6dab4badada518cca0bf4966d35b131bbd057aed8fa0a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"termcolor",
|
||||
"wasmparser",
|
||||
]
|
||||
|
||||
|
|
@ -945,20 +951,20 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.25"
|
||||
version = "0.8.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
|
||||
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.25"
|
||||
version = "0.8.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
|
||||
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.102",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ members = [
|
|||
"examples",
|
||||
]
|
||||
exclude = [
|
||||
"crates/wasm-assert-instr-tests"
|
||||
"crates/wasm-assert-instr-tests",
|
||||
"rust_programs",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
|
|
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
clang \
|
||||
lld
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
|
|
@ -9,15 +9,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
clang \
|
||||
curl \
|
||||
xz-utils \
|
||||
lld
|
||||
|
||||
ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
|
||||
ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
|
||||
|
||||
# Download the aarch64_be gcc toolchain
|
||||
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
|
||||
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
|
||||
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
|
||||
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-19 \
|
||||
clang \
|
||||
lld
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
|
||||
gcc-loongarch64-linux-gnu libc6-dev-loong64-cross
|
||||
|
||||
|
||||
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
wget xz-utils make file llvm
|
||||
|
||||
ENV VERSION=2025.01.20
|
||||
ENV VERSION=2025.07.03
|
||||
|
||||
RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
|
||||
-O riscv-toolchain.tar.xz
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl ca-certificates \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||
|
|
@ -7,7 +7,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
|||
xz-utils \
|
||||
clang
|
||||
|
||||
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz | tar xJf -
|
||||
ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux
|
||||
ENV VERSION=v34.0.1
|
||||
|
||||
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf -
|
||||
ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux
|
||||
|
||||
ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:25.04
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
|
|
@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
wget \
|
||||
xz-utils
|
||||
|
||||
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
|
||||
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz
|
||||
RUN mkdir intel-sde
|
||||
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
|
||||
|
|
|
|||
|
|
@ -1,41 +1,35 @@
|
|||
# Copyright (C) 2024-2024 Intel Corporation.
|
||||
#
|
||||
# Copyright (C) 2017-2025 Intel Corporation.
|
||||
#
|
||||
# This software and the related documents are Intel copyrighted materials, and your
|
||||
# use of them is governed by the express license under which they were provided to
|
||||
# you ("License"). Unless the License provides otherwise, you may not use, modify,
|
||||
# copy, publish, distribute, disclose or transmit this software or the related
|
||||
# documents without Intel's prior written permission.
|
||||
#
|
||||
#
|
||||
# This software and the related documents are provided as is, with no express or
|
||||
# implied warranties, other than those that are expressly stated in the License.
|
||||
#
|
||||
# The CPUID information in this file is for software enabling purposes only and
|
||||
# it is not a full and accurate representation of the CPU under development which
|
||||
# it represents.
|
||||
# The CPUID information in this file is not a guarantee of the availability of
|
||||
# features or characteristics in the final released CPU.
|
||||
#
|
||||
# CPUID_VERSION = 1.0
|
||||
# Input => Output
|
||||
# EAX ECX => EAX EBX ECX EDX
|
||||
00000000 ******** => 00000024 68747541 444d4163 69746e65
|
||||
00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
|
||||
00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff
|
||||
00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
|
||||
00000003 ******** => 00000000 00000000 00000000 00000000
|
||||
00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
|
||||
00000004 00000000 => 7c004121 01c0003f 0000003f 00000000 #Deterministic Cache
|
||||
00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
|
||||
00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
|
||||
00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
|
||||
00000004 00000002 => 7c004143 03c0003f 000003ff 00000000
|
||||
00000004 00000003 => 7c0fc163 0280003f 0000dfff 00000004
|
||||
00000004 00000004 => 00000000 00000000 00000000 00000000
|
||||
00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
|
||||
00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
|
||||
00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
|
||||
00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
|
||||
00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features
|
||||
00000007 00000001 => 98ee00bf 00000002 00000020 1d29cd3e
|
||||
00000008 ******** => 00000000 00000000 00000000 00000000
|
||||
00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
|
||||
0000000a ******** => 07300403 00000000 00000000 00000603
|
||||
0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
|
||||
0000000b 00000001 => 00000004 00000002 00000201 0000001e
|
||||
0000000b 00000000 => 00000001 00000002 00000100 00000000 #Extended Topology
|
||||
0000000b 00000001 => 00000004 00000002 00000201 00000000
|
||||
0000000c ******** => 00000000 00000000 00000000 00000000
|
||||
0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
|
||||
0000000d 00000001 => 0000001f 00000240 00000100 00000000
|
||||
|
|
@ -52,10 +46,8 @@
|
|||
0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
|
||||
0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
|
||||
0000001e 00000001 => 000001ff 00000000 00000000 00000000
|
||||
0000001f 00000000 => 00000001 00000002 00000100 0000001e
|
||||
0000001f 00000001 => 00000007 00000070 00000201 0000001e
|
||||
0000001f 00000002 => 00000000 00000000 00000002 0000001e
|
||||
00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
|
||||
00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10
|
||||
00000024 00000001 => 00000000 00000000 00000004 00000000
|
||||
80000000 ******** => 80000008 00000000 00000000 00000000
|
||||
80000001 ******** => 00000000 00000000 00200961 2c100000
|
||||
80000002 ******** => 00000000 00000000 00000000 00000000
|
||||
|
|
@ -66,6 +58,6 @@
|
|||
80000007 ******** => 00000000 00000000 00000000 00000100
|
||||
80000008 ******** => 00003028 00000200 00000200 00000000
|
||||
|
||||
# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
|
||||
# This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to
|
||||
# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
|
||||
# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.
|
||||
|
|
|
|||
|
|
@ -144,21 +144,21 @@ case ${TARGET} in
|
|||
aarch64-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
;;
|
||||
|
||||
armv7-unknown-linux-gnueabihf*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
|
||||
TEST_CXX_COMPILER="clang++-19"
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
|
||||
;;
|
||||
*)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -193,6 +193,7 @@ others at:
|
|||
* [`powerpc64`]
|
||||
* [`nvptx`]
|
||||
* [`wasm32`]
|
||||
* [`loongarch32`]
|
||||
* [`loongarch64`]
|
||||
* [`s390x`]
|
||||
|
||||
|
|
@ -208,6 +209,7 @@ others at:
|
|||
[`powerpc64`]: ../../core/arch/powerpc64/index.html
|
||||
[`nvptx`]: ../../core/arch/nvptx/index.html
|
||||
[`wasm32`]: ../../core/arch/wasm32/index.html
|
||||
[`loongarch32`]: ../../core/arch/loongarch32/index.html
|
||||
[`loongarch64`]: ../../core/arch/loongarch64/index.html
|
||||
[`s390x`]: ../../core/arch/s390x/index.html
|
||||
|
||||
|
|
|
|||
47
library/stdarch/crates/core_arch/src/loongarch32/mod.rs
Normal file
47
library/stdarch/crates/core_arch/src/loongarch32/mod.rs
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
//! `LoongArch32` intrinsics
|
||||
|
||||
use crate::arch::asm;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.loongarch.cacop.w"]
|
||||
fn __cacop(a: i32, b: i32, c: i32);
|
||||
#[link_name = "llvm.loongarch.csrrd.w"]
|
||||
fn __csrrd(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.csrwr.w"]
|
||||
fn __csrwr(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.csrxchg.w"]
|
||||
fn __csrxchg(a: i32, b: i32, c: i32) -> i32;
|
||||
}
|
||||
|
||||
/// Generates the cache operation instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn cacop<const IMM12: i32>(a: i32, b: i32) {
|
||||
static_assert_simm_bits!(IMM12, 12);
|
||||
__cacop(a, b, IMM12);
|
||||
}
|
||||
|
||||
/// Reads the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrrd<const IMM14: i32>() -> i32 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrrd(IMM14)
|
||||
}
|
||||
|
||||
/// Writes the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrwr<const IMM14: i32>(a: i32) -> i32 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrwr(a, IMM14)
|
||||
}
|
||||
|
||||
/// Exchanges the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn csrxchg<const IMM14: i32>(a: i32, b: i32) -> i32 {
|
||||
static_assert_uimm_bits!(IMM14, 14);
|
||||
__csrxchg(a, b, IMM14)
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,4 +1,4 @@
|
|||
//! `LoongArch` intrinsics
|
||||
//! `LoongArch64` intrinsics
|
||||
|
||||
mod lasx;
|
||||
mod lsx;
|
||||
|
|
@ -13,89 +13,30 @@ use crate::arch::asm;
|
|||
/// Reads the 64-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtime_d() -> (i64, isize) {
|
||||
let val: i64;
|
||||
let tid: isize;
|
||||
asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
/// Reads the lower 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtimel_w() -> (i32, isize) {
|
||||
let val: i32;
|
||||
let tid: isize;
|
||||
asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
/// Reads the upper 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn rdtimeh_w() -> (i32, isize) {
|
||||
let val: i32;
|
||||
let tid: isize;
|
||||
asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
|
||||
pub fn rdtime_d() -> (i64, isize) {
|
||||
let (val, tid): (i64, isize);
|
||||
unsafe { asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.loongarch.crc.w.b.w"]
|
||||
fn __crc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.h.w"]
|
||||
fn __crc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.w.w"]
|
||||
fn __crc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.d.w"]
|
||||
fn __crc_w_d_w(a: i64, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.b.w"]
|
||||
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.h.w"]
|
||||
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.w.w"]
|
||||
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.d.w"]
|
||||
fn __crcc_w_d_w(a: i64, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.cacop.d"]
|
||||
fn __cacop(a: i64, b: i64, c: i64);
|
||||
#[link_name = "llvm.loongarch.dbar"]
|
||||
fn __dbar(a: i32);
|
||||
#[link_name = "llvm.loongarch.ibar"]
|
||||
fn __ibar(a: i32);
|
||||
#[link_name = "llvm.loongarch.movgr2fcsr"]
|
||||
fn __movgr2fcsr(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.movfcsr2gr"]
|
||||
fn __movfcsr2gr(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.csrrd.d"]
|
||||
fn __csrrd(a: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.csrwr.d"]
|
||||
fn __csrwr(a: i64, b: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.csrxchg.d"]
|
||||
fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.b"]
|
||||
fn __iocsrrd_b(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.h"]
|
||||
fn __iocsrrd_h(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.w"]
|
||||
fn __iocsrrd_w(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.d"]
|
||||
fn __iocsrrd_d(a: i32) -> i64;
|
||||
#[link_name = "llvm.loongarch.iocsrwr.b"]
|
||||
fn __iocsrwr_b(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.h"]
|
||||
fn __iocsrwr_h(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.w"]
|
||||
fn __iocsrwr_w(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.d"]
|
||||
fn __iocsrwr_d(a: i64, b: i32);
|
||||
#[link_name = "llvm.loongarch.break"]
|
||||
fn __break(a: i32);
|
||||
#[link_name = "llvm.loongarch.cpucfg"]
|
||||
fn __cpucfg(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.syscall"]
|
||||
fn __syscall(a: i32);
|
||||
#[link_name = "llvm.loongarch.asrtle.d"]
|
||||
fn __asrtle(a: i64, b: i64);
|
||||
#[link_name = "llvm.loongarch.asrtgt.d"]
|
||||
|
|
@ -104,70 +45,20 @@ unsafe extern "unadjusted" {
|
|||
fn __lddir(a: i64, b: i64) -> i64;
|
||||
#[link_name = "llvm.loongarch.ldpte.d"]
|
||||
fn __ldpte(a: i64, b: i64);
|
||||
#[link_name = "llvm.loongarch.frecipe.s"]
|
||||
fn __frecipe_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frecipe.d"]
|
||||
fn __frecipe_d(a: f64) -> f64;
|
||||
#[link_name = "llvm.loongarch.frsqrte.s"]
|
||||
fn __frsqrte_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frsqrte.d"]
|
||||
fn __frsqrte_d(a: f64) -> f64;
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_b_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_h_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
__crc_w_w_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
__crc_w_d_w(a, b)
|
||||
pub fn crc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
unsafe { __crc_w_d_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_b_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_h_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
__crcc_w_w_w(a, b)
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
__crcc_w_d_w(a, b)
|
||||
pub fn crcc_w_d_w(a: i64, b: i32) -> i32 {
|
||||
unsafe { __crcc_w_d_w(a, b) }
|
||||
}
|
||||
|
||||
/// Generates the cache operation instruction
|
||||
|
|
@ -178,38 +69,6 @@ pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
|
|||
__cacop(a, b, IMM12);
|
||||
}
|
||||
|
||||
/// Generates the memory barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn dbar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__dbar(IMM15);
|
||||
}
|
||||
|
||||
/// Generates the instruction-fetch barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn ibar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__ibar(IMM15);
|
||||
}
|
||||
|
||||
/// Moves data from a GPR to the FCSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
__movgr2fcsr(IMM5, a);
|
||||
}
|
||||
|
||||
/// Moves data from a FCSR to the GPR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
__movfcsr2gr(IMM5)
|
||||
}
|
||||
|
||||
/// Reads the CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
|
@ -234,27 +93,6 @@ pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
|
|||
__csrxchg(a, b, IMM14)
|
||||
}
|
||||
|
||||
/// Reads the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
|
||||
__iocsrrd_b(a)
|
||||
}
|
||||
|
||||
/// Reads the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
|
||||
__iocsrrd_h(a)
|
||||
}
|
||||
|
||||
/// Reads the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
|
||||
__iocsrrd_w(a)
|
||||
}
|
||||
|
||||
/// Reads the 64-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
|
@ -262,27 +100,6 @@ pub unsafe fn iocsrrd_d(a: i32) -> i64 {
|
|||
__iocsrrd_d(a)
|
||||
}
|
||||
|
||||
/// Writes the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
|
||||
__iocsrwr_b(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
|
||||
__iocsrwr_h(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
|
||||
__iocsrwr_w(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 64-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
|
@ -290,29 +107,6 @@ pub unsafe fn iocsrwr_d(a: i64, b: i32) {
|
|||
__iocsrwr_d(a, b)
|
||||
}
|
||||
|
||||
/// Generates the breakpoint instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn brk<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__break(IMM15);
|
||||
}
|
||||
|
||||
/// Reads the CPU configuration register
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn cpucfg(a: i32) -> i32 {
|
||||
__cpucfg(a)
|
||||
}
|
||||
|
||||
/// Generates the syscall instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn syscall<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__syscall(IMM15);
|
||||
}
|
||||
|
||||
/// Generates the less-than-or-equal asseration instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
|
@ -342,35 +136,3 @@ pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
|
|||
pub unsafe fn ldpte<const B: i64>(a: i64) {
|
||||
__ldpte(a, B)
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of 1.0 divided
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frecipe_s(a: f32) -> f32 {
|
||||
__frecipe_s(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of 1.0 divided
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frecipe_d(a: f64) -> f64 {
|
||||
__frecipe_d(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of dividing 1.0 by the square root
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frsqrte_s(a: f32) -> f32 {
|
||||
__frsqrte_s(a)
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of dividing 1.0 by the square root
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn frsqrte_d(a: f64) -> f64 {
|
||||
__frsqrte_d(a)
|
||||
}
|
||||
|
|
|
|||
242
library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
Normal file
242
library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
//! `Shared LoongArch` intrinsics
|
||||
|
||||
use crate::arch::asm;
|
||||
|
||||
/// Reads the lower 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn rdtimel_w() -> (i32, isize) {
|
||||
let (val, tid): (i32, isize);
|
||||
unsafe { asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
/// Reads the upper 32-bit stable counter value and the counter ID
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn rdtimeh_w() -> (i32, isize) {
|
||||
let (val, tid): (i32, isize);
|
||||
unsafe { asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
|
||||
(val, tid)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.loongarch.crc.w.b.w"]
|
||||
fn __crc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.h.w"]
|
||||
fn __crc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crc.w.w.w"]
|
||||
fn __crc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.b.w"]
|
||||
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.h.w"]
|
||||
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.crcc.w.w.w"]
|
||||
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.dbar"]
|
||||
fn __dbar(a: i32);
|
||||
#[link_name = "llvm.loongarch.ibar"]
|
||||
fn __ibar(a: i32);
|
||||
#[link_name = "llvm.loongarch.movgr2fcsr"]
|
||||
fn __movgr2fcsr(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.movfcsr2gr"]
|
||||
fn __movfcsr2gr(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.b"]
|
||||
fn __iocsrrd_b(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.h"]
|
||||
fn __iocsrrd_h(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrrd.w"]
|
||||
fn __iocsrrd_w(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.iocsrwr.b"]
|
||||
fn __iocsrwr_b(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.h"]
|
||||
fn __iocsrwr_h(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.iocsrwr.w"]
|
||||
fn __iocsrwr_w(a: i32, b: i32);
|
||||
#[link_name = "llvm.loongarch.break"]
|
||||
fn __break(a: i32);
|
||||
#[link_name = "llvm.loongarch.cpucfg"]
|
||||
fn __cpucfg(a: i32) -> i32;
|
||||
#[link_name = "llvm.loongarch.syscall"]
|
||||
fn __syscall(a: i32);
|
||||
#[link_name = "llvm.loongarch.frecipe.s"]
|
||||
fn __frecipe_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frecipe.d"]
|
||||
fn __frecipe_d(a: f64) -> f64;
|
||||
#[link_name = "llvm.loongarch.frsqrte.s"]
|
||||
fn __frsqrte_s(a: f32) -> f32;
|
||||
#[link_name = "llvm.loongarch.frsqrte.d"]
|
||||
fn __frsqrte_d(a: f64) -> f64;
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crc_w_b_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crc_w_h_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crc_w_w_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crcc_w_b_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crcc_w_b_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crcc_w_h_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crcc_w_h_w(a, b) }
|
||||
}
|
||||
|
||||
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn crcc_w_w_w(a: i32, b: i32) -> i32 {
|
||||
unsafe { __crcc_w_w_w(a, b) }
|
||||
}
|
||||
|
||||
/// Generates the memory barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn dbar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
unsafe { __dbar(IMM15) };
|
||||
}
|
||||
|
||||
/// Generates the instruction-fetch barrier instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn ibar<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
unsafe { __ibar(IMM15) };
|
||||
}
|
||||
|
||||
/// Moves data from a GPR to the FCSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
__movgr2fcsr(IMM5, a);
|
||||
}
|
||||
|
||||
/// Moves data from a FCSR to the GPR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn movfcsr2gr<const IMM5: i32>() -> i32 {
|
||||
static_assert_uimm_bits!(IMM5, 5);
|
||||
unsafe { __movfcsr2gr(IMM5) }
|
||||
}
|
||||
|
||||
/// Reads the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
|
||||
__iocsrrd_b(a)
|
||||
}
|
||||
|
||||
/// Reads the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
|
||||
__iocsrrd_h(a)
|
||||
}
|
||||
|
||||
/// Reads the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
|
||||
__iocsrrd_w(a)
|
||||
}
|
||||
|
||||
/// Writes the 8-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
|
||||
__iocsrwr_b(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 16-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
|
||||
__iocsrwr_h(a, b)
|
||||
}
|
||||
|
||||
/// Writes the 32-bit IO-CSR
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
|
||||
__iocsrwr_w(a, b)
|
||||
}
|
||||
|
||||
/// Generates the breakpoint instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn brk<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__break(IMM15);
|
||||
}
|
||||
|
||||
/// Reads the CPU configuration register
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn cpucfg(a: i32) -> i32 {
|
||||
unsafe { __cpucfg(a) }
|
||||
}
|
||||
|
||||
/// Generates the syscall instruction
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub unsafe fn syscall<const IMM15: i32>() {
|
||||
static_assert_uimm_bits!(IMM15, 15);
|
||||
__syscall(IMM15);
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of 1.0 divided
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn frecipe_s(a: f32) -> f32 {
|
||||
unsafe { __frecipe_s(a) }
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of 1.0 divided
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn frecipe_d(a: f64) -> f64 {
|
||||
unsafe { __frecipe_d(a) }
|
||||
}
|
||||
|
||||
/// Calculate the approximate single-precision result of dividing 1.0 by the square root
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn frsqrte_s(a: f32) -> f32 {
|
||||
unsafe { __frsqrte_s(a) }
|
||||
}
|
||||
|
||||
/// Calculate the approximate double-precision result of dividing 1.0 by the square root
|
||||
#[inline]
|
||||
#[target_feature(enable = "frecipe")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn frsqrte_d(a: f64) -> f64 {
|
||||
unsafe { __frsqrte_d(a) }
|
||||
}
|
||||
|
|
@ -16,6 +16,9 @@ mod riscv_shared;
|
|||
))]
|
||||
mod arm_shared;
|
||||
|
||||
#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64", doc))]
|
||||
mod loongarch_shared;
|
||||
|
||||
mod simd;
|
||||
|
||||
#[doc = include_str!("core_arch_docs.md")]
|
||||
|
|
@ -271,13 +274,25 @@ pub mod arch {
|
|||
pub use crate::core_arch::nvptx::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `loongarch` platform.
|
||||
/// Platform-specific intrinsics for the `loongarch32` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "loongarch32", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch32"))]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub mod loongarch32 {
|
||||
pub use crate::core_arch::loongarch_shared::*;
|
||||
pub use crate::core_arch::loongarch32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `loongarch64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "loongarch64", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch64"))]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub mod loongarch64 {
|
||||
pub use crate::core_arch::loongarch_shared::*;
|
||||
pub use crate::core_arch::loongarch64::*;
|
||||
}
|
||||
|
||||
|
|
@ -334,6 +349,10 @@ mod powerpc64;
|
|||
#[doc(cfg(target_arch = "nvptx64"))]
|
||||
mod nvptx;
|
||||
|
||||
#[cfg(any(target_arch = "loongarch32", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch32"))]
|
||||
mod loongarch32;
|
||||
|
||||
#[cfg(any(target_arch = "loongarch64", doc))]
|
||||
#[doc(cfg(target_arch = "loongarch64"))]
|
||||
mod loongarch64;
|
||||
|
|
|
|||
|
|
@ -1181,6 +1181,20 @@ mod sealed {
|
|||
|
||||
impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
|
||||
|
||||
// Z vector intrinsic C23 math.h LLVM IR ISO/IEC 60559 operation inexact vfidb parameters
|
||||
//
|
||||
// vec_rint rint llvm.rint roundToIntegralExact yes 0, 0
|
||||
// vec_roundc nearbyint llvm.nearbyint n/a no 4, 0
|
||||
// vec_floor / vec_roundm floor llvm.floor roundToIntegralTowardNegative no 4, 7
|
||||
// vec_ceil / vec_roundp ceil llvm.ceil roundToIntegralTowardPositive no 4, 6
|
||||
// vec_trunc / vec_roundz trunc llvm.trunc roundToIntegralTowardZero no 4, 5
|
||||
// vec_round roundeven llvm.roundeven roundToIntegralTiesToEven no 4, 4
|
||||
// n/a round llvm.round roundToIntegralTiesAway no 4, 1
|
||||
|
||||
// `simd_round_ties_even` is implemented as `llvm.rint`.
|
||||
test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
|
||||
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
|
||||
|
||||
test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] }
|
||||
test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
|
||||
|
||||
|
|
@ -1189,9 +1203,6 @@ mod sealed {
|
|||
test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
|
||||
test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
|
||||
|
||||
test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
|
||||
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
|
||||
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub trait VectorRoundc {
|
||||
unsafe fn vec_roundc(self) -> Self;
|
||||
|
|
@ -2254,14 +2265,14 @@ mod sealed {
|
|||
|
||||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
#[cfg_attr(test, assert_instr("vlbb"))]
|
||||
#[cfg_attr(test, assert_instr(vlbb))]
|
||||
unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> {
|
||||
vector_signed_int::vec_load_bndry::<512>(ptr)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
#[cfg_attr(test, assert_instr(vst))]
|
||||
#[cfg_attr(test, assert_instr(vstl))]
|
||||
unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) {
|
||||
vector.vec_store_len(ptr, byte_count)
|
||||
}
|
||||
|
|
@ -2787,11 +2798,11 @@ mod sealed {
|
|||
}
|
||||
|
||||
test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] }
|
||||
test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] }
|
||||
test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalhw ] }
|
||||
test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] }
|
||||
|
||||
test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] }
|
||||
test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] }
|
||||
test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalhw ] }
|
||||
test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] }
|
||||
|
||||
impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char );
|
||||
|
|
|
|||
|
|
@ -1,64 +1,51 @@
|
|||
use crate::common::compile_c::CompilationCommandBuilder;
|
||||
use crate::common::gen_c::compile_c_programs;
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
|
||||
|
||||
pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
|
||||
let cpp_compiler = config.cpp_compiler.as_ref()?;
|
||||
|
||||
pub fn compile_c_arm(
|
||||
intrinsics_name_list: &[String],
|
||||
compiler: &str,
|
||||
target: &str,
|
||||
cxx_toolchain_dir: Option<&str>,
|
||||
) -> bool {
|
||||
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
|
||||
let mut command = CompilationCommandBuilder::new()
|
||||
.add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
|
||||
.set_compiler(compiler)
|
||||
.set_target(target)
|
||||
.set_compiler(cpp_compiler)
|
||||
.set_target(&config.target)
|
||||
.set_opt_level("2")
|
||||
.set_cxx_toolchain_dir(cxx_toolchain_dir)
|
||||
.set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
|
||||
.set_project_root("c_programs")
|
||||
.add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]);
|
||||
|
||||
if !target.contains("v7") {
|
||||
if !config.target.contains("v7") {
|
||||
command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]);
|
||||
}
|
||||
|
||||
/*
|
||||
* clang++ cannot link an aarch64_be object file, so we invoke
|
||||
* aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we
|
||||
* are testing the intrinsics against LLVM.
|
||||
*
|
||||
* Note: setting `--sysroot=<...>` which is the obvious thing to do
|
||||
* does not work as it gets caught up with `#include_next <stdlib.h>`
|
||||
* not existing...
|
||||
*/
|
||||
if target.contains("aarch64_be") {
|
||||
command = command
|
||||
.set_linker(
|
||||
cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++",
|
||||
)
|
||||
.set_include_paths(vec![
|
||||
"/include",
|
||||
"/aarch64_be-none-linux-gnu/include",
|
||||
"/aarch64_be-none-linux-gnu/include/c++/14.2.1",
|
||||
"/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu",
|
||||
"/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward",
|
||||
"/aarch64_be-none-linux-gnu/libc/usr/include",
|
||||
]);
|
||||
}
|
||||
|
||||
if !compiler.contains("clang") {
|
||||
if !cpp_compiler.contains("clang") {
|
||||
command = command.add_extra_flag("-flax-vector-conversions");
|
||||
}
|
||||
|
||||
let compiler_commands = intrinsics_name_list
|
||||
.iter()
|
||||
.map(|intrinsic_name| {
|
||||
command
|
||||
.clone()
|
||||
.set_input_name(intrinsic_name)
|
||||
.set_output_name(intrinsic_name)
|
||||
.make_string()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let mut cpp_compiler = command.into_cpp_compilation();
|
||||
|
||||
compile_c_programs(&compiler_commands)
|
||||
if config.target.contains("aarch64_be") {
|
||||
let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else {
|
||||
panic!(
|
||||
"target `{}` must specify `cxx_toolchain_dir`",
|
||||
config.target
|
||||
)
|
||||
};
|
||||
|
||||
cpp_compiler.command_mut().args([
|
||||
&format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"),
|
||||
"--include-directory",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"),
|
||||
"--include-directory",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"),
|
||||
"-L",
|
||||
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
|
||||
"-L",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"),
|
||||
"-B",
|
||||
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
|
||||
]);
|
||||
}
|
||||
|
||||
Some(cpp_compiler)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,7 +114,6 @@ pub const AARCH_CONFIGURATIONS: &str = r#"
|
|||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
|
||||
#![feature(fmt_helpers_for_derive)]
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use crate::common::argument::ArgumentList;
|
||||
use crate::common::indentation::Indentation;
|
||||
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
|
||||
use std::ops::Deref;
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ArmIntrinsicType(pub IntrinsicType);
|
||||
|
|
@ -15,6 +15,12 @@ impl Deref for ArmIntrinsicType {
|
|||
}
|
||||
}
|
||||
|
||||
impl DerefMut for ArmIntrinsicType {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
|
||||
fn arguments(&self) -> ArgumentList<ArmIntrinsicType> {
|
||||
self.arguments.clone()
|
||||
|
|
@ -73,8 +79,9 @@ impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
|
|||
TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(),
|
||||
TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(),
|
||||
TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(),
|
||||
TypeKind::Int => format!("int{}_t", self.results().inner_size()),
|
||||
TypeKind::UInt => format!("uint{}_t", self.results().inner_size()),
|
||||
TypeKind::Int(Sign::Signed) => format!("int{}_t", self.results().inner_size()),
|
||||
TypeKind::Int(Sign::Unsigned) =>
|
||||
format!("uint{}_t", self.results().inner_size()),
|
||||
TypeKind::Poly => format!("poly{}_t", self.results().inner_size()),
|
||||
ty => todo!("print_result_c - Unknown type: {:#?}", ty),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ fn json_to_intrinsic(
|
|||
Ok(Intrinsic {
|
||||
name,
|
||||
arguments,
|
||||
results: *results,
|
||||
results: results,
|
||||
arch_tags: intr.architectures,
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,15 +4,20 @@ mod intrinsic;
|
|||
mod json_parser;
|
||||
mod types;
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::arm::config::POLY128_OSTREAM_DEF;
|
||||
use crate::common::SupportedArchitectureTest;
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compare::compare_outputs;
|
||||
use crate::common::gen_c::{write_main_cpp, write_mod_cpp};
|
||||
use crate::common::gen_rust::compile_rust_programs;
|
||||
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
use crate::common::write_file::{write_c_testfiles, write_rust_testfiles};
|
||||
use compile::compile_c_arm;
|
||||
use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices};
|
||||
use crate::common::write_file::write_rust_testfiles;
|
||||
use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices};
|
||||
use intrinsic::ArmIntrinsicType;
|
||||
use json_parser::get_neon_intrinsics;
|
||||
|
||||
|
|
@ -21,6 +26,13 @@ pub struct ArmArchitectureTest {
|
|||
cli_options: ProcessedCli,
|
||||
}
|
||||
|
||||
fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
|
||||
let available_parallelism = std::thread::available_parallelism().unwrap().get();
|
||||
let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
|
||||
|
||||
(chunk_size, intrinsic_count.div_ceil(chunk_size))
|
||||
}
|
||||
|
||||
impl SupportedArchitectureTest for ArmArchitectureTest {
|
||||
fn create(cli_options: ProcessedCli) -> Box<Self> {
|
||||
let a32 = cli_options.target.contains("v7");
|
||||
|
|
@ -51,33 +63,58 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
|
|||
}
|
||||
|
||||
fn build_c_file(&self) -> bool {
|
||||
let compiler = self.cli_options.cpp_compiler.as_deref();
|
||||
let target = &self.cli_options.target;
|
||||
let cxx_toolchain_dir = self.cli_options.cxx_toolchain_dir.as_deref();
|
||||
let c_target = "aarch64";
|
||||
let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
|
||||
|
||||
let intrinsics_name_list = write_c_testfiles(
|
||||
&self
|
||||
.intrinsics
|
||||
.iter()
|
||||
.map(|i| i as &dyn IntrinsicDefinition<_>)
|
||||
.collect::<Vec<_>>(),
|
||||
target,
|
||||
let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len());
|
||||
|
||||
let cpp_compiler = compile::build_cpp_compilation(&self.cli_options).unwrap();
|
||||
|
||||
let notice = &build_notices("// ");
|
||||
self.intrinsics
|
||||
.par_chunks(chunk_size)
|
||||
.enumerate()
|
||||
.map(|(i, chunk)| {
|
||||
let c_filename = format!("c_programs/mod_{i}.cpp");
|
||||
let mut file = File::create(&c_filename).unwrap();
|
||||
write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap();
|
||||
|
||||
// compile this cpp file into a .o file
|
||||
let output = cpp_compiler
|
||||
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Result<(), std::io::Error>>()
|
||||
.unwrap();
|
||||
|
||||
let mut file = File::create("c_programs/main.cpp").unwrap();
|
||||
write_main_cpp(
|
||||
&mut file,
|
||||
c_target,
|
||||
&["arm_neon.h", "arm_acle.h", "arm_fp16.h"],
|
||||
&build_notices("// "),
|
||||
&[POLY128_OSTREAM_DEF],
|
||||
);
|
||||
POLY128_OSTREAM_DEF,
|
||||
self.intrinsics.iter().map(|i| i.name.as_str()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
match compiler {
|
||||
None => true,
|
||||
Some(compiler) => compile_c_arm(
|
||||
intrinsics_name_list.as_slice(),
|
||||
compiler,
|
||||
target,
|
||||
cxx_toolchain_dir,
|
||||
),
|
||||
}
|
||||
// compile this cpp file into a .o file
|
||||
info!("compiling main.cpp");
|
||||
let output = cpp_compiler
|
||||
.compile_object_file("main.cpp", "intrinsic-test-programs.o")
|
||||
.unwrap();
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
|
||||
let object_files = (0..chunk_count)
|
||||
.map(|i| format!("mod_{i}.o"))
|
||||
.chain(["intrinsic-test-programs.o".to_owned()]);
|
||||
|
||||
let output = cpp_compiler
|
||||
.link_executable(object_files, "intrinsic-test-programs")
|
||||
.unwrap();
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn build_rust_file(&self) -> bool {
|
||||
|
|
@ -104,7 +141,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
|
|||
}
|
||||
|
||||
fn compare_outputs(&self) -> bool {
|
||||
if let Some(ref toolchain) = self.cli_options.toolchain {
|
||||
if self.cli_options.toolchain.is_some() {
|
||||
let intrinsics_name_list = self
|
||||
.intrinsics
|
||||
.iter()
|
||||
|
|
@ -113,8 +150,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
|
|||
|
||||
compare_outputs(
|
||||
&intrinsics_name_list,
|
||||
toolchain,
|
||||
&self.cli_options.c_runner,
|
||||
&self.cli_options.runner,
|
||||
&self.cli_options.target,
|
||||
)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::intrinsic::ArmIntrinsicType;
|
||||
use crate::common::cli::Language;
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
|
||||
|
||||
impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
||||
/// Gets a string containing the typename for this type in C format.
|
||||
|
|
@ -73,8 +73,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
format!(
|
||||
"vld{len}{quad}_{type}{size}",
|
||||
type = match k {
|
||||
TypeKind::UInt => "u",
|
||||
TypeKind::Int => "s",
|
||||
TypeKind::Int(Sign::Unsigned) => "u",
|
||||
TypeKind::Int(Sign::Signed) => "s",
|
||||
TypeKind::Float => "f",
|
||||
// The ACLE doesn't support 64-bit polynomial loads on Armv7
|
||||
// if armv7 and bl == 64, use "s", else "p"
|
||||
|
|
@ -107,8 +107,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
format!(
|
||||
"vget{quad}_lane_{type}{size}",
|
||||
type = match k {
|
||||
TypeKind::UInt => "u",
|
||||
TypeKind::Int => "s",
|
||||
TypeKind::Int(Sign::Unsigned) => "u",
|
||||
TypeKind::Int(Sign::Signed) => "s",
|
||||
TypeKind::Float => "f",
|
||||
TypeKind::Poly => "p",
|
||||
x => todo!("get_load_function TypeKind: {:#?}", x),
|
||||
|
|
@ -121,7 +121,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
}
|
||||
}
|
||||
|
||||
fn from_c(s: &str, target: &str) -> Result<Box<Self>, String> {
|
||||
fn from_c(s: &str, target: &str) -> Result<Self, String> {
|
||||
const CONST_STR: &str = "const";
|
||||
if let Some(s) = s.strip_suffix('*') {
|
||||
let (s, constant) = match s.trim().strip_suffix(CONST_STR) {
|
||||
|
|
@ -131,9 +131,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
let s = s.trim_end();
|
||||
let temp_return = ArmIntrinsicType::from_c(s, target);
|
||||
temp_return.map(|mut op| {
|
||||
let edited = op.as_mut();
|
||||
edited.0.ptr = true;
|
||||
edited.0.ptr_constant = constant;
|
||||
op.ptr = true;
|
||||
op.ptr_constant = constant;
|
||||
op
|
||||
})
|
||||
} else {
|
||||
|
|
@ -163,7 +162,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
),
|
||||
None => None,
|
||||
};
|
||||
Ok(Box::new(ArmIntrinsicType(IntrinsicType {
|
||||
Ok(ArmIntrinsicType(IntrinsicType {
|
||||
ptr: false,
|
||||
ptr_constant: false,
|
||||
constant,
|
||||
|
|
@ -172,14 +171,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
simd_len,
|
||||
vec_len,
|
||||
target: target.to_string(),
|
||||
})))
|
||||
}))
|
||||
} else {
|
||||
let kind = start.parse::<TypeKind>()?;
|
||||
let bit_len = match kind {
|
||||
TypeKind::Int => Some(32),
|
||||
TypeKind::Int(_) => Some(32),
|
||||
_ => None,
|
||||
};
|
||||
Ok(Box::new(ArmIntrinsicType(IntrinsicType {
|
||||
Ok(ArmIntrinsicType(IntrinsicType {
|
||||
ptr: false,
|
||||
ptr_constant: false,
|
||||
constant,
|
||||
|
|
@ -188,7 +187,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
|||
simd_len: None,
|
||||
vec_len: None,
|
||||
target: target.to_string(),
|
||||
})))
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ where
|
|||
Argument {
|
||||
pos,
|
||||
name: String::from(var_name),
|
||||
ty: *ty,
|
||||
ty: ty,
|
||||
constraint,
|
||||
}
|
||||
}
|
||||
|
|
@ -125,19 +125,23 @@ where
|
|||
/// Creates a line for each argument that initializes an array for C from which `loads` argument
|
||||
/// values can be loaded as a sliding window.
|
||||
/// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
|
||||
pub fn gen_arglists_c(&self, indentation: Indentation, loads: u32) -> String {
|
||||
self.iter()
|
||||
.filter(|&arg| !arg.has_constraint())
|
||||
.map(|arg| {
|
||||
format!(
|
||||
"{indentation}const {ty} {name}_vals[] = {values};",
|
||||
ty = arg.ty.c_scalar_type(),
|
||||
name = arg.name,
|
||||
values = arg.ty.populate_random(indentation, loads, &Language::C)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
pub fn gen_arglists_c(
|
||||
&self,
|
||||
w: &mut impl std::io::Write,
|
||||
indentation: Indentation,
|
||||
loads: u32,
|
||||
) -> std::io::Result<()> {
|
||||
for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
|
||||
writeln!(
|
||||
w,
|
||||
"{indentation}const {ty} {name}_vals[] = {values};",
|
||||
ty = arg.ty.c_scalar_type(),
|
||||
name = arg.name,
|
||||
values = arg.ty.populate_random(indentation, loads, &Language::C)
|
||||
)?
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates a line for each argument that initializes an array for Rust from which `loads` argument
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ pub struct ProcessedCli {
|
|||
pub filename: PathBuf,
|
||||
pub toolchain: Option<String>,
|
||||
pub cpp_compiler: Option<String>,
|
||||
pub c_runner: String,
|
||||
pub runner: String,
|
||||
pub target: String,
|
||||
pub linker: Option<String>,
|
||||
pub cxx_toolchain_dir: Option<String>,
|
||||
|
|
@ -70,7 +70,7 @@ pub struct ProcessedCli {
|
|||
impl ProcessedCli {
|
||||
pub fn new(cli_options: Cli) -> Self {
|
||||
let filename = cli_options.input;
|
||||
let c_runner = cli_options.runner.unwrap_or_default();
|
||||
let runner = cli_options.runner.unwrap_or_default();
|
||||
let target = cli_options.target;
|
||||
let linker = cli_options.linker;
|
||||
let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
|
||||
|
|
@ -102,7 +102,7 @@ impl ProcessedCli {
|
|||
Self {
|
||||
toolchain,
|
||||
cpp_compiler,
|
||||
c_runner,
|
||||
runner,
|
||||
target,
|
||||
linker,
|
||||
cxx_toolchain_dir,
|
||||
|
|
|
|||
|
|
@ -2,27 +2,25 @@ use super::cli::FailureReason;
|
|||
use rayon::prelude::*;
|
||||
use std::process::Command;
|
||||
|
||||
pub fn compare_outputs(
|
||||
intrinsic_name_list: &Vec<String>,
|
||||
toolchain: &str,
|
||||
runner: &str,
|
||||
target: &str,
|
||||
) -> bool {
|
||||
pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target: &str) -> bool {
|
||||
fn runner_command(runner: &str) -> Command {
|
||||
let mut it = runner.split_whitespace();
|
||||
let mut cmd = Command::new(it.next().unwrap());
|
||||
cmd.args(it);
|
||||
|
||||
cmd
|
||||
}
|
||||
|
||||
let intrinsics = intrinsic_name_list
|
||||
.par_iter()
|
||||
.filter_map(|intrinsic_name| {
|
||||
let c = Command::new("sh")
|
||||
.arg("-c")
|
||||
.arg(format!("{runner} ./c_programs/{intrinsic_name}"))
|
||||
let c = runner_command(runner)
|
||||
.arg("./c_programs/intrinsic-test-programs")
|
||||
.arg(intrinsic_name)
|
||||
.output();
|
||||
|
||||
let rust = Command::new("sh")
|
||||
.current_dir("rust_programs")
|
||||
.arg("-c")
|
||||
.arg(format!(
|
||||
"cargo {toolchain} run --target {target} --bin {intrinsic_name} --release",
|
||||
))
|
||||
.env("RUSTFLAGS", "-Cdebuginfo=0")
|
||||
let rust = runner_command(runner)
|
||||
.arg(format!("target/{target}/release/{intrinsic_name}"))
|
||||
.output();
|
||||
|
||||
let (c, rust) = match (c, rust) {
|
||||
|
|
@ -42,8 +40,8 @@ pub fn compare_outputs(
|
|||
if !rust.status.success() {
|
||||
error!(
|
||||
"Failed to run Rust program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}",
|
||||
stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
|
||||
stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
|
||||
stdout = String::from_utf8_lossy(&rust.stdout),
|
||||
stderr = String::from_utf8_lossy(&rust.stderr),
|
||||
);
|
||||
return Some(FailureReason::RunRust(intrinsic_name.clone()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,11 +5,7 @@ pub struct CompilationCommandBuilder {
|
|||
cxx_toolchain_dir: Option<String>,
|
||||
arch_flags: Vec<String>,
|
||||
optimization: String,
|
||||
include_paths: Vec<String>,
|
||||
project_root: Option<String>,
|
||||
output: String,
|
||||
input: String,
|
||||
linker: Option<String>,
|
||||
extra_flags: Vec<String>,
|
||||
}
|
||||
|
||||
|
|
@ -21,11 +17,7 @@ impl CompilationCommandBuilder {
|
|||
cxx_toolchain_dir: None,
|
||||
arch_flags: Vec::new(),
|
||||
optimization: "2".to_string(),
|
||||
include_paths: Vec::new(),
|
||||
project_root: None,
|
||||
output: String::new(),
|
||||
input: String::new(),
|
||||
linker: None,
|
||||
extra_flags: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
|
@ -57,37 +49,12 @@ impl CompilationCommandBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
/// Sets a list of include paths for compilation.
|
||||
/// The paths that are passed must be relative to the
|
||||
/// "cxx_toolchain_dir" directory path.
|
||||
pub fn set_include_paths(mut self, paths: Vec<&str>) -> Self {
|
||||
self.include_paths = paths.into_iter().map(|path| path.to_string()).collect();
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the root path of all the generated test files.
|
||||
pub fn set_project_root(mut self, path: &str) -> Self {
|
||||
self.project_root = Some(path.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// The name of the output executable, without any suffixes
|
||||
pub fn set_output_name(mut self, path: &str) -> Self {
|
||||
self.output = path.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
/// The name of the input C file, without any suffixes
|
||||
pub fn set_input_name(mut self, path: &str) -> Self {
|
||||
self.input = path.to_string();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_linker(mut self, linker: String) -> Self {
|
||||
self.linker = Some(linker);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_extra_flags(mut self, flags: Vec<&str>) -> Self {
|
||||
let mut flags: Vec<String> = flags.into_iter().map(|f| f.to_string()).collect();
|
||||
self.extra_flags.append(&mut flags);
|
||||
|
|
@ -100,55 +67,69 @@ impl CompilationCommandBuilder {
|
|||
}
|
||||
|
||||
impl CompilationCommandBuilder {
|
||||
pub fn make_string(self) -> String {
|
||||
let arch_flags = self.arch_flags.join("+");
|
||||
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
|
||||
let project_root = self.project_root.unwrap_or_default();
|
||||
let project_root_str = project_root.as_str();
|
||||
let mut output = self.output.clone();
|
||||
if self.linker.is_some() {
|
||||
output += ".o"
|
||||
};
|
||||
let mut command = format!(
|
||||
"{} {flags} -march={arch_flags} \
|
||||
-O{} \
|
||||
-o {project_root}/{} \
|
||||
{project_root}/{}.cpp",
|
||||
self.compiler, self.optimization, output, self.input,
|
||||
);
|
||||
pub fn into_cpp_compilation(self) -> CppCompilation {
|
||||
let mut cpp_compiler = std::process::Command::new(self.compiler);
|
||||
|
||||
command = command + " " + self.extra_flags.join(" ").as_str();
|
||||
if let Some(project_root) = self.project_root {
|
||||
cpp_compiler.current_dir(project_root);
|
||||
}
|
||||
|
||||
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
|
||||
cpp_compiler.args(flags.split_whitespace());
|
||||
|
||||
cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+")));
|
||||
|
||||
cpp_compiler.arg(format!("-O{}", self.optimization));
|
||||
|
||||
cpp_compiler.args(self.extra_flags);
|
||||
|
||||
if let Some(target) = &self.target {
|
||||
command = command + " --target=" + target;
|
||||
cpp_compiler.arg(format!("--target={target}"));
|
||||
}
|
||||
|
||||
if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) {
|
||||
let include_args = self
|
||||
.include_paths
|
||||
.iter()
|
||||
.map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path)
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
command = command
|
||||
+ " -c "
|
||||
+ include_args.as_str()
|
||||
+ " && "
|
||||
+ linker
|
||||
+ " "
|
||||
+ project_root_str
|
||||
+ "/"
|
||||
+ &output
|
||||
+ " -o "
|
||||
+ project_root_str
|
||||
+ "/"
|
||||
+ &self.output
|
||||
+ " && rm "
|
||||
+ project_root_str
|
||||
+ "/"
|
||||
+ &output;
|
||||
}
|
||||
command
|
||||
CppCompilation(cpp_compiler)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CppCompilation(std::process::Command);
|
||||
|
||||
fn clone_command(command: &std::process::Command) -> std::process::Command {
|
||||
let mut cmd = std::process::Command::new(command.get_program());
|
||||
if let Some(current_dir) = command.get_current_dir() {
|
||||
cmd.current_dir(current_dir);
|
||||
}
|
||||
cmd.args(command.get_args());
|
||||
|
||||
for (key, val) in command.get_envs() {
|
||||
cmd.env(key, val.unwrap_or_default());
|
||||
}
|
||||
|
||||
cmd
|
||||
}
|
||||
|
||||
impl CppCompilation {
|
||||
pub fn command_mut(&mut self) -> &mut std::process::Command {
|
||||
&mut self.0
|
||||
}
|
||||
|
||||
pub fn compile_object_file(
|
||||
&self,
|
||||
input: &str,
|
||||
output: &str,
|
||||
) -> std::io::Result<std::process::Output> {
|
||||
let mut cmd = clone_command(&self.0);
|
||||
cmd.args([input, "-c", "-o", output]);
|
||||
cmd.output()
|
||||
}
|
||||
|
||||
pub fn link_executable(
|
||||
&self,
|
||||
inputs: impl Iterator<Item = String>,
|
||||
output: &str,
|
||||
) -> std::io::Result<std::process::Output> {
|
||||
let mut cmd = clone_command(&self.0);
|
||||
cmd.args(inputs);
|
||||
cmd.args(["-o", output]);
|
||||
cmd.output()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
use itertools::Itertools;
|
||||
use rayon::prelude::*;
|
||||
use std::collections::BTreeMap;
|
||||
use std::process::Command;
|
||||
|
||||
use super::argument::Argument;
|
||||
use super::indentation::Indentation;
|
||||
use super::intrinsic::IntrinsicDefinition;
|
||||
|
|
@ -11,104 +6,16 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
|||
// The number of times each intrinsic will be called.
|
||||
const PASSES: u32 = 20;
|
||||
|
||||
// Formats the main C program template with placeholders
|
||||
pub fn format_c_main_template(
|
||||
notices: &str,
|
||||
header_files: &[&str],
|
||||
arch_identifier: &str,
|
||||
arch_specific_definitions: &[&str],
|
||||
arglists: &str,
|
||||
passes: &str,
|
||||
) -> String {
|
||||
format!(
|
||||
r#"{notices}{header_files}
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
template<typename T1, typename T2> T1 cast(T2 x) {{
|
||||
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
|
||||
T1 ret{{}};
|
||||
memcpy(&ret, &x, sizeof(T1));
|
||||
return ret;
|
||||
}}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, float16_t value) {{
|
||||
uint16_t temp = 0;
|
||||
memcpy(&temp, &value, sizeof(float16_t));
|
||||
std::stringstream ss;
|
||||
ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
|
||||
os << ss.str();
|
||||
return os;
|
||||
}}
|
||||
|
||||
#ifdef __{arch_identifier}__
|
||||
{arch_specific_definitions}
|
||||
#endif
|
||||
|
||||
{arglists}
|
||||
|
||||
int main(int argc, char **argv) {{
|
||||
{passes}
|
||||
return 0;
|
||||
}}"#,
|
||||
header_files = header_files
|
||||
.iter()
|
||||
.map(|header| format!("#include <{header}>"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"),
|
||||
arch_specific_definitions = arch_specific_definitions.join("\n"),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn compile_c_programs(compiler_commands: &[String]) -> bool {
|
||||
compiler_commands
|
||||
.par_iter()
|
||||
.map(|compiler_command| {
|
||||
let output = Command::new("sh").arg("-c").arg(compiler_command).output();
|
||||
if let Ok(output) = output {
|
||||
if output.status.success() {
|
||||
true
|
||||
} else {
|
||||
error!(
|
||||
"Failed to compile code for intrinsics: \n\nstdout:\n{}\n\nstderr:\n{}",
|
||||
std::str::from_utf8(&output.stdout).unwrap_or(""),
|
||||
std::str::from_utf8(&output.stderr).unwrap_or("")
|
||||
);
|
||||
false
|
||||
}
|
||||
} else {
|
||||
error!("Command failed: {output:#?}");
|
||||
false
|
||||
}
|
||||
})
|
||||
.find_any(|x| !x)
|
||||
.is_none()
|
||||
}
|
||||
|
||||
// Creates directory structure and file path mappings
|
||||
pub fn setup_c_file_paths(identifiers: &Vec<String>) -> BTreeMap<&String, String> {
|
||||
let _ = std::fs::create_dir("c_programs");
|
||||
identifiers
|
||||
.par_iter()
|
||||
.map(|identifier| {
|
||||
let c_filename = format!(r#"c_programs/{identifier}.cpp"#);
|
||||
|
||||
(identifier, c_filename)
|
||||
})
|
||||
.collect::<BTreeMap<&String, String>>()
|
||||
}
|
||||
|
||||
pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &dyn IntrinsicDefinition<T>,
|
||||
indentation: Indentation,
|
||||
additional: &str,
|
||||
passes: u32,
|
||||
_target: &str,
|
||||
) -> String {
|
||||
) -> std::io::Result<()> {
|
||||
let body_indentation = indentation.nested();
|
||||
format!(
|
||||
writeln!(
|
||||
w,
|
||||
"{indentation}for (int i=0; i<{passes}; i++) {{\n\
|
||||
{loaded_args}\
|
||||
{body_indentation}auto __return_value = {intrinsic_call}({args});\n\
|
||||
|
|
@ -121,78 +28,172 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
|
|||
)
|
||||
}
|
||||
|
||||
pub fn generate_c_constraint_blocks<T: IntrinsicTypeDefinition>(
|
||||
pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &dyn IntrinsicDefinition<T>,
|
||||
indentation: Indentation,
|
||||
constraints: &[&Argument<T>],
|
||||
constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
|
||||
name: String,
|
||||
target: &str,
|
||||
) -> String {
|
||||
if let Some((current, constraints)) = constraints.split_last() {
|
||||
let range = current
|
||||
.constraint
|
||||
.iter()
|
||||
.map(|c| c.to_range())
|
||||
.flat_map(|r| r.into_iter());
|
||||
) -> std::io::Result<()> {
|
||||
let Some(current) = constraints.next() else {
|
||||
return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES);
|
||||
};
|
||||
|
||||
let body_indentation = indentation.nested();
|
||||
range
|
||||
.map(|i| {
|
||||
format!(
|
||||
"{indentation}{{\n\
|
||||
{body_indentation}{ty} {name} = {val};\n\
|
||||
{pass}\n\
|
||||
{indentation}}}",
|
||||
name = current.name,
|
||||
ty = current.ty.c_type(),
|
||||
val = i,
|
||||
pass = generate_c_constraint_blocks(
|
||||
intrinsic,
|
||||
body_indentation,
|
||||
constraints,
|
||||
format!("{name}-{i}"),
|
||||
target,
|
||||
)
|
||||
)
|
||||
})
|
||||
.join("\n")
|
||||
} else {
|
||||
generate_c_test_loop(intrinsic, indentation, &name, PASSES, target)
|
||||
let body_indentation = indentation.nested();
|
||||
for i in current.constraint.iter().flat_map(|c| c.to_range()) {
|
||||
let ty = current.ty.c_type();
|
||||
|
||||
writeln!(w, "{indentation}{{")?;
|
||||
writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?;
|
||||
|
||||
generate_c_constraint_blocks(
|
||||
w,
|
||||
intrinsic,
|
||||
body_indentation,
|
||||
&mut constraints.clone(),
|
||||
format!("{name}-{i}"),
|
||||
)?;
|
||||
|
||||
writeln!(w, "{indentation}}}")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Compiles C test programs using specified compiler
|
||||
pub fn create_c_test_program<T: IntrinsicTypeDefinition>(
|
||||
pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &dyn IntrinsicDefinition<T>,
|
||||
header_files: &[&str],
|
||||
target: &str,
|
||||
c_target: &str,
|
||||
notices: &str,
|
||||
arch_specific_definitions: &[&str],
|
||||
) -> String {
|
||||
let arguments = intrinsic.arguments();
|
||||
let constraints = arguments
|
||||
.iter()
|
||||
.filter(|&i| i.has_constraint())
|
||||
.collect_vec();
|
||||
|
||||
) -> std::io::Result<()> {
|
||||
let indentation = Indentation::default();
|
||||
format_c_main_template(
|
||||
notices,
|
||||
header_files,
|
||||
c_target,
|
||||
arch_specific_definitions,
|
||||
intrinsic
|
||||
.arguments()
|
||||
.gen_arglists_c(indentation, PASSES)
|
||||
.as_str(),
|
||||
generate_c_constraint_blocks(
|
||||
intrinsic,
|
||||
indentation.nested(),
|
||||
constraints.as_slice(),
|
||||
Default::default(),
|
||||
target,
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
|
||||
writeln!(w, "int run_{}() {{", intrinsic.name())?;
|
||||
|
||||
// Define the arrays of arguments.
|
||||
let arguments = intrinsic.arguments();
|
||||
arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
|
||||
|
||||
generate_c_constraint_blocks(
|
||||
w,
|
||||
intrinsic,
|
||||
indentation.nested(),
|
||||
&mut arguments.iter().rev().filter(|&i| i.has_constraint()),
|
||||
Default::default(),
|
||||
)?;
|
||||
|
||||
writeln!(w, " return 0;")?;
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
notice: &str,
|
||||
architecture: &str,
|
||||
platform_headers: &[&str],
|
||||
intrinsics: &[impl IntrinsicDefinition<T>],
|
||||
) -> std::io::Result<()> {
|
||||
write!(w, "{notice}")?;
|
||||
|
||||
for header in platform_headers {
|
||||
writeln!(w, "#include <{header}>")?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
w,
|
||||
r#"
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
template<typename T1, typename T2> T1 cast(T2 x) {{
|
||||
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
|
||||
T1 ret{{}};
|
||||
memcpy(&ret, &x, sizeof(T1));
|
||||
return ret;
|
||||
}}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, float16_t value);
|
||||
|
||||
|
||||
|
||||
"#
|
||||
)?;
|
||||
|
||||
writeln!(w, "#ifdef __{architecture}__")?;
|
||||
writeln!(
|
||||
w,
|
||||
"std::ostream& operator<<(std::ostream& os, poly128_t value);"
|
||||
)?;
|
||||
writeln!(w, "#endif")?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
create_c_test_function(w, intrinsic)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_main_cpp<'a>(
|
||||
w: &mut impl std::io::Write,
|
||||
architecture: &str,
|
||||
arch_specific_definitions: &str,
|
||||
intrinsics: impl Iterator<Item = &'a str> + Clone,
|
||||
) -> std::io::Result<()> {
|
||||
writeln!(w, "#include <iostream>")?;
|
||||
writeln!(w, "#include <string>")?;
|
||||
|
||||
for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] {
|
||||
writeln!(w, "#include <{header}>")?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
w,
|
||||
r#"
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, float16_t value) {{
|
||||
uint16_t temp = 0;
|
||||
memcpy(&temp, &value, sizeof(float16_t));
|
||||
std::stringstream ss;
|
||||
ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
|
||||
os << ss.str();
|
||||
return os;
|
||||
}}
|
||||
"#
|
||||
)?;
|
||||
|
||||
writeln!(w, "#ifdef __{architecture}__")?;
|
||||
writeln!(w, "{arch_specific_definitions }")?;
|
||||
writeln!(w, "#endif")?;
|
||||
|
||||
for intrinsic in intrinsics.clone() {
|
||||
writeln!(w, "extern int run_{intrinsic}(void);")?;
|
||||
}
|
||||
|
||||
writeln!(w, "int main(int argc, char **argv) {{")?;
|
||||
writeln!(w, " std::string intrinsic_name = argv[1];")?;
|
||||
|
||||
writeln!(w, " if (false) {{")?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
writeln!(w, " }} else if (intrinsic_name == \"{intrinsic}\") {{")?;
|
||||
writeln!(w, " return run_{intrinsic}();")?;
|
||||
}
|
||||
|
||||
writeln!(w, " }} else {{")?;
|
||||
writeln!(
|
||||
w,
|
||||
" std::cerr << \"Unknown command: \" << intrinsic_name << \"\\n\";"
|
||||
)?;
|
||||
writeln!(w, " return -1;")?;
|
||||
writeln!(w, " }}")?;
|
||||
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ use itertools::Itertools;
|
|||
use rayon::prelude::*;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
|
||||
use super::argument::Argument;
|
||||
|
|
@ -23,8 +22,8 @@ pub fn format_rust_main_template(
|
|||
) -> String {
|
||||
format!(
|
||||
r#"{notices}#![feature(simd_ffi)]
|
||||
#![feature(link_llvm_intrinsics)]
|
||||
#![feature(f16)]
|
||||
#![allow(unused)]
|
||||
{configurations}
|
||||
{definitions}
|
||||
|
||||
|
|
@ -38,6 +37,42 @@ fn main() {{
|
|||
)
|
||||
}
|
||||
|
||||
fn write_cargo_toml(w: &mut impl std::io::Write, binaries: &[String]) -> std::io::Result<()> {
|
||||
writeln!(
|
||||
w,
|
||||
concat!(
|
||||
"[package]\n",
|
||||
"name = \"intrinsic-test-programs\"\n",
|
||||
"version = \"{version}\"\n",
|
||||
"authors = [{authors}]\n",
|
||||
"license = \"{license}\"\n",
|
||||
"edition = \"2018\"\n",
|
||||
"[workspace]\n",
|
||||
"[dependencies]\n",
|
||||
"core_arch = {{ path = \"../crates/core_arch\" }}",
|
||||
),
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
authors = env!("CARGO_PKG_AUTHORS")
|
||||
.split(":")
|
||||
.format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
|
||||
license = env!("CARGO_PKG_LICENSE"),
|
||||
)?;
|
||||
|
||||
for binary in binaries {
|
||||
writeln!(
|
||||
w,
|
||||
concat!(
|
||||
"[[bin]]\n",
|
||||
"name = \"{binary}\"\n",
|
||||
"path = \"{binary}/main.rs\"\n",
|
||||
),
|
||||
binary = binary,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compile_rust_programs(
|
||||
binaries: Vec<String>,
|
||||
toolchain: Option<&str>,
|
||||
|
|
@ -45,56 +80,20 @@ pub fn compile_rust_programs(
|
|||
linker: Option<&str>,
|
||||
) -> bool {
|
||||
let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
|
||||
cargo
|
||||
.write_all(
|
||||
format!(
|
||||
r#"[package]
|
||||
name = "intrinsic-test-programs"
|
||||
version = "{version}"
|
||||
authors = [{authors}]
|
||||
license = "{license}"
|
||||
edition = "2018"
|
||||
[workspace]
|
||||
[dependencies]
|
||||
core_arch = {{ path = "../crates/core_arch" }}
|
||||
{binaries}"#,
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
authors = env!("CARGO_PKG_AUTHORS")
|
||||
.split(":")
|
||||
.format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
|
||||
license = env!("CARGO_PKG_LICENSE"),
|
||||
binaries = binaries
|
||||
.iter()
|
||||
.map(|binary| {
|
||||
format!(
|
||||
r#"[[bin]]
|
||||
name = "{binary}"
|
||||
path = "{binary}/main.rs""#,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
)
|
||||
.into_bytes()
|
||||
.as_slice(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let toolchain = match toolchain {
|
||||
None => return true,
|
||||
Some(t) => t,
|
||||
};
|
||||
write_cargo_toml(&mut cargo, &binaries).unwrap();
|
||||
|
||||
/* If there has been a linker explicitly set from the command line then
|
||||
* we want to set it via setting it in the RUSTFLAGS*/
|
||||
|
||||
let cargo_command = format!("cargo {toolchain} build --target {target} --release");
|
||||
let mut cargo_command = Command::new("cargo");
|
||||
cargo_command.current_dir("rust_programs");
|
||||
|
||||
let mut command = Command::new("sh");
|
||||
command
|
||||
.current_dir("rust_programs")
|
||||
.arg("-c")
|
||||
.arg(cargo_command);
|
||||
if let Some(toolchain) = toolchain {
|
||||
if !toolchain.is_empty() {
|
||||
cargo_command.arg(toolchain);
|
||||
}
|
||||
}
|
||||
cargo_command.args(["build", "--target", target, "--release"]);
|
||||
|
||||
let mut rust_flags = "-Cdebuginfo=0".to_string();
|
||||
if let Some(linker) = linker {
|
||||
|
|
@ -102,11 +101,11 @@ path = "{binary}/main.rs""#,
|
|||
rust_flags.push_str(linker);
|
||||
rust_flags.push_str(" -C link-args=-static");
|
||||
|
||||
command.env("CPPFLAGS", "-fuse-ld=lld");
|
||||
cargo_command.env("CPPFLAGS", "-fuse-ld=lld");
|
||||
}
|
||||
|
||||
command.env("RUSTFLAGS", rust_flags);
|
||||
let output = command.output();
|
||||
cargo_command.env("RUSTFLAGS", rust_flags);
|
||||
let output = cargo_command.output();
|
||||
|
||||
if let Ok(output) = output {
|
||||
if output.status.success() {
|
||||
|
|
|
|||
|
|
@ -8,14 +8,22 @@ use super::cli::Language;
|
|||
use super::indentation::Indentation;
|
||||
use super::values::value_for_array;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
pub enum Sign {
|
||||
Signed,
|
||||
Unsigned,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
pub enum TypeKind {
|
||||
BFloat,
|
||||
Float,
|
||||
Int,
|
||||
UInt,
|
||||
Int(Sign),
|
||||
Char(Sign),
|
||||
Poly,
|
||||
Void,
|
||||
Mask,
|
||||
Vector,
|
||||
}
|
||||
|
||||
impl FromStr for TypeKind {
|
||||
|
|
@ -23,12 +31,17 @@ impl FromStr for TypeKind {
|
|||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"bfloat" => Ok(Self::BFloat),
|
||||
"float" => Ok(Self::Float),
|
||||
"int" => Ok(Self::Int),
|
||||
"bfloat" | "BF16" => Ok(Self::BFloat),
|
||||
"float" | "double" | "FP16" | "FP32" | "FP64" => Ok(Self::Float),
|
||||
"int" | "long" | "short" | "SI8" | "SI16" | "SI32" | "SI64" => {
|
||||
Ok(Self::Int(Sign::Signed))
|
||||
}
|
||||
"poly" => Ok(Self::Poly),
|
||||
"uint" | "unsigned" => Ok(Self::UInt),
|
||||
"char" => Ok(Self::Char(Sign::Signed)),
|
||||
"uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => Ok(Self::Int(Sign::Unsigned)),
|
||||
"void" => Ok(Self::Void),
|
||||
"MASK" => Ok(Self::Mask),
|
||||
"M64" | "M128" | "M256" | "M512" => Ok(Self::Vector),
|
||||
_ => Err(format!("Impossible to parse argument kind {s}")),
|
||||
}
|
||||
}
|
||||
|
|
@ -42,10 +55,14 @@ impl fmt::Display for TypeKind {
|
|||
match self {
|
||||
Self::BFloat => "bfloat",
|
||||
Self::Float => "float",
|
||||
Self::Int => "int",
|
||||
Self::UInt => "uint",
|
||||
Self::Int(Sign::Signed) => "int",
|
||||
Self::Int(Sign::Unsigned) => "uint",
|
||||
Self::Poly => "poly",
|
||||
Self::Void => "void",
|
||||
Self::Char(Sign::Signed) => "char",
|
||||
Self::Char(Sign::Unsigned) => "unsigned char",
|
||||
Self::Mask => "mask",
|
||||
Self::Vector => "vector",
|
||||
}
|
||||
)
|
||||
}
|
||||
|
|
@ -56,9 +73,10 @@ impl TypeKind {
|
|||
pub fn c_prefix(&self) -> &str {
|
||||
match self {
|
||||
Self::Float => "float",
|
||||
Self::Int => "int",
|
||||
Self::UInt => "uint",
|
||||
Self::Int(Sign::Signed) => "int",
|
||||
Self::Int(Sign::Unsigned) => "uint",
|
||||
Self::Poly => "poly",
|
||||
Self::Char(Sign::Signed) => "char",
|
||||
_ => unreachable!("Not used: {:#?}", self),
|
||||
}
|
||||
}
|
||||
|
|
@ -66,10 +84,13 @@ impl TypeKind {
|
|||
/// Gets the rust prefix for the type kind i.e. i, u, f.
|
||||
pub fn rust_prefix(&self) -> &str {
|
||||
match self {
|
||||
Self::BFloat => "bf",
|
||||
Self::Float => "f",
|
||||
Self::Int => "i",
|
||||
Self::UInt => "u",
|
||||
Self::Int(Sign::Signed) => "i",
|
||||
Self::Int(Sign::Unsigned) => "u",
|
||||
Self::Poly => "u",
|
||||
Self::Char(Sign::Unsigned) => "u",
|
||||
Self::Char(Sign::Signed) => "i",
|
||||
_ => unreachable!("Unused type kind: {:#?}", self),
|
||||
}
|
||||
}
|
||||
|
|
@ -133,11 +154,14 @@ impl IntrinsicType {
|
|||
}
|
||||
|
||||
pub fn c_scalar_type(&self) -> String {
|
||||
format!(
|
||||
"{prefix}{bits}_t",
|
||||
prefix = self.kind().c_prefix(),
|
||||
bits = self.inner_size()
|
||||
)
|
||||
match self.kind() {
|
||||
TypeKind::Char(_) => String::from("char"),
|
||||
_ => format!(
|
||||
"{prefix}{bits}_t",
|
||||
prefix = self.kind().c_prefix(),
|
||||
bits = self.inner_size()
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rust_scalar_type(&self) -> String {
|
||||
|
|
@ -155,8 +179,8 @@ impl IntrinsicType {
|
|||
bit_len: Some(8),
|
||||
..
|
||||
} => match kind {
|
||||
TypeKind::Int => "(int)",
|
||||
TypeKind::UInt => "(unsigned int)",
|
||||
TypeKind::Int(Sign::Signed) => "(int)",
|
||||
TypeKind::Int(Sign::Unsigned) => "(unsigned int)",
|
||||
TypeKind::Poly => "(unsigned int)(uint8_t)",
|
||||
_ => "",
|
||||
},
|
||||
|
|
@ -172,6 +196,21 @@ impl IntrinsicType {
|
|||
128 => "",
|
||||
_ => panic!("invalid bit_len"),
|
||||
},
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Float,
|
||||
bit_len: Some(bit_len),
|
||||
..
|
||||
} => match bit_len {
|
||||
16 => "(float16_t)",
|
||||
32 => "(float)",
|
||||
64 => "(double)",
|
||||
128 => "",
|
||||
_ => panic!("invalid bit_len"),
|
||||
},
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Char(_),
|
||||
..
|
||||
} => "(char)",
|
||||
_ => "",
|
||||
}
|
||||
}
|
||||
|
|
@ -185,7 +224,7 @@ impl IntrinsicType {
|
|||
match self {
|
||||
IntrinsicType {
|
||||
bit_len: Some(bit_len @ (8 | 16 | 32 | 64)),
|
||||
kind: kind @ (TypeKind::Int | TypeKind::UInt | TypeKind::Poly),
|
||||
kind: kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_)),
|
||||
simd_len,
|
||||
vec_len,
|
||||
..
|
||||
|
|
@ -201,7 +240,8 @@ impl IntrinsicType {
|
|||
.format_with(",\n", |i, fmt| {
|
||||
let src = value_for_array(*bit_len, i);
|
||||
assert!(src == 0 || src.ilog2() < *bit_len);
|
||||
if *kind == TypeKind::Int && (src >> (*bit_len - 1)) != 0 {
|
||||
if *kind == TypeKind::Int(Sign::Signed) && (src >> (*bit_len - 1)) != 0
|
||||
{
|
||||
// `src` is a two's complement representation of a negative value.
|
||||
let mask = !0u64 >> (64 - *bit_len);
|
||||
let ones_compl = src ^ mask;
|
||||
|
|
@ -257,7 +297,7 @@ impl IntrinsicType {
|
|||
..
|
||||
} => false,
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Int | TypeKind::UInt | TypeKind::Poly,
|
||||
kind: TypeKind::Int(_) | TypeKind::Poly,
|
||||
..
|
||||
} => true,
|
||||
_ => unimplemented!(),
|
||||
|
|
@ -282,7 +322,9 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
|
|||
fn get_lane_function(&self) -> String;
|
||||
|
||||
/// can be implemented in an `impl` block
|
||||
fn from_c(_s: &str, _target: &str) -> Result<Box<Self>, String>;
|
||||
fn from_c(_s: &str, _target: &str) -> Result<Self, String>
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
/// Gets a string containing the typename for this type in C format.
|
||||
/// can be directly defined in `impl` blocks
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use super::gen_c::create_c_test_program;
|
||||
use super::gen_c::setup_c_file_paths;
|
||||
use super::gen_rust::{create_rust_test_program, setup_rust_file_paths};
|
||||
use super::intrinsic::IntrinsicDefinition;
|
||||
use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
||||
|
|
@ -11,37 +9,6 @@ pub fn write_file(filename: &String, code: String) {
|
|||
file.write_all(code.into_bytes().as_slice()).unwrap();
|
||||
}
|
||||
|
||||
pub fn write_c_testfiles<T: IntrinsicTypeDefinition + Sized>(
|
||||
intrinsics: &Vec<&dyn IntrinsicDefinition<T>>,
|
||||
target: &str,
|
||||
c_target: &str,
|
||||
headers: &[&str],
|
||||
notice: &str,
|
||||
arch_specific_definitions: &[&str],
|
||||
) -> Vec<String> {
|
||||
let intrinsics_name_list = intrinsics
|
||||
.iter()
|
||||
.map(|i| i.name().clone())
|
||||
.collect::<Vec<_>>();
|
||||
let filename_mapping = setup_c_file_paths(&intrinsics_name_list);
|
||||
|
||||
intrinsics.iter().for_each(|&i| {
|
||||
let c_code = create_c_test_program(
|
||||
i,
|
||||
headers,
|
||||
target,
|
||||
c_target,
|
||||
notice,
|
||||
arch_specific_definitions,
|
||||
);
|
||||
if let Some(filename) = filename_mapping.get(&i.name()) {
|
||||
write_file(filename, c_code)
|
||||
};
|
||||
});
|
||||
|
||||
intrinsics_name_list
|
||||
}
|
||||
|
||||
pub fn write_rust_testfiles<T: IntrinsicTypeDefinition>(
|
||||
intrinsics: Vec<&dyn IntrinsicDefinition<T>>,
|
||||
rust_target: &str,
|
||||
|
|
|
|||
|
|
@ -30,12 +30,15 @@ fn main() {
|
|||
|
||||
let test_environment = test_environment_result.unwrap();
|
||||
|
||||
info!("building C binaries");
|
||||
if !test_environment.build_c_file() {
|
||||
std::process::exit(2);
|
||||
}
|
||||
info!("building Rust binaries");
|
||||
if !test_environment.build_rust_file() {
|
||||
std::process::exit(3);
|
||||
}
|
||||
info!("comaparing outputs");
|
||||
if !test_environment.compare_outputs() {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,12 +57,12 @@ pub fn simd_test(
|
|||
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
|
||||
{
|
||||
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
|
||||
"arm" | "armv7" => "is_arm_feature_detected",
|
||||
"arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
|
||||
"aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
|
||||
maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected",
|
||||
"powerpc" | "powerpcle" => "is_powerpc_feature_detected",
|
||||
"powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected",
|
||||
"loongarch64" => "is_loongarch_feature_detected",
|
||||
"loongarch32" | "loongarch64" => "is_loongarch_feature_detected",
|
||||
"s390x" => "is_s390x_feature_detected",
|
||||
t => panic!("unknown target: {t}"),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ crate from working on applications in which `std` is not available.
|
|||
application.
|
||||
|
||||
* Linux/Android:
|
||||
* `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch64`, `s390x`:
|
||||
* `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch{32,64}`, `s390x`:
|
||||
`std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval`
|
||||
when available), and if that fails, by querying `/proc/self/auxv`.
|
||||
* `arm64`: partial support for doing run-time feature detection by directly
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
features! {
|
||||
@TARGET: loongarch;
|
||||
@CFG: target_arch = "loongarch64";
|
||||
@CFG: any(target_arch = "loongarch32", target_arch = "loongarch64");
|
||||
@MACRO_NAME: is_loongarch_feature_detected;
|
||||
@MACRO_ATTRS:
|
||||
/// Checks if `loongarch` feature is enabled.
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ cfg_if! {
|
|||
} else if #[cfg(target_arch = "mips64")] {
|
||||
#[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")]
|
||||
pub use mips64::*;
|
||||
} else if #[cfg(target_arch = "loongarch64")] {
|
||||
} else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
|
||||
#[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")]
|
||||
pub use loongarch::*;
|
||||
} else if #[cfg(target_arch = "s390x")] {
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ pub fn features() -> impl Iterator<Item = (&'static str, bool)> {
|
|||
target_arch = "powerpc64",
|
||||
target_arch = "mips",
|
||||
target_arch = "mips64",
|
||||
target_arch = "loongarch32",
|
||||
target_arch = "loongarch64",
|
||||
target_arch = "s390x",
|
||||
))] {
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
|
|||
target_arch = "riscv64",
|
||||
target_arch = "mips",
|
||||
target_arch = "mips64",
|
||||
target_arch = "loongarch32",
|
||||
target_arch = "loongarch64",
|
||||
))]
|
||||
{
|
||||
|
|
@ -182,6 +183,7 @@ fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> {
|
|||
target_arch = "riscv64",
|
||||
target_arch = "mips",
|
||||
target_arch = "mips64",
|
||||
target_arch = "loongarch32",
|
||||
target_arch = "loongarch64",
|
||||
))]
|
||||
{
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ cfg_if::cfg_if! {
|
|||
} else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
|
||||
mod powerpc;
|
||||
pub(crate) use self::powerpc::detect_features;
|
||||
} else if #[cfg(target_arch = "loongarch64")] {
|
||||
} else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
|
||||
mod loongarch;
|
||||
pub(crate) use self::loongarch::detect_features;
|
||||
} else if #[cfg(target_arch = "s390x")] {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
target_arch = "s390x",
|
||||
target_arch = "riscv32",
|
||||
target_arch = "riscv64",
|
||||
target_arch = "loongarch32",
|
||||
target_arch = "loongarch64"
|
||||
),
|
||||
feature(stdarch_internal)
|
||||
|
|
@ -30,7 +31,7 @@
|
|||
feature(stdarch_riscv_feature_detection)
|
||||
)]
|
||||
#![cfg_attr(
|
||||
target_arch = "loongarch64",
|
||||
any(target_arch = "loongarch32", target_arch = "loongarch64"),
|
||||
feature(stdarch_loongarch_feature_detection)
|
||||
)]
|
||||
|
||||
|
|
@ -45,6 +46,7 @@
|
|||
target_arch = "s390x",
|
||||
target_arch = "riscv32",
|
||||
target_arch = "riscv64",
|
||||
target_arch = "loongarch32",
|
||||
target_arch = "loongarch64"
|
||||
))]
|
||||
#[macro_use]
|
||||
|
|
@ -65,8 +67,8 @@ fn aarch64() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "loongarch64")]
|
||||
fn loongarch64() {
|
||||
#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))]
|
||||
fn loongarch() {
|
||||
let _ = is_loongarch_feature_detected!("lsx");
|
||||
let _ = is_loongarch_feature_detected!("lsx",);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ edition = "2024"
|
|||
|
||||
[dependencies]
|
||||
itertools = "0.14.0"
|
||||
lazy_static = "1.4.0"
|
||||
proc-macro2 = "1.0"
|
||||
quote = "1.0"
|
||||
regex = "1.5"
|
||||
|
|
|
|||
|
|
@ -187,7 +187,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [sabdl]
|
||||
assert_instr: [sabdl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
|
||||
|
|
@ -230,7 +230,7 @@ intrinsics:
|
|||
- stable
|
||||
- - 'feature = "neon_intrinsics"'
|
||||
- 'since = "1.59.0"'
|
||||
assert_instr: [sabdl]
|
||||
assert_instr: [sabdl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
|
||||
|
|
@ -273,7 +273,7 @@ intrinsics:
|
|||
- stable
|
||||
- - 'feature = "neon_intrinsics"'
|
||||
- 'since = "1.59.0"'
|
||||
assert_instr: [sabdl]
|
||||
assert_instr: [sabdl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
|
||||
|
|
@ -1462,7 +1462,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
safety: safe
|
||||
types:
|
||||
|
|
@ -1530,7 +1530,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
safety: safe
|
||||
types:
|
||||
|
|
@ -1582,7 +1582,7 @@ intrinsics:
|
|||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
safety: safe
|
||||
types:
|
||||
|
|
@ -5147,7 +5147,7 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
assert_instr: [pmull]
|
||||
assert_instr: [pmull2]
|
||||
types:
|
||||
- [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
|
||||
compose:
|
||||
|
|
@ -5169,7 +5169,7 @@ intrinsics:
|
|||
- *neon-aes
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
assert_instr: [pmull]
|
||||
assert_instr: [pmull2]
|
||||
types:
|
||||
- [poly64x2_t, "p128"]
|
||||
compose:
|
||||
|
|
@ -5741,7 +5741,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ssubw]
|
||||
assert_instr: [ssubw2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
|
|
@ -5762,7 +5762,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [usubw]
|
||||
assert_instr: [usubw2]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
|
|
@ -5783,7 +5783,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ssubl]
|
||||
assert_instr: [ssubl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
|
||||
|
|
@ -5813,7 +5813,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [usubl]
|
||||
assert_instr: [usubl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
|
||||
|
|
@ -6580,7 +6580,6 @@ intrinsics:
|
|||
arch: aarch64,arm64ec
|
||||
|
||||
|
||||
|
||||
- name: "vmaxnm{neon_type.no}"
|
||||
doc: Floating-point Maximum Number (vector)
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
|
|
@ -6592,11 +6591,7 @@ intrinsics:
|
|||
- float64x1_t
|
||||
- float64x2_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmax, [a, b]]
|
||||
|
||||
|
||||
- name: "vmaxnmh_{type}"
|
||||
|
|
@ -6611,11 +6606,7 @@ intrinsics:
|
|||
types:
|
||||
- f16
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vmaxh.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fmaxnm.{type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: ["f16::max", [a, b]]
|
||||
|
||||
|
||||
- name: "vminnmh_{type}"
|
||||
|
|
@ -6630,11 +6621,7 @@ intrinsics:
|
|||
types:
|
||||
- f16
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vminh.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fminnm.{type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: ["f16::min", [a, b]]
|
||||
|
||||
|
||||
- name: "vmaxnmv{neon_type[0].no}"
|
||||
|
|
@ -6648,11 +6635,7 @@ intrinsics:
|
|||
- [float32x2_t, f32]
|
||||
- [float64x2_t, f64]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_max, [a]]
|
||||
|
||||
- name: "vmaxnmv{neon_type[0].no}"
|
||||
doc: Floating-point maximum number across vector
|
||||
|
|
@ -6664,11 +6647,7 @@ intrinsics:
|
|||
types:
|
||||
- [float32x4_t, f32]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_max, [a]]
|
||||
|
||||
|
||||
- name: "vmaxnmv{neon_type[0].no}"
|
||||
|
|
@ -6684,11 +6663,7 @@ intrinsics:
|
|||
- [float16x4_t, f16]
|
||||
- [float16x8_t, f16]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_max, [a]]
|
||||
|
||||
|
||||
- name: "vminnmv{neon_type[0].no}"
|
||||
|
|
@ -6704,11 +6679,7 @@ intrinsics:
|
|||
- [float16x4_t, f16]
|
||||
- [float16x8_t, f16]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fminnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_min, [a]]
|
||||
|
||||
|
||||
- name: "vmaxv{neon_type[0].no}"
|
||||
|
|
@ -6814,11 +6785,7 @@ intrinsics:
|
|||
- float64x1_t
|
||||
- float64x2_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fminnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmin, [a, b]]
|
||||
|
||||
- name: "vminnmv{neon_type[0].no}"
|
||||
doc: "Floating-point minimum number across vector"
|
||||
|
|
@ -6832,11 +6799,7 @@ intrinsics:
|
|||
- [float32x2_t, "f32"]
|
||||
- [float64x2_t, "f64"]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vminnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_min, [a]]
|
||||
|
||||
- name: "vminnmv{neon_type[0].no}"
|
||||
doc: "Floating-point minimum number across vector"
|
||||
|
|
@ -6849,11 +6812,7 @@ intrinsics:
|
|||
types:
|
||||
- [float32x4_t, "f32"]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vminnmv.{neon_type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_min, [a]]
|
||||
|
||||
- name: "vmovl_high{neon_type[0].noq}"
|
||||
doc: Vector move
|
||||
|
|
@ -9950,7 +9909,7 @@ intrinsics:
|
|||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal]]}]]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
|
|
@ -9977,7 +9936,7 @@ intrinsics:
|
|||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal]]}]]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
|
||||
|
|
@ -11386,7 +11345,7 @@ intrinsics:
|
|||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl]]}]]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
safety: safe
|
||||
types:
|
||||
|
|
@ -13023,6 +12982,26 @@ intrinsics:
|
|||
- link: "llvm.aarch64.crc32cx"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vabsd_s64"
|
||||
doc: "Absolute Value (wrapping)."
|
||||
arguments: ["a: {type[1]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
assert_instr: [abs]
|
||||
safety: safe
|
||||
types:
|
||||
- [i64, i64]
|
||||
compose:
|
||||
# This is behaviorally equivalent to `i64::wrapping_abs`, but keeps the value in a SIMD
|
||||
# register. That can be beneficial when combined with other instructions. This LLVM
|
||||
# issue provides some extra context https://github.com/llvm/llvm-project/issues/148388.
|
||||
- LLVMLink:
|
||||
name: "vabsd_s64"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.abs.i64"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "{type[0]}"
|
||||
doc: "Absolute Value (wrapping)."
|
||||
arguments: ["a: {type[1]}"]
|
||||
|
|
@ -13032,15 +13011,18 @@ intrinsics:
|
|||
assert_instr: [abs]
|
||||
safety: safe
|
||||
types:
|
||||
- ['vabsd_s64', i64, i64]
|
||||
- ['vabs_s64', int64x1_t, v1i64]
|
||||
- ['vabsq_s64', int64x2_t, v2i64]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "{type[0]}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.abs.{type[2]}"
|
||||
arch: aarch64,arm64ec
|
||||
- Let:
|
||||
- neg
|
||||
- "{type[1]}"
|
||||
- FnCall: [simd_neg, [a]]
|
||||
- Let:
|
||||
- mask
|
||||
- "{type[1]}"
|
||||
- FnCall: [simd_ge, [a, neg]]
|
||||
- FnCall: [simd_select, [mask, a, neg]]
|
||||
|
||||
- name: "vuqadd{neon_type[0].no}"
|
||||
doc: "Signed saturating Accumulate of Unsigned value."
|
||||
|
|
@ -13142,11 +13124,7 @@ intrinsics:
|
|||
types:
|
||||
- [int64x2_t, i64]
|
||||
compose:
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- "vaddvq_u64"
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vpaddd_u64"
|
||||
doc: "Add pairwise"
|
||||
|
|
@ -13159,7 +13137,7 @@ intrinsics:
|
|||
types:
|
||||
- [uint64x2_t, u64]
|
||||
compose:
|
||||
- FnCall: [vaddvq_u64, [a]]
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13176,11 +13154,7 @@ intrinsics:
|
|||
- [int16x8_t, i16]
|
||||
- [int32x4_t, i32]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.saddv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13193,11 +13167,7 @@ intrinsics:
|
|||
types:
|
||||
- [int32x2_t, i32]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13210,11 +13180,7 @@ intrinsics:
|
|||
types:
|
||||
- [int64x2_t, i64]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13231,11 +13197,7 @@ intrinsics:
|
|||
- [uint16x8_t, u16]
|
||||
- [uint32x4_t, u32]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.uaddv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13248,11 +13210,7 @@ intrinsics:
|
|||
types:
|
||||
- [uint32x2_t, u32, i32]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddv{neon_type[0].no}"
|
||||
doc: "Add across vector"
|
||||
|
|
@ -13265,11 +13223,7 @@ intrinsics:
|
|||
types:
|
||||
- [uint64x2_t, u64, i64]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vaddv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_add_unordered, [a]]
|
||||
|
||||
- name: "vaddlv{neon_type[0].no}"
|
||||
doc: "Signed Add Long across Vector"
|
||||
|
|
@ -13327,11 +13281,7 @@ intrinsics:
|
|||
- [int16x8_t, i16, 'smaxv']
|
||||
- [int32x4_t, i32, 'smaxv']
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vmaxv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.smaxv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_max, [a]]
|
||||
|
||||
- name: "vmaxv{neon_type[0].no}"
|
||||
doc: "Horizontal vector max."
|
||||
|
|
@ -13349,11 +13299,7 @@ intrinsics:
|
|||
- [uint16x8_t, u16, 'umaxv']
|
||||
- [uint32x4_t, u32, 'umaxv']
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vmaxv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.umaxv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_max, [a]]
|
||||
|
||||
- name: "vmaxv{neon_type[0].no}"
|
||||
doc: "Horizontal vector max."
|
||||
|
|
@ -13390,11 +13336,7 @@ intrinsics:
|
|||
- [int16x8_t, i16, 'sminv']
|
||||
- [int32x4_t, i32, 'sminv']
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vminv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.sminv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_min, [a]]
|
||||
|
||||
- name: "vminv{neon_type[0].no}"
|
||||
doc: "Horizontal vector min."
|
||||
|
|
@ -13412,11 +13354,7 @@ intrinsics:
|
|||
- [uint16x8_t, u16, 'uminv']
|
||||
- [uint32x4_t, u32, 'uminv']
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vminv{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.uminv.{type[1]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_reduce_min, [a]]
|
||||
|
||||
- name: "vminv{neon_type[0].no}"
|
||||
doc: "Horizontal vector min."
|
||||
|
|
|
|||
|
|
@ -7135,13 +7135,8 @@ intrinsics:
|
|||
- int32x2_t
|
||||
- int32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "smax.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vmaxs.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.smax.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
|
||||
- FnCall: [simd_select, [mask, a, b]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
doc: Maximum (vector)
|
||||
|
|
@ -7162,13 +7157,8 @@ intrinsics:
|
|||
- uint32x2_t
|
||||
- uint32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "smax.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vmaxu.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.umax.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
|
||||
- FnCall: [simd_select, [mask, a, b]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
doc: Maximum (vector)
|
||||
|
|
@ -7233,13 +7223,7 @@ intrinsics:
|
|||
- float32x2_t
|
||||
- float32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vmaxnm.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmax, [a, b]]
|
||||
|
||||
|
||||
- name: "vmaxnm{neon_type.no}"
|
||||
|
|
@ -7257,13 +7241,7 @@ intrinsics:
|
|||
- float16x4_t
|
||||
- float16x8_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fmaxnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vmaxnm.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmax, [a, b]]
|
||||
|
||||
|
||||
- name: "vminnm{neon_type.no}"
|
||||
|
|
@ -7281,13 +7259,7 @@ intrinsics:
|
|||
- float16x4_t
|
||||
- float16x8_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fminnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vminnm.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmin, [a, b]]
|
||||
|
||||
|
||||
- name: "vmin{neon_type.no}"
|
||||
|
|
@ -7309,13 +7281,8 @@ intrinsics:
|
|||
- int32x2_t
|
||||
- int32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "smin.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vmins.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.smin.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
|
||||
- FnCall: [simd_select, [mask, a, b]]
|
||||
|
||||
- name: "vmin{neon_type.no}"
|
||||
doc: "Minimum (vector)"
|
||||
|
|
@ -7336,13 +7303,8 @@ intrinsics:
|
|||
- uint32x2_t
|
||||
- uint32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "umin.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vminu.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.umin.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
|
||||
- FnCall: [simd_select, [mask, a, b]]
|
||||
|
||||
- name: "vmin{neon_type.no}"
|
||||
doc: "Minimum (vector)"
|
||||
|
|
@ -7408,13 +7370,7 @@ intrinsics:
|
|||
- float32x2_t
|
||||
- float32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "fminnm.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.arm.neon.vminnm.{neon_type}"
|
||||
arch: arm
|
||||
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: [simd_fmin, [a, b]]
|
||||
|
||||
- name: "vpadd{neon_type.no}"
|
||||
doc: Floating-point add pairwise
|
||||
|
|
@ -7874,9 +7830,9 @@ intrinsics:
|
|||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[2]}"]]
|
||||
- LLVMLink:
|
||||
|
|
@ -7929,9 +7885,9 @@ intrinsics:
|
|||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
|
||||
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
|
||||
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
|
||||
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
|
||||
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
|
||||
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[2]}"]]
|
||||
- LLVMLink:
|
||||
|
|
@ -8105,9 +8061,9 @@ intrinsics:
|
|||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[2]}"]]
|
||||
- LLVMLink:
|
||||
|
|
@ -8215,9 +8171,9 @@ intrinsics:
|
|||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
|
||||
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
|
||||
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
|
||||
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
|
||||
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
|
||||
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[2]}"]]
|
||||
- LLVMLink:
|
||||
|
|
@ -8939,9 +8895,9 @@ intrinsics:
|
|||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
|
||||
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
|
||||
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
|
||||
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[2]}"]]
|
||||
- LLVMLink:
|
||||
|
|
@ -9576,7 +9532,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9617,7 +9574,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
safety: safe
|
||||
|
|
@ -9645,7 +9603,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9673,7 +9632,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9707,7 +9667,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9735,7 +9696,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vzip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9767,7 +9729,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
safety: safe
|
||||
|
|
@ -9794,7 +9757,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -9835,7 +9799,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
safety: safe
|
||||
|
|
@ -9863,7 +9828,8 @@ intrinsics:
|
|||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
|
|
@ -12881,13 +12847,16 @@ intrinsics:
|
|||
- int16x8_t
|
||||
- int32x4_t
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vabs{neon_type.no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.abs.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- link: "llvm.arm.neon.vabs.{neon_type}"
|
||||
arch: arm
|
||||
- Let:
|
||||
- neg
|
||||
- "{neon_type}"
|
||||
- FnCall: [simd_neg, [a]]
|
||||
- Let:
|
||||
- mask
|
||||
- "{neon_type}"
|
||||
- FnCall: [simd_ge, [a, neg]]
|
||||
- FnCall: [simd_select, [mask, a, neg]]
|
||||
|
||||
|
||||
- name: "vpmin{neon_type.no}"
|
||||
doc: "Folding minimum of adjacent pairs"
|
||||
|
|
@ -13862,8 +13831,8 @@ intrinsics:
|
|||
- [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8']
|
||||
- [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
|
||||
- [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
|
||||
- [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N as i32']
|
||||
- [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N as i32']
|
||||
- [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
|
||||
- [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
|
||||
- [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
|
||||
- [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
|
||||
compose:
|
||||
|
|
@ -13891,8 +13860,8 @@ intrinsics:
|
|||
- [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8']
|
||||
- [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16']
|
||||
- [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16']
|
||||
- [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N as i32']
|
||||
- [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N as i32']
|
||||
- [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
|
||||
- [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
|
||||
- [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
|
||||
- [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
|
||||
- [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8']
|
||||
|
|
@ -14138,6 +14107,7 @@ intrinsics:
|
|||
doc: "Load one single-element structure and Replicate to all lanes (of one register)."
|
||||
arguments: ["ptr: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
big_endian_inverse: false
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]]
|
||||
|
|
@ -14147,40 +14117,36 @@ intrinsics:
|
|||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
- ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_s8::<0>', 'i8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_u8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_p8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'i8x8::splat']
|
||||
- ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']
|
||||
- ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']
|
||||
|
||||
- ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_s8::<0>', 'i8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_u8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_p8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'i8x16::splat']
|
||||
- ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']
|
||||
- ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']
|
||||
|
||||
- ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_s16::<0>', 'i16x4::splat(0)', '[0, 0, 0, 0]']
|
||||
- ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_u16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
|
||||
- ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_p16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
|
||||
- ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'i16x4::splat']
|
||||
- ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']
|
||||
- ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']
|
||||
|
||||
- ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_s16::<0>', 'i16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_u16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_p16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
|
||||
- ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'i16x8::splat']
|
||||
- ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']
|
||||
- ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']
|
||||
|
||||
- ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_s32::<0>', 'i32x2::splat(0)', '[0, 0]']
|
||||
- ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_u32::<0>', 'u32x2::splat(0)', '[0, 0]']
|
||||
- ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_f32::<0>', 'f32x2::splat(0.0)', '[0, 0]']
|
||||
- ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'i32x2::splat']
|
||||
- ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'u32x2::splat']
|
||||
- ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'f32x2::splat']
|
||||
|
||||
- ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_s32::<0>', 'i32x4::splat(0)', '[0, 0, 0, 0]']
|
||||
- ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_u32::<0>', 'u32x4::splat(0)', '[0, 0, 0, 0]']
|
||||
- ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_f32::<0>', 'f32x4::splat(0.0)', '[0, 0, 0, 0]']
|
||||
- ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'i32x4::splat']
|
||||
- ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'u32x4::splat']
|
||||
- ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'f32x4::splat']
|
||||
|
||||
- ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1', 'vld1q_lane_s64::<0>', 'i64x2::splat(0)', '[0, 0]']
|
||||
- ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1', 'vld1q_lane_u64::<0>', 'u64x2::splat(0)', '[0, 0]']
|
||||
- ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1r', 'i64x2::splat']
|
||||
- ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1r', 'u64x2::splat']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[5]}'
|
||||
- - ptr
|
||||
- FnCall: [transmute, ['{type[6]}']]
|
||||
- FnCall: ['simd_shuffle!', [x, x, '{type[7]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ['{type[5]}', ["*ptr"]]
|
||||
|
||||
- name: "{type[0]}"
|
||||
doc: "Absolute difference and accumulate (64-bit)"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use proc_macro2::{Literal, Punct, Spacing, TokenStream};
|
||||
use quote::{ToTokens, TokenStreamExt, format_ident, quote};
|
||||
use regex::Regex;
|
||||
|
|
@ -7,6 +6,7 @@ use serde::de::{self, MapAccess, Visitor};
|
|||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::intrinsic::Intrinsic;
|
||||
use crate::wildstring::WildStringPart;
|
||||
|
|
@ -374,10 +374,8 @@ impl FromStr for Expression {
|
|||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref MACRO_RE: Regex =
|
||||
Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap();
|
||||
}
|
||||
static MACRO_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap());
|
||||
|
||||
if s == "SvUndef" {
|
||||
Ok(Expression::SvUndef)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use std::fs::File;
|
|||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::format_code;
|
||||
use crate::input::InputType;
|
||||
|
|
@ -10,7 +11,6 @@ use crate::typekinds::BaseType;
|
|||
use crate::typekinds::{ToRepr, TypeKind};
|
||||
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::{format_ident, quote};
|
||||
|
||||
|
|
@ -639,8 +639,8 @@ impl LdIntrCharacteristics {
|
|||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref PREAMBLE: String = format!(
|
||||
static PREAMBLE: LazyLock<String> = LazyLock::new(|| {
|
||||
format!(
|
||||
r#"#![allow(unused)]
|
||||
|
||||
use super::*;
|
||||
|
|
@ -801,13 +801,11 @@ fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {{
|
|||
assert!(!svptest_any(defined, cmp))
|
||||
}}
|
||||
"#
|
||||
);
|
||||
}
|
||||
)
|
||||
});
|
||||
|
||||
lazy_static! {
|
||||
static ref MANUAL_TESTS: String = format!(
|
||||
"#[simd_test(enable = \"sve\")]
|
||||
unsafe fn test_ffr() {{
|
||||
const MANUAL_TESTS: &str = "#[simd_test(enable = \"sve\")]
|
||||
unsafe fn test_ffr() {
|
||||
svsetffr();
|
||||
let ffr = svrdffr();
|
||||
assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svindex_u8(1, 0));
|
||||
|
|
@ -816,7 +814,5 @@ unsafe fn test_ffr() {{
|
|||
svwrffr(pred);
|
||||
let ffr = svrdffr_z(svptrue_b8());
|
||||
assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svdup_n_u8_z(pred, 1));
|
||||
}}
|
||||
"
|
||||
);
|
||||
}
|
||||
";
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
use lazy_static::lazy_static;
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::{ToTokens, TokenStreamExt, quote};
|
||||
use regex::Regex;
|
||||
use serde_with::{DeserializeFromStr, SerializeDisplay};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::context;
|
||||
use crate::expression::{Expression, FnCall};
|
||||
|
|
@ -496,9 +496,9 @@ impl FromStr for VectorType {
|
|||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap();
|
||||
}
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap()
|
||||
});
|
||||
|
||||
if let Some(c) = RE.captures(s) {
|
||||
let (base_type, lanes) = Self::sanitise_lanes(
|
||||
|
|
@ -698,9 +698,8 @@ impl FromStr for BaseType {
|
|||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap();
|
||||
}
|
||||
static RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap());
|
||||
|
||||
if let Some(c) = RE.captures(s) {
|
||||
let kind = c["kind"].parse()?;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde_with::{DeserializeFromStr, SerializeDisplay};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::{fmt, sync::LazyLock};
|
||||
|
||||
use crate::{
|
||||
fn_suffix::SuffixKind,
|
||||
|
|
@ -66,9 +65,9 @@ impl FromStr for Wildcard {
|
|||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap();
|
||||
}
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap()
|
||||
});
|
||||
|
||||
if let Some(c) = RE.captures(s) {
|
||||
let wildcard_name = &c["wildcard"];
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ LSX:
|
|||
# Generate bindings
|
||||
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsxintrin.h
|
||||
OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec
|
||||
rustfmt crates/core_arch/src/loongarch64/lsx/generated.rs
|
||||
|
||||
# Generate tests
|
||||
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec test
|
||||
|
|
@ -25,7 +24,6 @@ LASX:
|
|||
# Generate bindings
|
||||
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasxintrin.h
|
||||
OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec
|
||||
rustfmt crates/core_arch/src/loongarch64/lasx/generated.rs
|
||||
|
||||
# Generate tests
|
||||
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec test
|
||||
|
|
|
|||
|
|
@ -274,13 +274,14 @@ fn gen_bind_body(
|
|||
}
|
||||
};
|
||||
|
||||
let is_mem = in_t.iter().any(|s| s.contains("POINTER"));
|
||||
let is_store = current_name.to_string().contains("vst");
|
||||
let link_function = {
|
||||
let fn_decl = {
|
||||
let fn_output = if out_t.to_lowercase() == "void" {
|
||||
String::new()
|
||||
} else {
|
||||
format!("-> {}", type_to_rst(out_t, is_store))
|
||||
format!(" -> {}", type_to_rst(out_t, is_store))
|
||||
};
|
||||
let fn_inputs = match para_num {
|
||||
1 => format!("(a: {})", type_to_rst(in_t[0], is_store)),
|
||||
|
|
@ -304,7 +305,7 @@ fn gen_bind_body(
|
|||
),
|
||||
_ => panic!("unsupported parameter number"),
|
||||
};
|
||||
format!("fn __{current_name}{fn_inputs} {fn_output};")
|
||||
format!("fn __{current_name}{fn_inputs}{fn_output};")
|
||||
};
|
||||
let function = format!(
|
||||
r#" #[link_name = "llvm.loongarch.{}"]
|
||||
|
|
@ -456,31 +457,40 @@ fn gen_bind_body(
|
|||
};
|
||||
rustc_legacy_const_generics = "rustc_legacy_const_generics(2, 3)";
|
||||
}
|
||||
format!("pub unsafe fn {current_name}{fn_inputs} {fn_output}")
|
||||
format!(
|
||||
"pub {}fn {current_name}{fn_inputs} {fn_output}",
|
||||
if is_mem { "unsafe " } else { "" }
|
||||
)
|
||||
};
|
||||
let unsafe_start = if !is_mem { "unsafe { " } else { "" };
|
||||
let unsafe_end = if !is_mem { " }" } else { "" };
|
||||
let mut call_params = {
|
||||
match para_num {
|
||||
1 => format!("__{current_name}(a)"),
|
||||
2 => format!("__{current_name}(a, b)"),
|
||||
3 => format!("__{current_name}(a, b, c)"),
|
||||
4 => format!("__{current_name}(a, b, c, d)"),
|
||||
1 => format!("{unsafe_start}__{current_name}(a){unsafe_end}"),
|
||||
2 => format!("{unsafe_start}__{current_name}(a, b){unsafe_end}"),
|
||||
3 => format!("{unsafe_start}__{current_name}(a, b, c){unsafe_end}"),
|
||||
4 => format!("{unsafe_start}__{current_name}(a, b, c, d){unsafe_end}"),
|
||||
_ => panic!("unsupported parameter number"),
|
||||
}
|
||||
};
|
||||
if para_num == 1 && in_t[0] == "HI" {
|
||||
call_params = match asm_fmts[1].as_str() {
|
||||
"si10" => {
|
||||
format!("static_assert_simm_bits!(IMM_S10, 10);\n __{current_name}(IMM_S10)")
|
||||
format!(
|
||||
"static_assert_simm_bits!(IMM_S10, 10);\n {unsafe_start}__{current_name}(IMM_S10){unsafe_end}"
|
||||
)
|
||||
}
|
||||
"i13" => {
|
||||
format!("static_assert_simm_bits!(IMM_S13, 13);\n __{current_name}(IMM_S13)")
|
||||
format!(
|
||||
"static_assert_simm_bits!(IMM_S13, 13);\n {unsafe_start}__{current_name}(IMM_S13){unsafe_end}"
|
||||
)
|
||||
}
|
||||
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
|
||||
}
|
||||
} else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") {
|
||||
call_params = if asm_fmts[2].starts_with("ui") {
|
||||
format!(
|
||||
"static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, IMM{0})",
|
||||
"static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, IMM{0}){unsafe_end}",
|
||||
asm_fmts[2].get(2..).unwrap()
|
||||
)
|
||||
} else {
|
||||
|
|
@ -489,14 +499,16 @@ fn gen_bind_body(
|
|||
} else if para_num == 2 && in_t[1] == "QI" {
|
||||
call_params = match asm_fmts[2].as_str() {
|
||||
"si5" => {
|
||||
format!("static_assert_simm_bits!(IMM_S5, 5);\n __{current_name}(a, IMM_S5)")
|
||||
format!(
|
||||
"static_assert_simm_bits!(IMM_S5, 5);\n {unsafe_start}__{current_name}(a, IMM_S5){unsafe_end}"
|
||||
)
|
||||
}
|
||||
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
|
||||
};
|
||||
} else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" {
|
||||
call_params = if asm_fmts[2].starts_with("si") {
|
||||
format!(
|
||||
"static_assert_simm_bits!(IMM_S{0}, {0});\n __{current_name}(mem_addr, IMM_S{0})",
|
||||
"static_assert_simm_bits!(IMM_S{0}, {0});\n {unsafe_start}__{current_name}(mem_addr, IMM_S{0}){unsafe_end}",
|
||||
asm_fmts[2].get(2..).unwrap()
|
||||
)
|
||||
} else {
|
||||
|
|
@ -504,13 +516,13 @@ fn gen_bind_body(
|
|||
}
|
||||
} else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" {
|
||||
call_params = match asm_fmts[2].as_str() {
|
||||
"rk" => format!("__{current_name}(mem_addr, b)"),
|
||||
"rk" => format!("{unsafe_start}__{current_name}(mem_addr, b){unsafe_end}"),
|
||||
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
|
||||
};
|
||||
} else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") {
|
||||
call_params = if asm_fmts[2].starts_with("ui") {
|
||||
format!(
|
||||
"static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, b, IMM{0})",
|
||||
"static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, b, IMM{0}){unsafe_end}",
|
||||
asm_fmts[2].get(2..).unwrap()
|
||||
)
|
||||
} else {
|
||||
|
|
@ -519,19 +531,19 @@ fn gen_bind_body(
|
|||
} else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" {
|
||||
call_params = match asm_fmts[2].as_str() {
|
||||
"si12" => format!(
|
||||
"static_assert_simm_bits!(IMM_S12, 12);\n __{current_name}(a, mem_addr, IMM_S12)"
|
||||
"static_assert_simm_bits!(IMM_S12, 12);\n {unsafe_start}__{current_name}(a, mem_addr, IMM_S12){unsafe_end}"
|
||||
),
|
||||
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
|
||||
};
|
||||
} else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" {
|
||||
call_params = match asm_fmts[2].as_str() {
|
||||
"rk" => format!("__{current_name}(a, mem_addr, b)"),
|
||||
"rk" => format!("{unsafe_start}__{current_name}(a, mem_addr, b){unsafe_end}"),
|
||||
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
|
||||
};
|
||||
} else if para_num == 4 {
|
||||
call_params = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) {
|
||||
("si8", t) => format!(
|
||||
"static_assert_simm_bits!(IMM_S8, 8);\n static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, mem_addr, IMM_S8, IMM{0})",
|
||||
"static_assert_simm_bits!(IMM_S8, 8);\n static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, mem_addr, IMM_S8, IMM{0}){unsafe_end}",
|
||||
type_to_imm(t)
|
||||
),
|
||||
(_, _) => panic!(
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ edition = "2024"
|
|||
[dependencies]
|
||||
assert-instr-macro = { path = "../assert-instr-macro" }
|
||||
simd-test-macro = { path = "../simd-test-macro" }
|
||||
lazy_static = "1.0"
|
||||
rustc-demangle = "0.1.8"
|
||||
cfg-if = "1.0"
|
||||
|
||||
|
|
@ -20,7 +19,7 @@ cc = "1.0"
|
|||
# time, and we want to make updates to this explicit rather than automatically
|
||||
# picking up updates which might break CI with new instruction names.
|
||||
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
||||
wasmprinter = "=0.2.67"
|
||||
wasmprinter = "=0.235"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
|
|
|||
|
|
@ -6,14 +6,12 @@
|
|||
#![deny(rust_2018_idioms)]
|
||||
#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate cfg_if;
|
||||
|
||||
pub use assert_instr_macro::*;
|
||||
pub use simd_test_macro::*;
|
||||
use std::{cmp, collections::HashSet, env, hash, hint::black_box, str};
|
||||
use std::{cmp, collections::HashSet, env, hash, hint::black_box, str, sync::LazyLock};
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(target_arch = "wasm32")] {
|
||||
|
|
@ -25,9 +23,7 @@ cfg_if! {
|
|||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
|
||||
}
|
||||
static DISASSEMBLY: LazyLock<HashSet<Function>> = LazyLock::new(disassemble_myself);
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Function {
|
||||
|
|
@ -65,11 +61,12 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
|
|||
black_box(shim_addr);
|
||||
|
||||
//eprintln!("shim name: {fnname}");
|
||||
let function = &DISASSEMBLY
|
||||
.get(&Function::new(fnname))
|
||||
.unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly"));
|
||||
let Some(function) = &DISASSEMBLY.get(&Function::new(fnname)) else {
|
||||
panic!("function `{fnname}` not found in the disassembly")
|
||||
};
|
||||
//eprintln!(" function: {:?}", function);
|
||||
|
||||
// Trim any filler instructions.
|
||||
let mut instrs = &function.instrs[..];
|
||||
while instrs.last().is_some_and(|s| s == "nop" || s == "int3") {
|
||||
instrs = &instrs[..instrs.len() - 1];
|
||||
|
|
@ -84,12 +81,26 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
|
|||
// 2. It is a mark, indicating that the instruction will be
|
||||
// compiled into other instructions - mainly because of llvm
|
||||
// optimization.
|
||||
let expected = if expected == "unknown" {
|
||||
"<unknown>" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed
|
||||
} else {
|
||||
expected
|
||||
let expected = match expected {
|
||||
// `<unknown>` is what LLVM will generate for unknown instructions. We use this to fail
|
||||
// loudly when LLVM does start supporting these instructions.
|
||||
//
|
||||
// This was introduced in https://github.com/rust-lang/stdarch/pull/1674 to work around the
|
||||
// RISC-V P extension not yet being supported.
|
||||
"unknown" => "<unknown>",
|
||||
_ => expected,
|
||||
};
|
||||
let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected));
|
||||
|
||||
// Check whether the given instruction is part of the disassemblied body.
|
||||
let found = expected == "nop"
|
||||
|| instrs.iter().any(|instruction| {
|
||||
instruction.starts_with(expected)
|
||||
// Check that the next character is non-alphanumeric. This prevents false negatives
|
||||
// when e.g. `fminnm` was used but `fmin` was expected.
|
||||
//
|
||||
// TODO: resolve the conflicts (x86_64 and aarch64 have a bunch, probably others)
|
||||
// && !instruction[expected.len()..].starts_with(|c: char| c.is_ascii_alphanumeric())
|
||||
});
|
||||
|
||||
// Look for subroutine call instructions in the disassembly to detect whether
|
||||
// inlining failed: all intrinsics are `#[inline(always)]`, so calling one
|
||||
|
|
|
|||
|
|
@ -558,7 +558,8 @@ fn search(pos: &Pos, alpha: i32, beta: i32, depth: i32, _ply: i32) -> i32 {
|
|||
assert_ne!(bm, MOVE_NONE);
|
||||
assert!(bs >= -EVAL_INF && bs <= EVAL_INF);
|
||||
|
||||
if _ply == 0 { bm } else { bs } //best move at the root node, best score elsewhere
|
||||
// best move at the root node, best score elsewhere
|
||||
if _ply == 0 { bm } else { bs }
|
||||
}
|
||||
|
||||
/// Evaluation function: give different scores to different patterns after a fixed depth.
|
||||
|
|
@ -570,15 +571,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
|
|||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if check_x86_avx512_features() {
|
||||
unsafe {
|
||||
if check_patternlive4_avx512(pos, def) {
|
||||
return -4096;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if check_patternlive4(pos, def) {
|
||||
if unsafe { check_patternlive4_avx512(pos, def) } {
|
||||
return -4096;
|
||||
}
|
||||
} else if check_patternlive4(pos, def) {
|
||||
return -4096;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -593,15 +590,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
|
|||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if check_x86_avx512_features() {
|
||||
unsafe {
|
||||
if check_patternlive4_avx512(pos, atk) {
|
||||
return 2560;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if check_patternlive4(pos, atk) {
|
||||
if unsafe { check_patternlive4_avx512(pos, atk) } {
|
||||
return 2560;
|
||||
}
|
||||
} else if check_patternlive4(pos, atk) {
|
||||
return 2560;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -616,15 +609,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
|
|||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if check_x86_avx512_features() {
|
||||
unsafe {
|
||||
if check_patterndead4_avx512(pos, atk) > 0 {
|
||||
return 2560;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if check_patterndead4(pos, atk) > 0 {
|
||||
if unsafe { check_patterndead4_avx512(pos, atk) > 0 } {
|
||||
return 2560;
|
||||
}
|
||||
} else if check_patterndead4(pos, atk) > 0 {
|
||||
return 2560;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -909,9 +898,7 @@ fn pos_is_winner_avx512(pos: &Pos) -> bool {
|
|||
0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
|
||||
let mut count_match: i32 = 0;
|
||||
|
||||
for dir in 0..2 {
|
||||
// direction 0 and 1
|
||||
let mut board0 = board0org[dir];
|
||||
for mut board0 in board0org {
|
||||
let boardf = _mm512_and_si512(answer, board0);
|
||||
let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
|
||||
count_match += _popcnt32(temp_mask as i32);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[assign]
|
||||
|
||||
[assign.owners]
|
||||
"*" = ["@Amanieu"]
|
||||
"*" = ["@Amanieu", "@folkertdev", "@sayantn"]
|
||||
|
||||
[ping.windows]
|
||||
message = """\
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue