Auto merge of #144222 - Kobzol:stdarch-push, r=folkertdev

stdarch subtree update

Subtree update of `stdarch` to 5531955678.

Created using https://github.com/rust-lang/josh-sync.

I saw that there were non-trivial changes made to `std_detect` in `stdarch` recently. So I want to get them merged here before we move forward with https://github.com/rust-lang/rust/pull/143412.

r? `@folkertdev`
This commit is contained in:
bors 2025-07-22 15:25:31 +00:00
commit 2e53675668
72 changed files with 4644 additions and 5044 deletions

View file

@ -255,6 +255,28 @@ jobs:
env:
TARGET: ${{ matrix.target.tuple }}
# Check that the generated files agree with the checked-in versions.
check-stdarch-gen:
needs: [style]
name: Check stdarch-gen-{arm, loongarch} output
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup update nightly && rustup default nightly && rustup component add rustfmt
- name: Check arm spec
run: |
cargo run --bin=stdarch-gen-arm --release -- crates/stdarch-gen-arm/spec
git diff --exit-code
- name: Check lsx.spec
run: |
cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lsx.spec
git diff --exit-code
- name: Check lasx.spec
run: |
cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lasx.spec
git diff --exit-code
build-std-detect:
needs: [style]
name: Build std_detect
@ -271,6 +293,7 @@ jobs:
- verify
- test
- build-std-detect
- check-stdarch-gen
runs-on: ubuntu-latest
# We need to ensure this job does *not* get skipped if its dependencies fail,
# because a skipped job is considered a success by GitHub. So we have to

View file

@ -0,0 +1,22 @@
# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
name: rustc-pull
on:
workflow_dispatch:
schedule:
# Run at 04:00 UTC every Monday and Thursday
- cron: '0 4 * * 1,4'
jobs:
pull:
if: github.repository == 'rust-lang/stdarch'
uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
with:
# https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
zulip-stream-id: 208962
zulip-bot-email: "stdarch-ci-bot@rust-lang.zulipchat.com"
pr-base-branch: master
branch-name: rustc-pull
secrets:
zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
token: ${{ secrets.GITHUB_TOKEN }}

View file

@ -73,20 +73,26 @@ version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
"syn 2.0.104",
]
[[package]]
name = "autocfg"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "cc"
version = "1.2.26"
version = "1.2.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac"
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
dependencies = [
"shlex",
]
@ -99,9 +105,9 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "clap"
version = "4.5.40"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
dependencies = [
"clap_builder",
"clap_derive",
@ -109,9 +115,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.40"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
dependencies = [
"anstream",
"anstyle",
@ -121,14 +127,14 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.40"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.102",
"syn 2.0.104",
]
[[package]]
@ -338,9 +344,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.9.0"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
dependencies = [
"equivalent",
"hashbrown 0.15.4",
@ -403,9 +409,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.172"
version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "linked-hash-map"
@ -624,7 +630,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
"syn 2.0.104",
]
[[package]]
@ -685,7 +691,7 @@ version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
"syn 2.0.104",
]
[[package]]
@ -703,7 +709,6 @@ name = "stdarch-gen-arm"
version = "0.1.0"
dependencies = [
"itertools",
"lazy_static",
"proc-macro2",
"quote",
"regex",
@ -727,7 +732,6 @@ dependencies = [
"assert-instr-macro",
"cc",
"cfg-if",
"lazy_static",
"rustc-demangle",
"simd-test-macro",
"wasmprinter",
@ -742,7 +746,7 @@ dependencies = [
"quote",
"serde",
"serde_json",
"syn 2.0.102",
"syn 2.0.104",
]
[[package]]
@ -780,9 +784,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.102"
version = "2.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
dependencies = [
"proc-macro2",
"quote",
@ -834,21 +838,23 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasmparser"
version = "0.113.3"
version = "0.235.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae"
checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
dependencies = [
"indexmap 2.9.0",
"bitflags",
"indexmap 2.10.0",
"semver",
]
[[package]]
name = "wasmprinter"
version = "0.2.67"
version = "0.235.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6615a5587149e753bf4b93f90fa3c3f41c88597a7a2da72879afcabeda9648f"
checksum = "75aa8e9076de6b9544e6dab4badada518cca0bf4966d35b131bbd057aed8fa0a"
dependencies = [
"anyhow",
"termcolor",
"wasmparser",
]
@ -945,20 +951,20 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.25"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.25"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.102",
"syn 2.0.104",
]

View file

@ -5,7 +5,8 @@ members = [
"examples",
]
exclude = [
"crates/wasm-assert-instr-tests"
"crates/wasm-assert-instr-tests",
"rust_programs",
]
[profile.release]

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
g++ \
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang-19 \
clang \
lld
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
@ -9,15 +9,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang-19 \
clang \
curl \
xz-utils \
lld
ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
# Download the aarch64_be gcc toolchain
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \

View file

@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang-19 \
clang \
lld
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \

View file

@ -1,9 +1,9 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user-static ca-certificates \
gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
gcc-loongarch64-linux-gnu libc6-dev-loong64-cross
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && \
apt-get install -y --no-install-recommends \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,10 +1,10 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
wget xz-utils make file llvm
ENV VERSION=2025.01.20
ENV VERSION=2025.07.03
RUN wget "https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${VERSION}/riscv32-glibc-ubuntu-24.04-gcc-nightly-${VERSION}-nightly.tar.xz" \
-O riscv-toolchain.tar.xz

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates \

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -y && apt-get install -y --no-install-recommends \
@ -7,7 +7,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \
xz-utils \
clang
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasmtime-v18.0.2-x86_64-linux.tar.xz | tar xJf -
ENV PATH=$PATH:/wasmtime-v18.0.2-x86_64-linux
ENV VERSION=v34.0.1
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf -
ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux
ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::."

View file

@ -1,4 +1,4 @@
FROM ubuntu:25.04
FROM ubuntu:25.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
xz-utils
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz
RUN mkdir intel-sde
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \

View file

@ -1,41 +1,35 @@
# Copyright (C) 2024-2024 Intel Corporation.
#
# Copyright (C) 2017-2025 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and your
# use of them is governed by the express license under which they were provided to
# you ("License"). Unless the License provides otherwise, you may not use, modify,
# copy, publish, distribute, disclose or transmit this software or the related
# documents without Intel's prior written permission.
#
#
# This software and the related documents are provided as is, with no express or
# implied warranties, other than those that are expressly stated in the License.
#
# The CPUID information in this file is for software enabling purposes only and
# it is not a full and accurate representation of the CPU under development which
# it represents.
# The CPUID information in this file is not a guarantee of the availability of
# features or characteristics in the final released CPU.
#
# CPUID_VERSION = 1.0
# Input => Output
# EAX ECX => EAX EBX ECX EDX
00000000 ******** => 00000024 68747541 444d4163 69746e65
00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff
00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
00000003 ******** => 00000000 00000000 00000000 00000000
00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
00000004 00000000 => 7c004121 01c0003f 0000003f 00000000 #Deterministic Cache
00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
00000004 00000002 => 7c004143 03c0003f 000003ff 00000000
00000004 00000003 => 7c0fc163 0280003f 0000dfff 00000004
00000004 00000004 => 00000000 00000000 00000000 00000000
00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features
00000007 00000001 => 98ee00bf 00000002 00000020 1d29cd3e
00000008 ******** => 00000000 00000000 00000000 00000000
00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
0000000a ******** => 07300403 00000000 00000000 00000603
0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
0000000b 00000001 => 00000004 00000002 00000201 0000001e
0000000b 00000000 => 00000001 00000002 00000100 00000000 #Extended Topology
0000000b 00000001 => 00000004 00000002 00000201 00000000
0000000c ******** => 00000000 00000000 00000000 00000000
0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
0000000d 00000001 => 0000001f 00000240 00000100 00000000
@ -52,10 +46,8 @@
0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
0000001e 00000001 => 000001ff 00000000 00000000 00000000
0000001f 00000000 => 00000001 00000002 00000100 0000001e
0000001f 00000001 => 00000007 00000070 00000201 0000001e
0000001f 00000002 => 00000000 00000000 00000002 0000001e
00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10
00000024 00000001 => 00000000 00000000 00000004 00000000
80000000 ******** => 80000008 00000000 00000000 00000000
80000001 ******** => 00000000 00000000 00200961 2c100000
80000002 ******** => 00000000 00000000 00000000 00000000
@ -66,6 +58,6 @@
80000007 ******** => 00000000 00000000 00000000 00000100
80000008 ******** => 00003028 00000200 00000200 00000000
# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to
# This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to
# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`,
# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID.

View file

@ -144,21 +144,21 @@ case ${TARGET} in
aarch64-unknown-linux-gnu*)
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++-19"
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
;;
aarch64_be-unknown-linux-gnu*)
TEST_CPPFLAGS="-fuse-ld=lld"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++-19"
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
;;
armv7-unknown-linux-gnueabihf*)
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
TEST_CXX_COMPILER="clang++-19"
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
;;
*)

View file

@ -193,6 +193,7 @@ others at:
* [`powerpc64`]
* [`nvptx`]
* [`wasm32`]
* [`loongarch32`]
* [`loongarch64`]
* [`s390x`]
@ -208,6 +209,7 @@ others at:
[`powerpc64`]: ../../core/arch/powerpc64/index.html
[`nvptx`]: ../../core/arch/nvptx/index.html
[`wasm32`]: ../../core/arch/wasm32/index.html
[`loongarch32`]: ../../core/arch/loongarch32/index.html
[`loongarch64`]: ../../core/arch/loongarch64/index.html
[`s390x`]: ../../core/arch/s390x/index.html

View file

@ -0,0 +1,47 @@
//! `LoongArch32` intrinsics
use crate::arch::asm;
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
#[link_name = "llvm.loongarch.cacop.w"]
fn __cacop(a: i32, b: i32, c: i32);
#[link_name = "llvm.loongarch.csrrd.w"]
fn __csrrd(a: i32) -> i32;
#[link_name = "llvm.loongarch.csrwr.w"]
fn __csrwr(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.csrxchg.w"]
fn __csrxchg(a: i32, b: i32, c: i32) -> i32;
}
/// Generates the cache operation instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn cacop<const IMM12: i32>(a: i32, b: i32) {
static_assert_simm_bits!(IMM12, 12);
__cacop(a, b, IMM12);
}
/// Reads the CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrrd<const IMM14: i32>() -> i32 {
static_assert_uimm_bits!(IMM14, 14);
__csrrd(IMM14)
}
/// Writes the CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrwr<const IMM14: i32>(a: i32) -> i32 {
static_assert_uimm_bits!(IMM14, 14);
__csrwr(a, IMM14)
}
/// Exchanges the CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn csrxchg<const IMM14: i32>(a: i32, b: i32) -> i32 {
static_assert_uimm_bits!(IMM14, 14);
__csrxchg(a, b, IMM14)
}

View file

@ -1,4 +1,4 @@
//! `LoongArch` intrinsics
//! `LoongArch64` intrinsics
mod lasx;
mod lsx;
@ -13,89 +13,30 @@ use crate::arch::asm;
/// Reads the 64-bit stable counter value and the counter ID
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtime_d() -> (i64, isize) {
let val: i64;
let tid: isize;
asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
(val, tid)
}
/// Reads the lower 32-bit stable counter value and the counter ID
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtimel_w() -> (i32, isize) {
let val: i32;
let tid: isize;
asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
(val, tid)
}
/// Reads the upper 32-bit stable counter value and the counter ID
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn rdtimeh_w() -> (i32, isize) {
let val: i32;
let tid: isize;
asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack));
pub fn rdtime_d() -> (i64, isize) {
let (val, tid): (i64, isize);
unsafe { asm!("rdtime.d {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
(val, tid)
}
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
#[link_name = "llvm.loongarch.crc.w.b.w"]
fn __crc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.h.w"]
fn __crc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.w.w"]
fn __crc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.d.w"]
fn __crc_w_d_w(a: i64, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.b.w"]
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.h.w"]
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.w.w"]
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.d.w"]
fn __crcc_w_d_w(a: i64, b: i32) -> i32;
#[link_name = "llvm.loongarch.cacop.d"]
fn __cacop(a: i64, b: i64, c: i64);
#[link_name = "llvm.loongarch.dbar"]
fn __dbar(a: i32);
#[link_name = "llvm.loongarch.ibar"]
fn __ibar(a: i32);
#[link_name = "llvm.loongarch.movgr2fcsr"]
fn __movgr2fcsr(a: i32, b: i32);
#[link_name = "llvm.loongarch.movfcsr2gr"]
fn __movfcsr2gr(a: i32) -> i32;
#[link_name = "llvm.loongarch.csrrd.d"]
fn __csrrd(a: i32) -> i64;
#[link_name = "llvm.loongarch.csrwr.d"]
fn __csrwr(a: i64, b: i32) -> i64;
#[link_name = "llvm.loongarch.csrxchg.d"]
fn __csrxchg(a: i64, b: i64, c: i32) -> i64;
#[link_name = "llvm.loongarch.iocsrrd.b"]
fn __iocsrrd_b(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.h"]
fn __iocsrrd_h(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.w"]
fn __iocsrrd_w(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.d"]
fn __iocsrrd_d(a: i32) -> i64;
#[link_name = "llvm.loongarch.iocsrwr.b"]
fn __iocsrwr_b(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.h"]
fn __iocsrwr_h(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.w"]
fn __iocsrwr_w(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.d"]
fn __iocsrwr_d(a: i64, b: i32);
#[link_name = "llvm.loongarch.break"]
fn __break(a: i32);
#[link_name = "llvm.loongarch.cpucfg"]
fn __cpucfg(a: i32) -> i32;
#[link_name = "llvm.loongarch.syscall"]
fn __syscall(a: i32);
#[link_name = "llvm.loongarch.asrtle.d"]
fn __asrtle(a: i64, b: i64);
#[link_name = "llvm.loongarch.asrtgt.d"]
@ -104,70 +45,20 @@ unsafe extern "unadjusted" {
fn __lddir(a: i64, b: i64) -> i64;
#[link_name = "llvm.loongarch.ldpte.d"]
fn __ldpte(a: i64, b: i64);
#[link_name = "llvm.loongarch.frecipe.s"]
fn __frecipe_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frecipe.d"]
fn __frecipe_d(a: f64) -> f64;
#[link_name = "llvm.loongarch.frsqrte.s"]
fn __frsqrte_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frsqrte.d"]
fn __frsqrte_d(a: f64) -> f64;
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_b_w(a: i32, b: i32) -> i32 {
__crc_w_b_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_h_w(a: i32, b: i32) -> i32 {
__crc_w_h_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_w_w(a: i32, b: i32) -> i32 {
__crc_w_w_w(a, b)
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crc_w_d_w(a: i64, b: i32) -> i32 {
__crc_w_d_w(a, b)
pub fn crc_w_d_w(a: i64, b: i32) -> i32 {
unsafe { __crc_w_d_w(a, b) }
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_b_w(a: i32, b: i32) -> i32 {
__crcc_w_b_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_h_w(a: i32, b: i32) -> i32 {
__crcc_w_h_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_w_w(a: i32, b: i32) -> i32 {
__crcc_w_w_w(a, b)
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn crcc_w_d_w(a: i64, b: i32) -> i32 {
__crcc_w_d_w(a, b)
pub fn crcc_w_d_w(a: i64, b: i32) -> i32 {
unsafe { __crcc_w_d_w(a, b) }
}
/// Generates the cache operation instruction
@ -178,38 +69,6 @@ pub unsafe fn cacop<const IMM12: i64>(a: i64, b: i64) {
__cacop(a, b, IMM12);
}
/// Generates the memory barrier instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn dbar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__dbar(IMM15);
}
/// Generates the instruction-fetch barrier instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn ibar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__ibar(IMM15);
}
/// Moves data from a GPR to the FCSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
static_assert_uimm_bits!(IMM5, 5);
__movgr2fcsr(IMM5, a);
}
/// Moves data from a FCSR to the GPR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn movfcsr2gr<const IMM5: i32>() -> i32 {
static_assert_uimm_bits!(IMM5, 5);
__movfcsr2gr(IMM5)
}
/// Reads the CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -234,27 +93,6 @@ pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
__csrxchg(a, b, IMM14)
}
/// Reads the 8-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
__iocsrrd_b(a)
}
/// Reads the 16-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
__iocsrrd_h(a)
}
/// Reads the 32-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
__iocsrrd_w(a)
}
/// Reads the 64-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -262,27 +100,6 @@ pub unsafe fn iocsrrd_d(a: i32) -> i64 {
__iocsrrd_d(a)
}
/// Writes the 8-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
__iocsrwr_b(a, b)
}
/// Writes the 16-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
__iocsrwr_h(a, b)
}
/// Writes the 32-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
__iocsrwr_w(a, b)
}
/// Writes the 64-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -290,29 +107,6 @@ pub unsafe fn iocsrwr_d(a: i64, b: i32) {
__iocsrwr_d(a, b)
}
/// Generates the breakpoint instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn brk<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__break(IMM15);
}
/// Reads the CPU configuration register
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn cpucfg(a: i32) -> i32 {
__cpucfg(a)
}
/// Generates the syscall instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn syscall<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__syscall(IMM15);
}
/// Generates the less-than-or-equal asseration instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@ -342,35 +136,3 @@ pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
pub unsafe fn ldpte<const B: i64>(a: i64) {
__ldpte(a, B)
}
/// Calculate the approximate single-precision result of 1.0 divided
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frecipe_s(a: f32) -> f32 {
__frecipe_s(a)
}
/// Calculate the approximate double-precision result of 1.0 divided
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frecipe_d(a: f64) -> f64 {
__frecipe_d(a)
}
/// Calculate the approximate single-precision result of dividing 1.0 by the square root
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frsqrte_s(a: f32) -> f32 {
__frsqrte_s(a)
}
/// Calculate the approximate double-precision result of dividing 1.0 by the square root
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn frsqrte_d(a: f64) -> f64 {
__frsqrte_d(a)
}

View file

@ -0,0 +1,242 @@
//! `Shared LoongArch` intrinsics
use crate::arch::asm;
/// Reads the lower 32-bit stable counter value and the counter ID
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn rdtimel_w() -> (i32, isize) {
let (val, tid): (i32, isize);
unsafe { asm!("rdtimel.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
(val, tid)
}
/// Reads the upper 32-bit stable counter value and the counter ID
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn rdtimeh_w() -> (i32, isize) {
let (val, tid): (i32, isize);
unsafe { asm!("rdtimeh.w {}, {}", out(reg) val, out(reg) tid, options(readonly, nostack)) };
(val, tid)
}
#[allow(improper_ctypes)]
unsafe extern "unadjusted" {
#[link_name = "llvm.loongarch.crc.w.b.w"]
fn __crc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.h.w"]
fn __crc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crc.w.w.w"]
fn __crc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.b.w"]
fn __crcc_w_b_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.h.w"]
fn __crcc_w_h_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.crcc.w.w.w"]
fn __crcc_w_w_w(a: i32, b: i32) -> i32;
#[link_name = "llvm.loongarch.dbar"]
fn __dbar(a: i32);
#[link_name = "llvm.loongarch.ibar"]
fn __ibar(a: i32);
#[link_name = "llvm.loongarch.movgr2fcsr"]
fn __movgr2fcsr(a: i32, b: i32);
#[link_name = "llvm.loongarch.movfcsr2gr"]
fn __movfcsr2gr(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.b"]
fn __iocsrrd_b(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.h"]
fn __iocsrrd_h(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrrd.w"]
fn __iocsrrd_w(a: i32) -> i32;
#[link_name = "llvm.loongarch.iocsrwr.b"]
fn __iocsrwr_b(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.h"]
fn __iocsrwr_h(a: i32, b: i32);
#[link_name = "llvm.loongarch.iocsrwr.w"]
fn __iocsrwr_w(a: i32, b: i32);
#[link_name = "llvm.loongarch.break"]
fn __break(a: i32);
#[link_name = "llvm.loongarch.cpucfg"]
fn __cpucfg(a: i32) -> i32;
#[link_name = "llvm.loongarch.syscall"]
fn __syscall(a: i32);
#[link_name = "llvm.loongarch.frecipe.s"]
fn __frecipe_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frecipe.d"]
fn __frecipe_d(a: f64) -> f64;
#[link_name = "llvm.loongarch.frsqrte.s"]
fn __frsqrte_s(a: f32) -> f32;
#[link_name = "llvm.loongarch.frsqrte.d"]
fn __frsqrte_d(a: f64) -> f64;
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crc_w_b_w(a: i32, b: i32) -> i32 {
unsafe { __crc_w_b_w(a, b) }
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crc_w_h_w(a: i32, b: i32) -> i32 {
unsafe { __crc_w_h_w(a, b) }
}
/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crc_w_w_w(a: i32, b: i32) -> i32 {
unsafe { __crc_w_w_w(a, b) }
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crcc_w_b_w(a: i32, b: i32) -> i32 {
unsafe { __crcc_w_b_w(a, b) }
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crcc_w_h_w(a: i32, b: i32) -> i32 {
unsafe { __crcc_w_h_w(a, b) }
}
/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn crcc_w_w_w(a: i32, b: i32) -> i32 {
unsafe { __crcc_w_w_w(a, b) }
}
/// Generates the memory barrier instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn dbar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
unsafe { __dbar(IMM15) };
}
/// Generates the instruction-fetch barrier instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn ibar<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
unsafe { __ibar(IMM15) };
}
/// Moves data from a GPR to the FCSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn movgr2fcsr<const IMM5: i32>(a: i32) {
static_assert_uimm_bits!(IMM5, 5);
__movgr2fcsr(IMM5, a);
}
/// Moves data from a FCSR to the GPR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn movfcsr2gr<const IMM5: i32>() -> i32 {
static_assert_uimm_bits!(IMM5, 5);
unsafe { __movfcsr2gr(IMM5) }
}
/// Reads the 8-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_b(a: i32) -> i32 {
__iocsrrd_b(a)
}
/// Reads the 16-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_h(a: i32) -> i32 {
__iocsrrd_h(a)
}
/// Reads the 32-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrrd_w(a: i32) -> i32 {
__iocsrrd_w(a)
}
/// Writes the 8-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_b(a: i32, b: i32) {
__iocsrwr_b(a, b)
}
/// Writes the 16-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_h(a: i32, b: i32) {
__iocsrwr_h(a, b)
}
/// Writes the 32-bit IO-CSR
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn iocsrwr_w(a: i32, b: i32) {
__iocsrwr_w(a, b)
}
/// Generates the breakpoint instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn brk<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__break(IMM15);
}
/// Reads the CPU configuration register
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn cpucfg(a: i32) -> i32 {
unsafe { __cpucfg(a) }
}
/// Generates the syscall instruction
#[inline]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn syscall<const IMM15: i32>() {
static_assert_uimm_bits!(IMM15, 15);
__syscall(IMM15);
}
/// Calculate the approximate single-precision result of 1.0 divided
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn frecipe_s(a: f32) -> f32 {
unsafe { __frecipe_s(a) }
}
/// Calculate the approximate double-precision result of 1.0 divided
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn frecipe_d(a: f64) -> f64 {
unsafe { __frecipe_d(a) }
}
/// Calculate the approximate single-precision result of dividing 1.0 by the square root
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn frsqrte_s(a: f32) -> f32 {
unsafe { __frsqrte_s(a) }
}
/// Calculate the approximate double-precision result of dividing 1.0 by the square root
#[inline]
#[target_feature(enable = "frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn frsqrte_d(a: f64) -> f64 {
unsafe { __frsqrte_d(a) }
}

View file

@ -16,6 +16,9 @@ mod riscv_shared;
))]
mod arm_shared;
#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64", doc))]
mod loongarch_shared;
mod simd;
#[doc = include_str!("core_arch_docs.md")]
@ -271,13 +274,25 @@ pub mod arch {
pub use crate::core_arch::nvptx::*;
}
/// Platform-specific intrinsics for the `loongarch` platform.
/// Platform-specific intrinsics for the `loongarch32` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "loongarch32", doc))]
#[doc(cfg(target_arch = "loongarch32"))]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub mod loongarch32 {
pub use crate::core_arch::loongarch_shared::*;
pub use crate::core_arch::loongarch32::*;
}
/// Platform-specific intrinsics for the `loongarch64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "loongarch64", doc))]
#[doc(cfg(target_arch = "loongarch64"))]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub mod loongarch64 {
pub use crate::core_arch::loongarch_shared::*;
pub use crate::core_arch::loongarch64::*;
}
@ -334,6 +349,10 @@ mod powerpc64;
#[doc(cfg(target_arch = "nvptx64"))]
mod nvptx;
#[cfg(any(target_arch = "loongarch32", doc))]
#[doc(cfg(target_arch = "loongarch32"))]
mod loongarch32;
#[cfg(any(target_arch = "loongarch64", doc))]
#[doc(cfg(target_arch = "loongarch64"))]
mod loongarch64;

View file

@ -1181,6 +1181,20 @@ mod sealed {
impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
// Z vector intrinsic C23 math.h LLVM IR ISO/IEC 60559 operation inexact vfidb parameters
//
// vec_rint rint llvm.rint roundToIntegralExact yes 0, 0
// vec_roundc nearbyint llvm.nearbyint n/a no 4, 0
// vec_floor / vec_roundm floor llvm.floor roundToIntegralTowardNegative no 4, 7
// vec_ceil / vec_roundp ceil llvm.ceil roundToIntegralTowardPositive no 4, 6
// vec_trunc / vec_roundz trunc llvm.trunc roundToIntegralTowardZero no 4, 5
// vec_round roundeven llvm.roundeven roundToIntegralTiesToEven no 4, 4
// n/a round llvm.round roundToIntegralTiesAway no 4, 1
// `simd_round_ties_even` is implemented as `llvm.rint`.
test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] }
test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
@ -1189,9 +1203,6 @@ mod sealed {
test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorRoundc {
unsafe fn vec_roundc(self) -> Self;
@ -2254,14 +2265,14 @@ mod sealed {
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr("vlbb"))]
#[cfg_attr(test, assert_instr(vlbb))]
unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> {
vector_signed_int::vec_load_bndry::<512>(ptr)
}
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr(vst))]
#[cfg_attr(test, assert_instr(vstl))]
unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) {
vector.vec_store_len(ptr, byte_count)
}
@ -2787,11 +2798,11 @@ mod sealed {
}
test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] }
test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] }
test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalhw ] }
test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] }
test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] }
test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] }
test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalhw ] }
test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] }
impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char );

View file

@ -1,64 +1,51 @@
use crate::common::compile_c::CompilationCommandBuilder;
use crate::common::gen_c::compile_c_programs;
use crate::common::cli::ProcessedCli;
use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
let cpp_compiler = config.cpp_compiler.as_ref()?;
pub fn compile_c_arm(
intrinsics_name_list: &[String],
compiler: &str,
target: &str,
cxx_toolchain_dir: Option<&str>,
) -> bool {
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
let mut command = CompilationCommandBuilder::new()
.add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
.set_compiler(compiler)
.set_target(target)
.set_compiler(cpp_compiler)
.set_target(&config.target)
.set_opt_level("2")
.set_cxx_toolchain_dir(cxx_toolchain_dir)
.set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
.set_project_root("c_programs")
.add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]);
if !target.contains("v7") {
if !config.target.contains("v7") {
command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]);
}
/*
* clang++ cannot link an aarch64_be object file, so we invoke
* aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we
* are testing the intrinsics against LLVM.
*
* Note: setting `--sysroot=<...>` which is the obvious thing to do
* does not work as it gets caught up with `#include_next <stdlib.h>`
* not existing...
*/
if target.contains("aarch64_be") {
command = command
.set_linker(
cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++",
)
.set_include_paths(vec![
"/include",
"/aarch64_be-none-linux-gnu/include",
"/aarch64_be-none-linux-gnu/include/c++/14.2.1",
"/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu",
"/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward",
"/aarch64_be-none-linux-gnu/libc/usr/include",
]);
}
if !compiler.contains("clang") {
if !cpp_compiler.contains("clang") {
command = command.add_extra_flag("-flax-vector-conversions");
}
let compiler_commands = intrinsics_name_list
.iter()
.map(|intrinsic_name| {
command
.clone()
.set_input_name(intrinsic_name)
.set_output_name(intrinsic_name)
.make_string()
})
.collect::<Vec<_>>();
let mut cpp_compiler = command.into_cpp_compilation();
compile_c_programs(&compiler_commands)
if config.target.contains("aarch64_be") {
let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else {
panic!(
"target `{}` must specify `cxx_toolchain_dir`",
config.target
)
};
cpp_compiler.command_mut().args([
&format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"),
"--include-directory",
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"),
"--include-directory",
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"),
"-L",
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
"-L",
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"),
"-B",
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
]);
}
Some(cpp_compiler)
}

View file

@ -114,7 +114,6 @@ pub const AARCH_CONFIGURATIONS: &str = r#"
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
#![feature(fmt_helpers_for_derive)]

View file

@ -1,8 +1,8 @@
use crate::common::argument::ArgumentList;
use crate::common::indentation::Indentation;
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
use std::ops::Deref;
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
use std::ops::{Deref, DerefMut};
#[derive(Debug, Clone, PartialEq)]
pub struct ArmIntrinsicType(pub IntrinsicType);
@ -15,6 +15,12 @@ impl Deref for ArmIntrinsicType {
}
}
impl DerefMut for ArmIntrinsicType {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
fn arguments(&self) -> ArgumentList<ArmIntrinsicType> {
self.arguments.clone()
@ -73,8 +79,9 @@ impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(),
TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(),
TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(),
TypeKind::Int => format!("int{}_t", self.results().inner_size()),
TypeKind::UInt => format!("uint{}_t", self.results().inner_size()),
TypeKind::Int(Sign::Signed) => format!("int{}_t", self.results().inner_size()),
TypeKind::Int(Sign::Unsigned) =>
format!("uint{}_t", self.results().inner_size()),
TypeKind::Poly => format!("poly{}_t", self.results().inner_size()),
ty => todo!("print_result_c - Unknown type: {:#?}", ty),
},

View file

@ -110,7 +110,7 @@ fn json_to_intrinsic(
Ok(Intrinsic {
name,
arguments,
results: *results,
results: results,
arch_tags: intr.architectures,
})
}

View file

@ -4,15 +4,20 @@ mod intrinsic;
mod json_parser;
mod types;
use std::fs::File;
use rayon::prelude::*;
use crate::arm::config::POLY128_OSTREAM_DEF;
use crate::common::SupportedArchitectureTest;
use crate::common::cli::ProcessedCli;
use crate::common::compare::compare_outputs;
use crate::common::gen_c::{write_main_cpp, write_mod_cpp};
use crate::common::gen_rust::compile_rust_programs;
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
use crate::common::intrinsic_helpers::TypeKind;
use crate::common::write_file::{write_c_testfiles, write_rust_testfiles};
use compile::compile_c_arm;
use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices};
use crate::common::write_file::write_rust_testfiles;
use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices};
use intrinsic::ArmIntrinsicType;
use json_parser::get_neon_intrinsics;
@ -21,6 +26,13 @@ pub struct ArmArchitectureTest {
cli_options: ProcessedCli,
}
fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
let available_parallelism = std::thread::available_parallelism().unwrap().get();
let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
(chunk_size, intrinsic_count.div_ceil(chunk_size))
}
impl SupportedArchitectureTest for ArmArchitectureTest {
fn create(cli_options: ProcessedCli) -> Box<Self> {
let a32 = cli_options.target.contains("v7");
@ -51,33 +63,58 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
}
fn build_c_file(&self) -> bool {
let compiler = self.cli_options.cpp_compiler.as_deref();
let target = &self.cli_options.target;
let cxx_toolchain_dir = self.cli_options.cxx_toolchain_dir.as_deref();
let c_target = "aarch64";
let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
let intrinsics_name_list = write_c_testfiles(
&self
.intrinsics
.iter()
.map(|i| i as &dyn IntrinsicDefinition<_>)
.collect::<Vec<_>>(),
target,
let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len());
let cpp_compiler = compile::build_cpp_compilation(&self.cli_options).unwrap();
let notice = &build_notices("// ");
self.intrinsics
.par_chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| {
let c_filename = format!("c_programs/mod_{i}.cpp");
let mut file = File::create(&c_filename).unwrap();
write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap();
// compile this cpp file into a .o file
let output = cpp_compiler
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
assert!(output.status.success(), "{output:?}");
Ok(())
})
.collect::<Result<(), std::io::Error>>()
.unwrap();
let mut file = File::create("c_programs/main.cpp").unwrap();
write_main_cpp(
&mut file,
c_target,
&["arm_neon.h", "arm_acle.h", "arm_fp16.h"],
&build_notices("// "),
&[POLY128_OSTREAM_DEF],
);
POLY128_OSTREAM_DEF,
self.intrinsics.iter().map(|i| i.name.as_str()),
)
.unwrap();
match compiler {
None => true,
Some(compiler) => compile_c_arm(
intrinsics_name_list.as_slice(),
compiler,
target,
cxx_toolchain_dir,
),
}
// compile this cpp file into a .o file
info!("compiling main.cpp");
let output = cpp_compiler
.compile_object_file("main.cpp", "intrinsic-test-programs.o")
.unwrap();
assert!(output.status.success(), "{output:?}");
let object_files = (0..chunk_count)
.map(|i| format!("mod_{i}.o"))
.chain(["intrinsic-test-programs.o".to_owned()]);
let output = cpp_compiler
.link_executable(object_files, "intrinsic-test-programs")
.unwrap();
assert!(output.status.success(), "{output:?}");
true
}
fn build_rust_file(&self) -> bool {
@ -104,7 +141,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
}
fn compare_outputs(&self) -> bool {
if let Some(ref toolchain) = self.cli_options.toolchain {
if self.cli_options.toolchain.is_some() {
let intrinsics_name_list = self
.intrinsics
.iter()
@ -113,8 +150,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
compare_outputs(
&intrinsics_name_list,
toolchain,
&self.cli_options.c_runner,
&self.cli_options.runner,
&self.cli_options.target,
)
} else {

View file

@ -1,6 +1,6 @@
use super::intrinsic::ArmIntrinsicType;
use crate::common::cli::Language;
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind};
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
impl IntrinsicTypeDefinition for ArmIntrinsicType {
/// Gets a string containing the typename for this type in C format.
@ -73,8 +73,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
format!(
"vld{len}{quad}_{type}{size}",
type = match k {
TypeKind::UInt => "u",
TypeKind::Int => "s",
TypeKind::Int(Sign::Unsigned) => "u",
TypeKind::Int(Sign::Signed) => "s",
TypeKind::Float => "f",
// The ACLE doesn't support 64-bit polynomial loads on Armv7
// if armv7 and bl == 64, use "s", else "p"
@ -107,8 +107,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
format!(
"vget{quad}_lane_{type}{size}",
type = match k {
TypeKind::UInt => "u",
TypeKind::Int => "s",
TypeKind::Int(Sign::Unsigned) => "u",
TypeKind::Int(Sign::Signed) => "s",
TypeKind::Float => "f",
TypeKind::Poly => "p",
x => todo!("get_load_function TypeKind: {:#?}", x),
@ -121,7 +121,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
}
}
fn from_c(s: &str, target: &str) -> Result<Box<Self>, String> {
fn from_c(s: &str, target: &str) -> Result<Self, String> {
const CONST_STR: &str = "const";
if let Some(s) = s.strip_suffix('*') {
let (s, constant) = match s.trim().strip_suffix(CONST_STR) {
@ -131,9 +131,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
let s = s.trim_end();
let temp_return = ArmIntrinsicType::from_c(s, target);
temp_return.map(|mut op| {
let edited = op.as_mut();
edited.0.ptr = true;
edited.0.ptr_constant = constant;
op.ptr = true;
op.ptr_constant = constant;
op
})
} else {
@ -163,7 +162,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
),
None => None,
};
Ok(Box::new(ArmIntrinsicType(IntrinsicType {
Ok(ArmIntrinsicType(IntrinsicType {
ptr: false,
ptr_constant: false,
constant,
@ -172,14 +171,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
simd_len,
vec_len,
target: target.to_string(),
})))
}))
} else {
let kind = start.parse::<TypeKind>()?;
let bit_len = match kind {
TypeKind::Int => Some(32),
TypeKind::Int(_) => Some(32),
_ => None,
};
Ok(Box::new(ArmIntrinsicType(IntrinsicType {
Ok(ArmIntrinsicType(IntrinsicType {
ptr: false,
ptr_constant: false,
constant,
@ -188,7 +187,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
simd_len: None,
vec_len: None,
target: target.to_string(),
})))
}))
}
}
}

View file

@ -76,7 +76,7 @@ where
Argument {
pos,
name: String::from(var_name),
ty: *ty,
ty: ty,
constraint,
}
}
@ -125,19 +125,23 @@ where
/// Creates a line for each argument that initializes an array for C from which `loads` argument
/// values can be loaded as a sliding window.
/// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
pub fn gen_arglists_c(&self, indentation: Indentation, loads: u32) -> String {
self.iter()
.filter(|&arg| !arg.has_constraint())
.map(|arg| {
format!(
"{indentation}const {ty} {name}_vals[] = {values};",
ty = arg.ty.c_scalar_type(),
name = arg.name,
values = arg.ty.populate_random(indentation, loads, &Language::C)
)
})
.collect::<Vec<_>>()
.join("\n")
pub fn gen_arglists_c(
&self,
w: &mut impl std::io::Write,
indentation: Indentation,
loads: u32,
) -> std::io::Result<()> {
for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
writeln!(
w,
"{indentation}const {ty} {name}_vals[] = {values};",
ty = arg.ty.c_scalar_type(),
name = arg.name,
values = arg.ty.populate_random(indentation, loads, &Language::C)
)?
}
Ok(())
}
/// Creates a line for each argument that initializes an array for Rust from which `loads` argument

View file

@ -60,7 +60,7 @@ pub struct ProcessedCli {
pub filename: PathBuf,
pub toolchain: Option<String>,
pub cpp_compiler: Option<String>,
pub c_runner: String,
pub runner: String,
pub target: String,
pub linker: Option<String>,
pub cxx_toolchain_dir: Option<String>,
@ -70,7 +70,7 @@ pub struct ProcessedCli {
impl ProcessedCli {
pub fn new(cli_options: Cli) -> Self {
let filename = cli_options.input;
let c_runner = cli_options.runner.unwrap_or_default();
let runner = cli_options.runner.unwrap_or_default();
let target = cli_options.target;
let linker = cli_options.linker;
let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
@ -102,7 +102,7 @@ impl ProcessedCli {
Self {
toolchain,
cpp_compiler,
c_runner,
runner,
target,
linker,
cxx_toolchain_dir,

View file

@ -2,27 +2,25 @@ use super::cli::FailureReason;
use rayon::prelude::*;
use std::process::Command;
pub fn compare_outputs(
intrinsic_name_list: &Vec<String>,
toolchain: &str,
runner: &str,
target: &str,
) -> bool {
pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target: &str) -> bool {
fn runner_command(runner: &str) -> Command {
let mut it = runner.split_whitespace();
let mut cmd = Command::new(it.next().unwrap());
cmd.args(it);
cmd
}
let intrinsics = intrinsic_name_list
.par_iter()
.filter_map(|intrinsic_name| {
let c = Command::new("sh")
.arg("-c")
.arg(format!("{runner} ./c_programs/{intrinsic_name}"))
let c = runner_command(runner)
.arg("./c_programs/intrinsic-test-programs")
.arg(intrinsic_name)
.output();
let rust = Command::new("sh")
.current_dir("rust_programs")
.arg("-c")
.arg(format!(
"cargo {toolchain} run --target {target} --bin {intrinsic_name} --release",
))
.env("RUSTFLAGS", "-Cdebuginfo=0")
let rust = runner_command(runner)
.arg(format!("target/{target}/release/{intrinsic_name}"))
.output();
let (c, rust) = match (c, rust) {
@ -42,8 +40,8 @@ pub fn compare_outputs(
if !rust.status.success() {
error!(
"Failed to run Rust program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}",
stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
stdout = String::from_utf8_lossy(&rust.stdout),
stderr = String::from_utf8_lossy(&rust.stderr),
);
return Some(FailureReason::RunRust(intrinsic_name.clone()));
}

View file

@ -5,11 +5,7 @@ pub struct CompilationCommandBuilder {
cxx_toolchain_dir: Option<String>,
arch_flags: Vec<String>,
optimization: String,
include_paths: Vec<String>,
project_root: Option<String>,
output: String,
input: String,
linker: Option<String>,
extra_flags: Vec<String>,
}
@ -21,11 +17,7 @@ impl CompilationCommandBuilder {
cxx_toolchain_dir: None,
arch_flags: Vec::new(),
optimization: "2".to_string(),
include_paths: Vec::new(),
project_root: None,
output: String::new(),
input: String::new(),
linker: None,
extra_flags: Vec::new(),
}
}
@ -57,37 +49,12 @@ impl CompilationCommandBuilder {
self
}
/// Sets a list of include paths for compilation.
/// The paths that are passed must be relative to the
/// "cxx_toolchain_dir" directory path.
pub fn set_include_paths(mut self, paths: Vec<&str>) -> Self {
self.include_paths = paths.into_iter().map(|path| path.to_string()).collect();
self
}
/// Sets the root path of all the generated test files.
pub fn set_project_root(mut self, path: &str) -> Self {
self.project_root = Some(path.to_string());
self
}
/// The name of the output executable, without any suffixes
pub fn set_output_name(mut self, path: &str) -> Self {
self.output = path.to_string();
self
}
/// The name of the input C file, without any suffixes
pub fn set_input_name(mut self, path: &str) -> Self {
self.input = path.to_string();
self
}
pub fn set_linker(mut self, linker: String) -> Self {
self.linker = Some(linker);
self
}
pub fn add_extra_flags(mut self, flags: Vec<&str>) -> Self {
let mut flags: Vec<String> = flags.into_iter().map(|f| f.to_string()).collect();
self.extra_flags.append(&mut flags);
@ -100,55 +67,69 @@ impl CompilationCommandBuilder {
}
impl CompilationCommandBuilder {
pub fn make_string(self) -> String {
let arch_flags = self.arch_flags.join("+");
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
let project_root = self.project_root.unwrap_or_default();
let project_root_str = project_root.as_str();
let mut output = self.output.clone();
if self.linker.is_some() {
output += ".o"
};
let mut command = format!(
"{} {flags} -march={arch_flags} \
-O{} \
-o {project_root}/{} \
{project_root}/{}.cpp",
self.compiler, self.optimization, output, self.input,
);
pub fn into_cpp_compilation(self) -> CppCompilation {
let mut cpp_compiler = std::process::Command::new(self.compiler);
command = command + " " + self.extra_flags.join(" ").as_str();
if let Some(project_root) = self.project_root {
cpp_compiler.current_dir(project_root);
}
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
cpp_compiler.args(flags.split_whitespace());
cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+")));
cpp_compiler.arg(format!("-O{}", self.optimization));
cpp_compiler.args(self.extra_flags);
if let Some(target) = &self.target {
command = command + " --target=" + target;
cpp_compiler.arg(format!("--target={target}"));
}
if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) {
let include_args = self
.include_paths
.iter()
.map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path)
.collect::<Vec<_>>()
.join(" ");
command = command
+ " -c "
+ include_args.as_str()
+ " && "
+ linker
+ " "
+ project_root_str
+ "/"
+ &output
+ " -o "
+ project_root_str
+ "/"
+ &self.output
+ " && rm "
+ project_root_str
+ "/"
+ &output;
}
command
CppCompilation(cpp_compiler)
}
}
pub struct CppCompilation(std::process::Command);
fn clone_command(command: &std::process::Command) -> std::process::Command {
let mut cmd = std::process::Command::new(command.get_program());
if let Some(current_dir) = command.get_current_dir() {
cmd.current_dir(current_dir);
}
cmd.args(command.get_args());
for (key, val) in command.get_envs() {
cmd.env(key, val.unwrap_or_default());
}
cmd
}
impl CppCompilation {
pub fn command_mut(&mut self) -> &mut std::process::Command {
&mut self.0
}
pub fn compile_object_file(
&self,
input: &str,
output: &str,
) -> std::io::Result<std::process::Output> {
let mut cmd = clone_command(&self.0);
cmd.args([input, "-c", "-o", output]);
cmd.output()
}
pub fn link_executable(
&self,
inputs: impl Iterator<Item = String>,
output: &str,
) -> std::io::Result<std::process::Output> {
let mut cmd = clone_command(&self.0);
cmd.args(inputs);
cmd.args(["-o", output]);
cmd.output()
}
}

View file

@ -1,8 +1,3 @@
use itertools::Itertools;
use rayon::prelude::*;
use std::collections::BTreeMap;
use std::process::Command;
use super::argument::Argument;
use super::indentation::Indentation;
use super::intrinsic::IntrinsicDefinition;
@ -11,104 +6,16 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition;
// The number of times each intrinsic will be called.
const PASSES: u32 = 20;
// Formats the main C program template with placeholders
pub fn format_c_main_template(
notices: &str,
header_files: &[&str],
arch_identifier: &str,
arch_specific_definitions: &[&str],
arglists: &str,
passes: &str,
) -> String {
format!(
r#"{notices}{header_files}
#include <iostream>
#include <cstring>
#include <iomanip>
#include <sstream>
template<typename T1, typename T2> T1 cast(T2 x) {{
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
T1 ret{{}};
memcpy(&ret, &x, sizeof(T1));
return ret;
}}
std::ostream& operator<<(std::ostream& os, float16_t value) {{
uint16_t temp = 0;
memcpy(&temp, &value, sizeof(float16_t));
std::stringstream ss;
ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
os << ss.str();
return os;
}}
#ifdef __{arch_identifier}__
{arch_specific_definitions}
#endif
{arglists}
int main(int argc, char **argv) {{
{passes}
return 0;
}}"#,
header_files = header_files
.iter()
.map(|header| format!("#include <{header}>"))
.collect::<Vec<_>>()
.join("\n"),
arch_specific_definitions = arch_specific_definitions.join("\n"),
)
}
pub fn compile_c_programs(compiler_commands: &[String]) -> bool {
compiler_commands
.par_iter()
.map(|compiler_command| {
let output = Command::new("sh").arg("-c").arg(compiler_command).output();
if let Ok(output) = output {
if output.status.success() {
true
} else {
error!(
"Failed to compile code for intrinsics: \n\nstdout:\n{}\n\nstderr:\n{}",
std::str::from_utf8(&output.stdout).unwrap_or(""),
std::str::from_utf8(&output.stderr).unwrap_or("")
);
false
}
} else {
error!("Command failed: {output:#?}");
false
}
})
.find_any(|x| !x)
.is_none()
}
// Creates directory structure and file path mappings
pub fn setup_c_file_paths(identifiers: &Vec<String>) -> BTreeMap<&String, String> {
let _ = std::fs::create_dir("c_programs");
identifiers
.par_iter()
.map(|identifier| {
let c_filename = format!(r#"c_programs/{identifier}.cpp"#);
(identifier, c_filename)
})
.collect::<BTreeMap<&String, String>>()
}
pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
w: &mut impl std::io::Write,
intrinsic: &dyn IntrinsicDefinition<T>,
indentation: Indentation,
additional: &str,
passes: u32,
_target: &str,
) -> String {
) -> std::io::Result<()> {
let body_indentation = indentation.nested();
format!(
writeln!(
w,
"{indentation}for (int i=0; i<{passes}; i++) {{\n\
{loaded_args}\
{body_indentation}auto __return_value = {intrinsic_call}({args});\n\
@ -121,78 +28,172 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
)
}
pub fn generate_c_constraint_blocks<T: IntrinsicTypeDefinition>(
pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
w: &mut impl std::io::Write,
intrinsic: &dyn IntrinsicDefinition<T>,
indentation: Indentation,
constraints: &[&Argument<T>],
constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
name: String,
target: &str,
) -> String {
if let Some((current, constraints)) = constraints.split_last() {
let range = current
.constraint
.iter()
.map(|c| c.to_range())
.flat_map(|r| r.into_iter());
) -> std::io::Result<()> {
let Some(current) = constraints.next() else {
return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES);
};
let body_indentation = indentation.nested();
range
.map(|i| {
format!(
"{indentation}{{\n\
{body_indentation}{ty} {name} = {val};\n\
{pass}\n\
{indentation}}}",
name = current.name,
ty = current.ty.c_type(),
val = i,
pass = generate_c_constraint_blocks(
intrinsic,
body_indentation,
constraints,
format!("{name}-{i}"),
target,
)
)
})
.join("\n")
} else {
generate_c_test_loop(intrinsic, indentation, &name, PASSES, target)
let body_indentation = indentation.nested();
for i in current.constraint.iter().flat_map(|c| c.to_range()) {
let ty = current.ty.c_type();
writeln!(w, "{indentation}{{")?;
writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?;
generate_c_constraint_blocks(
w,
intrinsic,
body_indentation,
&mut constraints.clone(),
format!("{name}-{i}"),
)?;
writeln!(w, "{indentation}}}")?;
}
Ok(())
}
// Compiles C test programs using specified compiler
pub fn create_c_test_program<T: IntrinsicTypeDefinition>(
pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
w: &mut impl std::io::Write,
intrinsic: &dyn IntrinsicDefinition<T>,
header_files: &[&str],
target: &str,
c_target: &str,
notices: &str,
arch_specific_definitions: &[&str],
) -> String {
let arguments = intrinsic.arguments();
let constraints = arguments
.iter()
.filter(|&i| i.has_constraint())
.collect_vec();
) -> std::io::Result<()> {
let indentation = Indentation::default();
format_c_main_template(
notices,
header_files,
c_target,
arch_specific_definitions,
intrinsic
.arguments()
.gen_arglists_c(indentation, PASSES)
.as_str(),
generate_c_constraint_blocks(
intrinsic,
indentation.nested(),
constraints.as_slice(),
Default::default(),
target,
)
.as_str(),
)
writeln!(w, "int run_{}() {{", intrinsic.name())?;
// Define the arrays of arguments.
let arguments = intrinsic.arguments();
arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
generate_c_constraint_blocks(
w,
intrinsic,
indentation.nested(),
&mut arguments.iter().rev().filter(|&i| i.has_constraint()),
Default::default(),
)?;
writeln!(w, " return 0;")?;
writeln!(w, "}}")?;
Ok(())
}
pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
w: &mut impl std::io::Write,
notice: &str,
architecture: &str,
platform_headers: &[&str],
intrinsics: &[impl IntrinsicDefinition<T>],
) -> std::io::Result<()> {
write!(w, "{notice}")?;
for header in platform_headers {
writeln!(w, "#include <{header}>")?;
}
writeln!(
w,
r#"
#include <iostream>
#include <cstring>
#include <iomanip>
#include <sstream>
template<typename T1, typename T2> T1 cast(T2 x) {{
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
T1 ret{{}};
memcpy(&ret, &x, sizeof(T1));
return ret;
}}
std::ostream& operator<<(std::ostream& os, float16_t value);
"#
)?;
writeln!(w, "#ifdef __{architecture}__")?;
writeln!(
w,
"std::ostream& operator<<(std::ostream& os, poly128_t value);"
)?;
writeln!(w, "#endif")?;
for intrinsic in intrinsics {
create_c_test_function(w, intrinsic)?;
}
Ok(())
}
pub fn write_main_cpp<'a>(
w: &mut impl std::io::Write,
architecture: &str,
arch_specific_definitions: &str,
intrinsics: impl Iterator<Item = &'a str> + Clone,
) -> std::io::Result<()> {
writeln!(w, "#include <iostream>")?;
writeln!(w, "#include <string>")?;
for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] {
writeln!(w, "#include <{header}>")?;
}
writeln!(
w,
r#"
#include <cstring>
#include <iomanip>
#include <sstream>
std::ostream& operator<<(std::ostream& os, float16_t value) {{
uint16_t temp = 0;
memcpy(&temp, &value, sizeof(float16_t));
std::stringstream ss;
ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
os << ss.str();
return os;
}}
"#
)?;
writeln!(w, "#ifdef __{architecture}__")?;
writeln!(w, "{arch_specific_definitions }")?;
writeln!(w, "#endif")?;
for intrinsic in intrinsics.clone() {
writeln!(w, "extern int run_{intrinsic}(void);")?;
}
writeln!(w, "int main(int argc, char **argv) {{")?;
writeln!(w, " std::string intrinsic_name = argv[1];")?;
writeln!(w, " if (false) {{")?;
for intrinsic in intrinsics {
writeln!(w, " }} else if (intrinsic_name == \"{intrinsic}\") {{")?;
writeln!(w, " return run_{intrinsic}();")?;
}
writeln!(w, " }} else {{")?;
writeln!(
w,
" std::cerr << \"Unknown command: \" << intrinsic_name << \"\\n\";"
)?;
writeln!(w, " return -1;")?;
writeln!(w, " }}")?;
writeln!(w, "}}")?;
Ok(())
}

View file

@ -2,7 +2,6 @@ use itertools::Itertools;
use rayon::prelude::*;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Write;
use std::process::Command;
use super::argument::Argument;
@ -23,8 +22,8 @@ pub fn format_rust_main_template(
) -> String {
format!(
r#"{notices}#![feature(simd_ffi)]
#![feature(link_llvm_intrinsics)]
#![feature(f16)]
#![allow(unused)]
{configurations}
{definitions}
@ -38,6 +37,42 @@ fn main() {{
)
}
fn write_cargo_toml(w: &mut impl std::io::Write, binaries: &[String]) -> std::io::Result<()> {
writeln!(
w,
concat!(
"[package]\n",
"name = \"intrinsic-test-programs\"\n",
"version = \"{version}\"\n",
"authors = [{authors}]\n",
"license = \"{license}\"\n",
"edition = \"2018\"\n",
"[workspace]\n",
"[dependencies]\n",
"core_arch = {{ path = \"../crates/core_arch\" }}",
),
version = env!("CARGO_PKG_VERSION"),
authors = env!("CARGO_PKG_AUTHORS")
.split(":")
.format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
license = env!("CARGO_PKG_LICENSE"),
)?;
for binary in binaries {
writeln!(
w,
concat!(
"[[bin]]\n",
"name = \"{binary}\"\n",
"path = \"{binary}/main.rs\"\n",
),
binary = binary,
)?;
}
Ok(())
}
pub fn compile_rust_programs(
binaries: Vec<String>,
toolchain: Option<&str>,
@ -45,56 +80,20 @@ pub fn compile_rust_programs(
linker: Option<&str>,
) -> bool {
let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
cargo
.write_all(
format!(
r#"[package]
name = "intrinsic-test-programs"
version = "{version}"
authors = [{authors}]
license = "{license}"
edition = "2018"
[workspace]
[dependencies]
core_arch = {{ path = "../crates/core_arch" }}
{binaries}"#,
version = env!("CARGO_PKG_VERSION"),
authors = env!("CARGO_PKG_AUTHORS")
.split(":")
.format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))),
license = env!("CARGO_PKG_LICENSE"),
binaries = binaries
.iter()
.map(|binary| {
format!(
r#"[[bin]]
name = "{binary}"
path = "{binary}/main.rs""#,
)
})
.collect::<Vec<_>>()
.join("\n")
)
.into_bytes()
.as_slice(),
)
.unwrap();
let toolchain = match toolchain {
None => return true,
Some(t) => t,
};
write_cargo_toml(&mut cargo, &binaries).unwrap();
/* If there has been a linker explicitly set from the command line then
* we want to set it via setting it in the RUSTFLAGS*/
let cargo_command = format!("cargo {toolchain} build --target {target} --release");
let mut cargo_command = Command::new("cargo");
cargo_command.current_dir("rust_programs");
let mut command = Command::new("sh");
command
.current_dir("rust_programs")
.arg("-c")
.arg(cargo_command);
if let Some(toolchain) = toolchain {
if !toolchain.is_empty() {
cargo_command.arg(toolchain);
}
}
cargo_command.args(["build", "--target", target, "--release"]);
let mut rust_flags = "-Cdebuginfo=0".to_string();
if let Some(linker) = linker {
@ -102,11 +101,11 @@ path = "{binary}/main.rs""#,
rust_flags.push_str(linker);
rust_flags.push_str(" -C link-args=-static");
command.env("CPPFLAGS", "-fuse-ld=lld");
cargo_command.env("CPPFLAGS", "-fuse-ld=lld");
}
command.env("RUSTFLAGS", rust_flags);
let output = command.output();
cargo_command.env("RUSTFLAGS", rust_flags);
let output = cargo_command.output();
if let Ok(output) = output {
if output.status.success() {

View file

@ -8,14 +8,22 @@ use super::cli::Language;
use super::indentation::Indentation;
use super::values::value_for_array;
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum Sign {
Signed,
Unsigned,
}
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TypeKind {
BFloat,
Float,
Int,
UInt,
Int(Sign),
Char(Sign),
Poly,
Void,
Mask,
Vector,
}
impl FromStr for TypeKind {
@ -23,12 +31,17 @@ impl FromStr for TypeKind {
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"bfloat" => Ok(Self::BFloat),
"float" => Ok(Self::Float),
"int" => Ok(Self::Int),
"bfloat" | "BF16" => Ok(Self::BFloat),
"float" | "double" | "FP16" | "FP32" | "FP64" => Ok(Self::Float),
"int" | "long" | "short" | "SI8" | "SI16" | "SI32" | "SI64" => {
Ok(Self::Int(Sign::Signed))
}
"poly" => Ok(Self::Poly),
"uint" | "unsigned" => Ok(Self::UInt),
"char" => Ok(Self::Char(Sign::Signed)),
"uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => Ok(Self::Int(Sign::Unsigned)),
"void" => Ok(Self::Void),
"MASK" => Ok(Self::Mask),
"M64" | "M128" | "M256" | "M512" => Ok(Self::Vector),
_ => Err(format!("Impossible to parse argument kind {s}")),
}
}
@ -42,10 +55,14 @@ impl fmt::Display for TypeKind {
match self {
Self::BFloat => "bfloat",
Self::Float => "float",
Self::Int => "int",
Self::UInt => "uint",
Self::Int(Sign::Signed) => "int",
Self::Int(Sign::Unsigned) => "uint",
Self::Poly => "poly",
Self::Void => "void",
Self::Char(Sign::Signed) => "char",
Self::Char(Sign::Unsigned) => "unsigned char",
Self::Mask => "mask",
Self::Vector => "vector",
}
)
}
@ -56,9 +73,10 @@ impl TypeKind {
pub fn c_prefix(&self) -> &str {
match self {
Self::Float => "float",
Self::Int => "int",
Self::UInt => "uint",
Self::Int(Sign::Signed) => "int",
Self::Int(Sign::Unsigned) => "uint",
Self::Poly => "poly",
Self::Char(Sign::Signed) => "char",
_ => unreachable!("Not used: {:#?}", self),
}
}
@ -66,10 +84,13 @@ impl TypeKind {
/// Gets the rust prefix for the type kind i.e. i, u, f.
pub fn rust_prefix(&self) -> &str {
match self {
Self::BFloat => "bf",
Self::Float => "f",
Self::Int => "i",
Self::UInt => "u",
Self::Int(Sign::Signed) => "i",
Self::Int(Sign::Unsigned) => "u",
Self::Poly => "u",
Self::Char(Sign::Unsigned) => "u",
Self::Char(Sign::Signed) => "i",
_ => unreachable!("Unused type kind: {:#?}", self),
}
}
@ -133,11 +154,14 @@ impl IntrinsicType {
}
pub fn c_scalar_type(&self) -> String {
format!(
"{prefix}{bits}_t",
prefix = self.kind().c_prefix(),
bits = self.inner_size()
)
match self.kind() {
TypeKind::Char(_) => String::from("char"),
_ => format!(
"{prefix}{bits}_t",
prefix = self.kind().c_prefix(),
bits = self.inner_size()
),
}
}
pub fn rust_scalar_type(&self) -> String {
@ -155,8 +179,8 @@ impl IntrinsicType {
bit_len: Some(8),
..
} => match kind {
TypeKind::Int => "(int)",
TypeKind::UInt => "(unsigned int)",
TypeKind::Int(Sign::Signed) => "(int)",
TypeKind::Int(Sign::Unsigned) => "(unsigned int)",
TypeKind::Poly => "(unsigned int)(uint8_t)",
_ => "",
},
@ -172,6 +196,21 @@ impl IntrinsicType {
128 => "",
_ => panic!("invalid bit_len"),
},
IntrinsicType {
kind: TypeKind::Float,
bit_len: Some(bit_len),
..
} => match bit_len {
16 => "(float16_t)",
32 => "(float)",
64 => "(double)",
128 => "",
_ => panic!("invalid bit_len"),
},
IntrinsicType {
kind: TypeKind::Char(_),
..
} => "(char)",
_ => "",
}
}
@ -185,7 +224,7 @@ impl IntrinsicType {
match self {
IntrinsicType {
bit_len: Some(bit_len @ (8 | 16 | 32 | 64)),
kind: kind @ (TypeKind::Int | TypeKind::UInt | TypeKind::Poly),
kind: kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_)),
simd_len,
vec_len,
..
@ -201,7 +240,8 @@ impl IntrinsicType {
.format_with(",\n", |i, fmt| {
let src = value_for_array(*bit_len, i);
assert!(src == 0 || src.ilog2() < *bit_len);
if *kind == TypeKind::Int && (src >> (*bit_len - 1)) != 0 {
if *kind == TypeKind::Int(Sign::Signed) && (src >> (*bit_len - 1)) != 0
{
// `src` is a two's complement representation of a negative value.
let mask = !0u64 >> (64 - *bit_len);
let ones_compl = src ^ mask;
@ -257,7 +297,7 @@ impl IntrinsicType {
..
} => false,
IntrinsicType {
kind: TypeKind::Int | TypeKind::UInt | TypeKind::Poly,
kind: TypeKind::Int(_) | TypeKind::Poly,
..
} => true,
_ => unimplemented!(),
@ -282,7 +322,9 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
fn get_lane_function(&self) -> String;
/// can be implemented in an `impl` block
fn from_c(_s: &str, _target: &str) -> Result<Box<Self>, String>;
fn from_c(_s: &str, _target: &str) -> Result<Self, String>
where
Self: Sized;
/// Gets a string containing the typename for this type in C format.
/// can be directly defined in `impl` blocks

View file

@ -1,5 +1,3 @@
use super::gen_c::create_c_test_program;
use super::gen_c::setup_c_file_paths;
use super::gen_rust::{create_rust_test_program, setup_rust_file_paths};
use super::intrinsic::IntrinsicDefinition;
use super::intrinsic_helpers::IntrinsicTypeDefinition;
@ -11,37 +9,6 @@ pub fn write_file(filename: &String, code: String) {
file.write_all(code.into_bytes().as_slice()).unwrap();
}
pub fn write_c_testfiles<T: IntrinsicTypeDefinition + Sized>(
intrinsics: &Vec<&dyn IntrinsicDefinition<T>>,
target: &str,
c_target: &str,
headers: &[&str],
notice: &str,
arch_specific_definitions: &[&str],
) -> Vec<String> {
let intrinsics_name_list = intrinsics
.iter()
.map(|i| i.name().clone())
.collect::<Vec<_>>();
let filename_mapping = setup_c_file_paths(&intrinsics_name_list);
intrinsics.iter().for_each(|&i| {
let c_code = create_c_test_program(
i,
headers,
target,
c_target,
notice,
arch_specific_definitions,
);
if let Some(filename) = filename_mapping.get(&i.name()) {
write_file(filename, c_code)
};
});
intrinsics_name_list
}
pub fn write_rust_testfiles<T: IntrinsicTypeDefinition>(
intrinsics: Vec<&dyn IntrinsicDefinition<T>>,
rust_target: &str,

View file

@ -30,12 +30,15 @@ fn main() {
let test_environment = test_environment_result.unwrap();
info!("building C binaries");
if !test_environment.build_c_file() {
std::process::exit(2);
}
info!("building Rust binaries");
if !test_environment.build_rust_file() {
std::process::exit(3);
}
info!("comaparing outputs");
if !test_environment.compare_outputs() {
std::process::exit(1);
}

View file

@ -57,12 +57,12 @@ pub fn simd_test(
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
{
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
"arm" | "armv7" => "is_arm_feature_detected",
"arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
"aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected",
"powerpc" | "powerpcle" => "is_powerpc_feature_detected",
"powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected",
"loongarch64" => "is_loongarch_feature_detected",
"loongarch32" | "loongarch64" => "is_loongarch_feature_detected",
"s390x" => "is_s390x_feature_detected",
t => panic!("unknown target: {t}"),
};

View file

@ -55,7 +55,7 @@ crate from working on applications in which `std` is not available.
application.
* Linux/Android:
* `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch64`, `s390x`:
* `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch{32,64}`, `s390x`:
`std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval`
when available), and if that fails, by querying `/proc/self/auxv`.
* `arm64`: partial support for doing run-time feature detection by directly

View file

@ -2,7 +2,7 @@
features! {
@TARGET: loongarch;
@CFG: target_arch = "loongarch64";
@CFG: any(target_arch = "loongarch32", target_arch = "loongarch64");
@MACRO_NAME: is_loongarch_feature_detected;
@MACRO_ATTRS:
/// Checks if `loongarch` feature is enabled.

View file

@ -49,7 +49,7 @@ cfg_if! {
} else if #[cfg(target_arch = "mips64")] {
#[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")]
pub use mips64::*;
} else if #[cfg(target_arch = "loongarch64")] {
} else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
#[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")]
pub use loongarch::*;
} else if #[cfg(target_arch = "s390x")] {

View file

@ -103,6 +103,7 @@ pub fn features() -> impl Iterator<Item = (&'static str, bool)> {
target_arch = "powerpc64",
target_arch = "mips",
target_arch = "mips64",
target_arch = "loongarch32",
target_arch = "loongarch64",
target_arch = "s390x",
))] {

View file

@ -80,6 +80,7 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
target_arch = "riscv64",
target_arch = "mips",
target_arch = "mips64",
target_arch = "loongarch32",
target_arch = "loongarch64",
))]
{
@ -182,6 +183,7 @@ fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> {
target_arch = "riscv64",
target_arch = "mips",
target_arch = "mips64",
target_arch = "loongarch32",
target_arch = "loongarch64",
))]
{

View file

@ -51,7 +51,7 @@ cfg_if::cfg_if! {
} else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
mod powerpc;
pub(crate) use self::powerpc::detect_features;
} else if #[cfg(target_arch = "loongarch64")] {
} else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
mod loongarch;
pub(crate) use self::loongarch::detect_features;
} else if #[cfg(target_arch = "s390x")] {

View file

@ -11,6 +11,7 @@
target_arch = "s390x",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "loongarch32",
target_arch = "loongarch64"
),
feature(stdarch_internal)
@ -30,7 +31,7 @@
feature(stdarch_riscv_feature_detection)
)]
#![cfg_attr(
target_arch = "loongarch64",
any(target_arch = "loongarch32", target_arch = "loongarch64"),
feature(stdarch_loongarch_feature_detection)
)]
@ -45,6 +46,7 @@
target_arch = "s390x",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "loongarch32",
target_arch = "loongarch64"
))]
#[macro_use]
@ -65,8 +67,8 @@ fn aarch64() {
}
#[test]
#[cfg(target_arch = "loongarch64")]
fn loongarch64() {
#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))]
fn loongarch() {
let _ = is_loongarch_feature_detected!("lsx");
let _ = is_loongarch_feature_detected!("lsx",);
}

View file

@ -13,7 +13,6 @@ edition = "2024"
[dependencies]
itertools = "0.14.0"
lazy_static = "1.4.0"
proc-macro2 = "1.0"
quote = "1.0"
regex = "1.5"

View file

@ -187,7 +187,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr: [*neon-stable]
assert_instr: [sabdl]
assert_instr: [sabdl2]
safety: safe
types:
- [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
@ -230,7 +230,7 @@ intrinsics:
- stable
- - 'feature = "neon_intrinsics"'
- 'since = "1.59.0"'
assert_instr: [sabdl]
assert_instr: [sabdl2]
safety: safe
types:
- [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
@ -273,7 +273,7 @@ intrinsics:
- stable
- - 'feature = "neon_intrinsics"'
- 'since = "1.59.0"'
assert_instr: [sabdl]
assert_instr: [sabdl2]
safety: safe
types:
- [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
@ -1462,7 +1462,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
@ -1530,7 +1530,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[2]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
@ -1582,7 +1582,7 @@ intrinsics:
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
return_type: "{type[2]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
@ -5147,7 +5147,7 @@ intrinsics:
attr:
- *neon-stable
safety: safe
assert_instr: [pmull]
assert_instr: [pmull2]
types:
- [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
compose:
@ -5169,7 +5169,7 @@ intrinsics:
- *neon-aes
- *neon-stable
safety: safe
assert_instr: [pmull]
assert_instr: [pmull2]
types:
- [poly64x2_t, "p128"]
compose:
@ -5741,7 +5741,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[0]}"
attr: [*neon-stable]
assert_instr: [ssubw]
assert_instr: [ssubw2]
safety: safe
types:
- [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -5762,7 +5762,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[0]}"
attr: [*neon-stable]
assert_instr: [usubw]
assert_instr: [usubw2]
safety: safe
types:
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -5783,7 +5783,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr: [*neon-stable]
assert_instr: [ssubl]
assert_instr: [ssubl2]
safety: safe
types:
- [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
@ -5813,7 +5813,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr: [*neon-stable]
assert_instr: [usubl]
assert_instr: [usubl2]
safety: safe
types:
- [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
@ -6580,7 +6580,6 @@ intrinsics:
arch: aarch64,arm64ec
- name: "vmaxnm{neon_type.no}"
doc: Floating-point Maximum Number (vector)
arguments: ["a: {neon_type}", "b: {neon_type}"]
@ -6592,11 +6591,7 @@ intrinsics:
- float64x1_t
- float64x2_t
compose:
- LLVMLink:
name: "fmaxnm.{neon_type}"
links:
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmax, [a, b]]
- name: "vmaxnmh_{type}"
@ -6611,11 +6606,7 @@ intrinsics:
types:
- f16
compose:
- LLVMLink:
name: "vmaxh.{neon_type}"
links:
- link: "llvm.aarch64.neon.fmaxnm.{type}"
arch: aarch64,arm64ec
- FnCall: ["f16::max", [a, b]]
- name: "vminnmh_{type}"
@ -6630,11 +6621,7 @@ intrinsics:
types:
- f16
compose:
- LLVMLink:
name: "vminh.{neon_type}"
links:
- link: "llvm.aarch64.neon.fminnm.{type}"
arch: aarch64,arm64ec
- FnCall: ["f16::min", [a, b]]
- name: "vmaxnmv{neon_type[0].no}"
@ -6648,11 +6635,7 @@ intrinsics:
- [float32x2_t, f32]
- [float64x2_t, f64]
compose:
- LLVMLink:
name: "fmaxnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_max, [a]]
- name: "vmaxnmv{neon_type[0].no}"
doc: Floating-point maximum number across vector
@ -6664,11 +6647,7 @@ intrinsics:
types:
- [float32x4_t, f32]
compose:
- LLVMLink:
name: "fmaxnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_max, [a]]
- name: "vmaxnmv{neon_type[0].no}"
@ -6684,11 +6663,7 @@ intrinsics:
- [float16x4_t, f16]
- [float16x8_t, f16]
compose:
- LLVMLink:
name: "fmaxnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_max, [a]]
- name: "vminnmv{neon_type[0].no}"
@ -6704,11 +6679,7 @@ intrinsics:
- [float16x4_t, f16]
- [float16x8_t, f16]
compose:
- LLVMLink:
name: "fminnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_min, [a]]
- name: "vmaxv{neon_type[0].no}"
@ -6814,11 +6785,7 @@ intrinsics:
- float64x1_t
- float64x2_t
compose:
- LLVMLink:
name: "fminnm.{neon_type}"
links:
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmin, [a, b]]
- name: "vminnmv{neon_type[0].no}"
doc: "Floating-point minimum number across vector"
@ -6832,11 +6799,7 @@ intrinsics:
- [float32x2_t, "f32"]
- [float64x2_t, "f64"]
compose:
- LLVMLink:
name: "vminnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_min, [a]]
- name: "vminnmv{neon_type[0].no}"
doc: "Floating-point minimum number across vector"
@ -6849,11 +6812,7 @@ intrinsics:
types:
- [float32x4_t, "f32"]
compose:
- LLVMLink:
name: "vminnmv.{neon_type[0]}"
links:
- link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_min, [a]]
- name: "vmovl_high{neon_type[0].noq}"
doc: Vector move
@ -9950,7 +9909,7 @@ intrinsics:
return_type: "{neon_type[0]}"
attr:
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal]]}]]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
safety: safe
types:
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
@ -9977,7 +9936,7 @@ intrinsics:
return_type: "{neon_type[0]}"
attr:
- *neon-stable
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal]]}]]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
safety: safe
types:
- [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
@ -11386,7 +11345,7 @@ intrinsics:
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl]]}]]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
@ -13023,6 +12982,26 @@ intrinsics:
- link: "llvm.aarch64.crc32cx"
arch: aarch64,arm64ec
- name: "vabsd_s64"
doc: "Absolute Value (wrapping)."
arguments: ["a: {type[1]}"]
return_type: "{type[1]}"
attr:
- *neon-stable
assert_instr: [abs]
safety: safe
types:
- [i64, i64]
compose:
# This is behaviorally equivalent to `i64::wrapping_abs`, but keeps the value in a SIMD
# register. That can be beneficial when combined with other instructions. This LLVM
# issue provides some extra context https://github.com/llvm/llvm-project/issues/148388.
- LLVMLink:
name: "vabsd_s64"
links:
- link: "llvm.aarch64.neon.abs.i64"
arch: aarch64,arm64ec
- name: "{type[0]}"
doc: "Absolute Value (wrapping)."
arguments: ["a: {type[1]}"]
@ -13032,15 +13011,18 @@ intrinsics:
assert_instr: [abs]
safety: safe
types:
- ['vabsd_s64', i64, i64]
- ['vabs_s64', int64x1_t, v1i64]
- ['vabsq_s64', int64x2_t, v2i64]
compose:
- LLVMLink:
name: "{type[0]}"
links:
- link: "llvm.aarch64.neon.abs.{type[2]}"
arch: aarch64,arm64ec
- Let:
- neg
- "{type[1]}"
- FnCall: [simd_neg, [a]]
- Let:
- mask
- "{type[1]}"
- FnCall: [simd_ge, [a, neg]]
- FnCall: [simd_select, [mask, a, neg]]
- name: "vuqadd{neon_type[0].no}"
doc: "Signed saturating Accumulate of Unsigned value."
@ -13142,11 +13124,7 @@ intrinsics:
types:
- [int64x2_t, i64]
compose:
- FnCall:
- transmute
- - FnCall:
- "vaddvq_u64"
- - FnCall: [transmute, [a]]
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vpaddd_u64"
doc: "Add pairwise"
@ -13159,7 +13137,7 @@ intrinsics:
types:
- [uint64x2_t, u64]
compose:
- FnCall: [vaddvq_u64, [a]]
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13176,11 +13154,7 @@ intrinsics:
- [int16x8_t, i16]
- [int32x4_t, i32]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.saddv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13193,11 +13167,7 @@ intrinsics:
types:
- [int32x2_t, i32]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13210,11 +13180,7 @@ intrinsics:
types:
- [int64x2_t, i64]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13231,11 +13197,7 @@ intrinsics:
- [uint16x8_t, u16]
- [uint32x4_t, u32]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.uaddv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13248,11 +13210,7 @@ intrinsics:
types:
- [uint32x2_t, u32, i32]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddv{neon_type[0].no}"
doc: "Add across vector"
@ -13265,11 +13223,7 @@ intrinsics:
types:
- [uint64x2_t, u64, i64]
compose:
- LLVMLink:
name: "vaddv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_add_unordered, [a]]
- name: "vaddlv{neon_type[0].no}"
doc: "Signed Add Long across Vector"
@ -13327,11 +13281,7 @@ intrinsics:
- [int16x8_t, i16, 'smaxv']
- [int32x4_t, i32, 'smaxv']
compose:
- LLVMLink:
name: "vmaxv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.smaxv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_max, [a]]
- name: "vmaxv{neon_type[0].no}"
doc: "Horizontal vector max."
@ -13349,11 +13299,7 @@ intrinsics:
- [uint16x8_t, u16, 'umaxv']
- [uint32x4_t, u32, 'umaxv']
compose:
- LLVMLink:
name: "vmaxv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.umaxv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_max, [a]]
- name: "vmaxv{neon_type[0].no}"
doc: "Horizontal vector max."
@ -13390,11 +13336,7 @@ intrinsics:
- [int16x8_t, i16, 'sminv']
- [int32x4_t, i32, 'sminv']
compose:
- LLVMLink:
name: "vminv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.sminv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_min, [a]]
- name: "vminv{neon_type[0].no}"
doc: "Horizontal vector min."
@ -13412,11 +13354,7 @@ intrinsics:
- [uint16x8_t, u16, 'uminv']
- [uint32x4_t, u32, 'uminv']
compose:
- LLVMLink:
name: "vminv{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.uminv.{type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: [simd_reduce_min, [a]]
- name: "vminv{neon_type[0].no}"
doc: "Horizontal vector min."

View file

@ -7135,13 +7135,8 @@ intrinsics:
- int32x2_t
- int32x4_t
compose:
- LLVMLink:
name: "smax.{neon_type}"
links:
- link: "llvm.arm.neon.vmaxs.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.smax.{neon_type}"
arch: aarch64,arm64ec
- Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
- FnCall: [simd_select, [mask, a, b]]
- name: "vmax{neon_type.no}"
doc: Maximum (vector)
@ -7162,13 +7157,8 @@ intrinsics:
- uint32x2_t
- uint32x4_t
compose:
- LLVMLink:
name: "smax.{neon_type}"
links:
- link: "llvm.arm.neon.vmaxu.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.umax.{neon_type}"
arch: aarch64,arm64ec
- Let: [mask, "{neon_type}", {FnCall: [simd_ge, [a, b]]}]
- FnCall: [simd_select, [mask, a, b]]
- name: "vmax{neon_type.no}"
doc: Maximum (vector)
@ -7233,13 +7223,7 @@ intrinsics:
- float32x2_t
- float32x4_t
compose:
- LLVMLink:
name: "fmaxnm.{neon_type}"
links:
- link: "llvm.arm.neon.vmaxnm.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmax, [a, b]]
- name: "vmaxnm{neon_type.no}"
@ -7257,13 +7241,7 @@ intrinsics:
- float16x4_t
- float16x8_t
compose:
- LLVMLink:
name: "fmaxnm.{neon_type}"
links:
- link: "llvm.arm.neon.vmaxnm.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmax, [a, b]]
- name: "vminnm{neon_type.no}"
@ -7281,13 +7259,7 @@ intrinsics:
- float16x4_t
- float16x8_t
compose:
- LLVMLink:
name: "fminnm.{neon_type}"
links:
- link: "llvm.arm.neon.vminnm.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmin, [a, b]]
- name: "vmin{neon_type.no}"
@ -7309,13 +7281,8 @@ intrinsics:
- int32x2_t
- int32x4_t
compose:
- LLVMLink:
name: "smin.{neon_type}"
links:
- link: "llvm.arm.neon.vmins.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.smin.{neon_type}"
arch: aarch64,arm64ec
- Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
- FnCall: [simd_select, [mask, a, b]]
- name: "vmin{neon_type.no}"
doc: "Minimum (vector)"
@ -7336,13 +7303,8 @@ intrinsics:
- uint32x2_t
- uint32x4_t
compose:
- LLVMLink:
name: "umin.{neon_type}"
links:
- link: "llvm.arm.neon.vminu.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.umin.{neon_type}"
arch: aarch64,arm64ec
- Let: [mask, "{neon_type}", {FnCall: [simd_le, [a, b]]}]
- FnCall: [simd_select, [mask, a, b]]
- name: "vmin{neon_type.no}"
doc: "Minimum (vector)"
@ -7408,13 +7370,7 @@ intrinsics:
- float32x2_t
- float32x4_t
compose:
- LLVMLink:
name: "fminnm.{neon_type}"
links:
- link: "llvm.arm.neon.vminnm.{neon_type}"
arch: arm
- link: "llvm.aarch64.neon.fminnm.{neon_type}"
arch: aarch64,arm64ec
- FnCall: [simd_fmin, [a, b]]
- name: "vpadd{neon_type.no}"
doc: Floating-point add pairwise
@ -7874,9 +7830,9 @@ intrinsics:
static_defs: ['const N: i32']
safety: safe
types:
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@ -7929,9 +7885,9 @@ intrinsics:
static_defs: ['const N: i32']
safety: safe
types:
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@ -8105,9 +8061,9 @@ intrinsics:
static_defs: ['const N: i32']
safety: safe
types:
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@ -8215,9 +8171,9 @@ intrinsics:
static_defs: ['const N: i32']
safety: safe
types:
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
- [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
- [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
- [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@ -8939,9 +8895,9 @@ intrinsics:
static_defs: ['const N: i32']
safety: safe
types:
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }']
- [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16; 8]) }']
- [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N; 4]) }']
- [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64; 2]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@ -9576,7 +9532,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9617,7 +9574,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
- *neon-fp16
- *neon-unstable-f16
safety: safe
@ -9645,7 +9603,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9673,7 +9632,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9707,7 +9667,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9735,7 +9696,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vzip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9767,7 +9729,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-fp16
- *neon-unstable-f16
safety: safe
@ -9794,7 +9757,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -9835,7 +9799,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
- *neon-fp16
- *neon-unstable-f16
safety: safe
@ -9863,7 +9828,8 @@ intrinsics:
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
- *neon-not-arm-stable
- *neon-cfg-arm-unstable
safety: safe
@ -12881,13 +12847,16 @@ intrinsics:
- int16x8_t
- int32x4_t
compose:
- LLVMLink:
name: "vabs{neon_type.no}"
links:
- link: "llvm.aarch64.neon.abs.{neon_type}"
arch: aarch64,arm64ec
- link: "llvm.arm.neon.vabs.{neon_type}"
arch: arm
- Let:
- neg
- "{neon_type}"
- FnCall: [simd_neg, [a]]
- Let:
- mask
- "{neon_type}"
- FnCall: [simd_ge, [a, neg]]
- FnCall: [simd_select, [mask, a, neg]]
- name: "vpmin{neon_type.no}"
doc: "Folding minimum of adjacent pairs"
@ -13862,8 +13831,8 @@ intrinsics:
- [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8']
- [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
- [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
- [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N as i32']
- [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N as i32']
- [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
- [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
- [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
- [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
compose:
@ -13891,8 +13860,8 @@ intrinsics:
- [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8']
- [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16']
- [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16']
- [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N as i32']
- [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N as i32']
- [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
- [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
- [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
- [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
- [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8']
@ -14138,6 +14107,7 @@ intrinsics:
doc: "Load one single-element structure and Replicate to all lanes (of one register)."
arguments: ["ptr: {type[1]}"]
return_type: "{neon_type[2]}"
big_endian_inverse: false
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]]
@ -14147,40 +14117,36 @@ intrinsics:
safety:
unsafe: [neon]
types:
- ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_s8::<0>', 'i8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_u8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'vld1_lane_p8::<0>', 'u8x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1_dup_s8', '*const i8', 'int8x8_t', 'vld1.8', 'ld1r', 'i8x8::splat']
- ['vld1_dup_u8', '*const u8', 'uint8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']
- ['vld1_dup_p8', '*const p8', 'poly8x8_t', 'vld1.8', 'ld1r', 'u8x8::splat']
- ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_s8::<0>', 'i8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_u8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'vld1q_lane_p8::<0>', 'u8x16::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_s8', '*const i8', 'int8x16_t', 'vld1.8', 'ld1r', 'i8x16::splat']
- ['vld1q_dup_u8', '*const u8', 'uint8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']
- ['vld1q_dup_p8', '*const p8', 'poly8x16_t', 'vld1.8', 'ld1r', 'u8x16::splat']
- ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_s16::<0>', 'i16x4::splat(0)', '[0, 0, 0, 0]']
- ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_u16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
- ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'vld1_lane_p16::<0>', 'u16x4::splat(0)', '[0, 0, 0, 0]']
- ['vld1_dup_s16', '*const i16', 'int16x4_t', 'vld1.16', 'ld1r', 'i16x4::splat']
- ['vld1_dup_u16', '*const u16', 'uint16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']
- ['vld1_dup_p16', '*const p16', 'poly16x4_t', 'vld1.16', 'ld1r', 'u16x4::splat']
- ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_s16::<0>', 'i16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_u16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'vld1q_lane_p16::<0>', 'u16x8::splat(0)', '[0, 0, 0, 0, 0, 0, 0, 0]']
- ['vld1q_dup_s16', '*const i16', 'int16x8_t', 'vld1.16', 'ld1r', 'i16x8::splat']
- ['vld1q_dup_u16', '*const u16', 'uint16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']
- ['vld1q_dup_p16', '*const p16', 'poly16x8_t', 'vld1.16', 'ld1r', 'u16x8::splat']
- ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_s32::<0>', 'i32x2::splat(0)', '[0, 0]']
- ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_u32::<0>', 'u32x2::splat(0)', '[0, 0]']
- ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'vld1_lane_f32::<0>', 'f32x2::splat(0.0)', '[0, 0]']
- ['vld1_dup_s32', '*const i32', 'int32x2_t', 'vld1.32', 'ld1r', 'i32x2::splat']
- ['vld1_dup_u32', '*const u32', 'uint32x2_t', 'vld1.32', 'ld1r', 'u32x2::splat']
- ['vld1_dup_f32', '*const f32', 'float32x2_t', 'vld1.32', 'ld1r', 'f32x2::splat']
- ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_s32::<0>', 'i32x4::splat(0)', '[0, 0, 0, 0]']
- ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_u32::<0>', 'u32x4::splat(0)', '[0, 0, 0, 0]']
- ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'vld1q_lane_f32::<0>', 'f32x4::splat(0.0)', '[0, 0, 0, 0]']
- ['vld1q_dup_s32', '*const i32', 'int32x4_t', 'vld1.32', 'ld1r', 'i32x4::splat']
- ['vld1q_dup_u32', '*const u32', 'uint32x4_t', 'vld1.32', 'ld1r', 'u32x4::splat']
- ['vld1q_dup_f32', '*const f32', 'float32x4_t', 'vld1.32', 'ld1r', 'f32x4::splat']
- ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1', 'vld1q_lane_s64::<0>', 'i64x2::splat(0)', '[0, 0]']
- ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1', 'vld1q_lane_u64::<0>', 'u64x2::splat(0)', '[0, 0]']
- ['vld1q_dup_s64', '*const i64', 'int64x2_t', 'vldr', 'ld1r', 'i64x2::splat']
- ['vld1q_dup_u64', '*const u64', 'uint64x2_t', 'vldr', 'ld1r', 'u64x2::splat']
compose:
- Let:
- x
- FnCall:
- '{type[5]}'
- - ptr
- FnCall: [transmute, ['{type[6]}']]
- FnCall: ['simd_shuffle!', [x, x, '{type[7]}']]
- FnCall:
- transmute
- - FnCall: ['{type[5]}', ["*ptr"]]
- name: "{type[0]}"
doc: "Absolute difference and accumulate (64-bit)"

View file

@ -1,5 +1,4 @@
use itertools::Itertools;
use lazy_static::lazy_static;
use proc_macro2::{Literal, Punct, Spacing, TokenStream};
use quote::{ToTokens, TokenStreamExt, format_ident, quote};
use regex::Regex;
@ -7,6 +6,7 @@ use serde::de::{self, MapAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize};
use std::fmt;
use std::str::FromStr;
use std::sync::LazyLock;
use crate::intrinsic::Intrinsic;
use crate::wildstring::WildStringPart;
@ -374,10 +374,8 @@ impl FromStr for Expression {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
lazy_static! {
static ref MACRO_RE: Regex =
Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap();
}
static MACRO_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap());
if s == "SvUndef" {
Ok(Expression::SvUndef)

View file

@ -2,6 +2,7 @@ use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::LazyLock;
use crate::format_code;
use crate::input::InputType;
@ -10,7 +11,6 @@ use crate::typekinds::BaseType;
use crate::typekinds::{ToRepr, TypeKind};
use itertools::Itertools;
use lazy_static::lazy_static;
use proc_macro2::TokenStream;
use quote::{format_ident, quote};
@ -639,8 +639,8 @@ impl LdIntrCharacteristics {
}
}
lazy_static! {
static ref PREAMBLE: String = format!(
static PREAMBLE: LazyLock<String> = LazyLock::new(|| {
format!(
r#"#![allow(unused)]
use super::*;
@ -801,13 +801,11 @@ fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {{
assert!(!svptest_any(defined, cmp))
}}
"#
);
}
)
});
lazy_static! {
static ref MANUAL_TESTS: String = format!(
"#[simd_test(enable = \"sve\")]
unsafe fn test_ffr() {{
const MANUAL_TESTS: &str = "#[simd_test(enable = \"sve\")]
unsafe fn test_ffr() {
svsetffr();
let ffr = svrdffr();
assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svindex_u8(1, 0));
@ -816,7 +814,5 @@ unsafe fn test_ffr() {{
svwrffr(pred);
let ffr = svrdffr_z(svptrue_b8());
assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svdup_n_u8_z(pred, 1));
}}
"
);
}
";

View file

@ -1,10 +1,10 @@
use lazy_static::lazy_static;
use proc_macro2::TokenStream;
use quote::{ToTokens, TokenStreamExt, quote};
use regex::Regex;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::fmt;
use std::str::FromStr;
use std::sync::LazyLock;
use crate::context;
use crate::expression::{Expression, FnCall};
@ -496,9 +496,9 @@ impl FromStr for VectorType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap();
}
static RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap()
});
if let Some(c) = RE.captures(s) {
let (base_type, lanes) = Self::sanitise_lanes(
@ -698,9 +698,8 @@ impl FromStr for BaseType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap();
}
static RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(?P<kind>[a-zA-Z]+)(?P<size>\d+)?(_t)?$").unwrap());
if let Some(c) = RE.captures(s) {
let kind = c["kind"].parse()?;

View file

@ -1,8 +1,7 @@
use lazy_static::lazy_static;
use regex::Regex;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::fmt;
use std::str::FromStr;
use std::{fmt, sync::LazyLock};
use crate::{
fn_suffix::SuffixKind,
@ -66,9 +65,9 @@ impl FromStr for Wildcard {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap();
}
static RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^(?P<wildcard>\w+?)(?:_x(?P<tuple_size>[2-4]))?(?:\[(?P<index>\d+)\])?(?:\.(?P<modifiers>\w+))?(?:\s+as\s+(?P<scale_to>.*?))?$").unwrap()
});
if let Some(c) = RE.captures(s) {
let wildcard_name = &c["wildcard"];

View file

@ -11,7 +11,6 @@ LSX:
# Generate bindings
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsxintrin.h
OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec
rustfmt crates/core_arch/src/loongarch64/lsx/generated.rs
# Generate tests
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lsx.spec test
@ -25,7 +24,6 @@ LASX:
# Generate bindings
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasxintrin.h
OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec
rustfmt crates/core_arch/src/loongarch64/lasx/generated.rs
# Generate tests
OUT_DIR=`pwd`/crates/stdarch-gen-loongarch cargo run -p stdarch-gen-loongarch -- crates/stdarch-gen-loongarch/lasx.spec test

View file

@ -274,13 +274,14 @@ fn gen_bind_body(
}
};
let is_mem = in_t.iter().any(|s| s.contains("POINTER"));
let is_store = current_name.to_string().contains("vst");
let link_function = {
let fn_decl = {
let fn_output = if out_t.to_lowercase() == "void" {
String::new()
} else {
format!("-> {}", type_to_rst(out_t, is_store))
format!(" -> {}", type_to_rst(out_t, is_store))
};
let fn_inputs = match para_num {
1 => format!("(a: {})", type_to_rst(in_t[0], is_store)),
@ -304,7 +305,7 @@ fn gen_bind_body(
),
_ => panic!("unsupported parameter number"),
};
format!("fn __{current_name}{fn_inputs} {fn_output};")
format!("fn __{current_name}{fn_inputs}{fn_output};")
};
let function = format!(
r#" #[link_name = "llvm.loongarch.{}"]
@ -456,31 +457,40 @@ fn gen_bind_body(
};
rustc_legacy_const_generics = "rustc_legacy_const_generics(2, 3)";
}
format!("pub unsafe fn {current_name}{fn_inputs} {fn_output}")
format!(
"pub {}fn {current_name}{fn_inputs} {fn_output}",
if is_mem { "unsafe " } else { "" }
)
};
let unsafe_start = if !is_mem { "unsafe { " } else { "" };
let unsafe_end = if !is_mem { " }" } else { "" };
let mut call_params = {
match para_num {
1 => format!("__{current_name}(a)"),
2 => format!("__{current_name}(a, b)"),
3 => format!("__{current_name}(a, b, c)"),
4 => format!("__{current_name}(a, b, c, d)"),
1 => format!("{unsafe_start}__{current_name}(a){unsafe_end}"),
2 => format!("{unsafe_start}__{current_name}(a, b){unsafe_end}"),
3 => format!("{unsafe_start}__{current_name}(a, b, c){unsafe_end}"),
4 => format!("{unsafe_start}__{current_name}(a, b, c, d){unsafe_end}"),
_ => panic!("unsupported parameter number"),
}
};
if para_num == 1 && in_t[0] == "HI" {
call_params = match asm_fmts[1].as_str() {
"si10" => {
format!("static_assert_simm_bits!(IMM_S10, 10);\n __{current_name}(IMM_S10)")
format!(
"static_assert_simm_bits!(IMM_S10, 10);\n {unsafe_start}__{current_name}(IMM_S10){unsafe_end}"
)
}
"i13" => {
format!("static_assert_simm_bits!(IMM_S13, 13);\n __{current_name}(IMM_S13)")
format!(
"static_assert_simm_bits!(IMM_S13, 13);\n {unsafe_start}__{current_name}(IMM_S13){unsafe_end}"
)
}
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
}
} else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") {
call_params = if asm_fmts[2].starts_with("ui") {
format!(
"static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, IMM{0})",
"static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, IMM{0}){unsafe_end}",
asm_fmts[2].get(2..).unwrap()
)
} else {
@ -489,14 +499,16 @@ fn gen_bind_body(
} else if para_num == 2 && in_t[1] == "QI" {
call_params = match asm_fmts[2].as_str() {
"si5" => {
format!("static_assert_simm_bits!(IMM_S5, 5);\n __{current_name}(a, IMM_S5)")
format!(
"static_assert_simm_bits!(IMM_S5, 5);\n {unsafe_start}__{current_name}(a, IMM_S5){unsafe_end}"
)
}
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
};
} else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" {
call_params = if asm_fmts[2].starts_with("si") {
format!(
"static_assert_simm_bits!(IMM_S{0}, {0});\n __{current_name}(mem_addr, IMM_S{0})",
"static_assert_simm_bits!(IMM_S{0}, {0});\n {unsafe_start}__{current_name}(mem_addr, IMM_S{0}){unsafe_end}",
asm_fmts[2].get(2..).unwrap()
)
} else {
@ -504,13 +516,13 @@ fn gen_bind_body(
}
} else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "DI" {
call_params = match asm_fmts[2].as_str() {
"rk" => format!("__{current_name}(mem_addr, b)"),
"rk" => format!("{unsafe_start}__{current_name}(mem_addr, b){unsafe_end}"),
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
};
} else if para_num == 3 && (in_t[2] == "USI" || in_t[2] == "UQI") {
call_params = if asm_fmts[2].starts_with("ui") {
format!(
"static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, b, IMM{0})",
"static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, b, IMM{0}){unsafe_end}",
asm_fmts[2].get(2..).unwrap()
)
} else {
@ -519,19 +531,19 @@ fn gen_bind_body(
} else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" {
call_params = match asm_fmts[2].as_str() {
"si12" => format!(
"static_assert_simm_bits!(IMM_S12, 12);\n __{current_name}(a, mem_addr, IMM_S12)"
"static_assert_simm_bits!(IMM_S12, 12);\n {unsafe_start}__{current_name}(a, mem_addr, IMM_S12){unsafe_end}"
),
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
};
} else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" {
call_params = match asm_fmts[2].as_str() {
"rk" => format!("__{current_name}(a, mem_addr, b)"),
"rk" => format!("{unsafe_start}__{current_name}(a, mem_addr, b){unsafe_end}"),
_ => panic!("unsupported assembly format: {}", asm_fmts[2]),
};
} else if para_num == 4 {
call_params = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) {
("si8", t) => format!(
"static_assert_simm_bits!(IMM_S8, 8);\n static_assert_uimm_bits!(IMM{0}, {0});\n __{current_name}(a, mem_addr, IMM_S8, IMM{0})",
"static_assert_simm_bits!(IMM_S8, 8);\n static_assert_uimm_bits!(IMM{0}, {0});\n {unsafe_start}__{current_name}(a, mem_addr, IMM_S8, IMM{0}){unsafe_end}",
type_to_imm(t)
),
(_, _) => panic!(

View file

@ -7,7 +7,6 @@ edition = "2024"
[dependencies]
assert-instr-macro = { path = "../assert-instr-macro" }
simd-test-macro = { path = "../simd-test-macro" }
lazy_static = "1.0"
rustc-demangle = "0.1.8"
cfg-if = "1.0"
@ -20,7 +19,7 @@ cc = "1.0"
# time, and we want to make updates to this explicit rather than automatically
# picking up updates which might break CI with new instruction names.
[target.'cfg(target_arch = "wasm32")'.dependencies]
wasmprinter = "=0.2.67"
wasmprinter = "=0.235"
[features]
default = []

View file

@ -6,14 +6,12 @@
#![deny(rust_2018_idioms)]
#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate cfg_if;
pub use assert_instr_macro::*;
pub use simd_test_macro::*;
use std::{cmp, collections::HashSet, env, hash, hint::black_box, str};
use std::{cmp, collections::HashSet, env, hash, hint::black_box, str, sync::LazyLock};
cfg_if! {
if #[cfg(target_arch = "wasm32")] {
@ -25,9 +23,7 @@ cfg_if! {
}
}
lazy_static! {
static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
}
static DISASSEMBLY: LazyLock<HashSet<Function>> = LazyLock::new(disassemble_myself);
#[derive(Debug)]
struct Function {
@ -65,11 +61,12 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
black_box(shim_addr);
//eprintln!("shim name: {fnname}");
let function = &DISASSEMBLY
.get(&Function::new(fnname))
.unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly"));
let Some(function) = &DISASSEMBLY.get(&Function::new(fnname)) else {
panic!("function `{fnname}` not found in the disassembly")
};
//eprintln!(" function: {:?}", function);
// Trim any filler instructions.
let mut instrs = &function.instrs[..];
while instrs.last().is_some_and(|s| s == "nop" || s == "int3") {
instrs = &instrs[..instrs.len() - 1];
@ -84,12 +81,26 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
// 2. It is a mark, indicating that the instruction will be
// compiled into other instructions - mainly because of llvm
// optimization.
let expected = if expected == "unknown" {
"<unknown>" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed
} else {
expected
let expected = match expected {
// `<unknown>` is what LLVM will generate for unknown instructions. We use this to fail
// loudly when LLVM does start supporting these instructions.
//
// This was introduced in https://github.com/rust-lang/stdarch/pull/1674 to work around the
// RISC-V P extension not yet being supported.
"unknown" => "<unknown>",
_ => expected,
};
let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected));
// Check whether the given instruction is part of the disassemblied body.
let found = expected == "nop"
|| instrs.iter().any(|instruction| {
instruction.starts_with(expected)
// Check that the next character is non-alphanumeric. This prevents false negatives
// when e.g. `fminnm` was used but `fmin` was expected.
//
// TODO: resolve the conflicts (x86_64 and aarch64 have a bunch, probably others)
// && !instruction[expected.len()..].starts_with(|c: char| c.is_ascii_alphanumeric())
});
// Look for subroutine call instructions in the disassembly to detect whether
// inlining failed: all intrinsics are `#[inline(always)]`, so calling one

View file

@ -558,7 +558,8 @@ fn search(pos: &Pos, alpha: i32, beta: i32, depth: i32, _ply: i32) -> i32 {
assert_ne!(bm, MOVE_NONE);
assert!(bs >= -EVAL_INF && bs <= EVAL_INF);
if _ply == 0 { bm } else { bs } //best move at the root node, best score elsewhere
// best move at the root node, best score elsewhere
if _ply == 0 { bm } else { bs }
}
/// Evaluation function: give different scores to different patterns after a fixed depth.
@ -570,15 +571,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if check_x86_avx512_features() {
unsafe {
if check_patternlive4_avx512(pos, def) {
return -4096;
}
}
} else {
if check_patternlive4(pos, def) {
if unsafe { check_patternlive4_avx512(pos, def) } {
return -4096;
}
} else if check_patternlive4(pos, def) {
return -4096;
}
}
@ -593,15 +590,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if check_x86_avx512_features() {
unsafe {
if check_patternlive4_avx512(pos, atk) {
return 2560;
}
}
} else {
if check_patternlive4(pos, atk) {
if unsafe { check_patternlive4_avx512(pos, atk) } {
return 2560;
}
} else if check_patternlive4(pos, atk) {
return 2560;
}
}
@ -616,15 +609,11 @@ fn eval(pos: &Pos, _ply: i32) -> i32 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if check_x86_avx512_features() {
unsafe {
if check_patterndead4_avx512(pos, atk) > 0 {
return 2560;
}
}
} else {
if check_patterndead4(pos, atk) > 0 {
if unsafe { check_patterndead4_avx512(pos, atk) > 0 } {
return 2560;
}
} else if check_patterndead4(pos, atk) > 0 {
return 2560;
}
}
@ -909,9 +898,7 @@ fn pos_is_winner_avx512(pos: &Pos) -> bool {
0b00_10_10_10_10_11_10_10_10_10_11_11_11_11_11_10];
let mut count_match: i32 = 0;
for dir in 0..2 {
// direction 0 and 1
let mut board0 = board0org[dir];
for mut board0 in board0org {
let boardf = _mm512_and_si512(answer, board0);
let temp_mask = _mm512_mask_cmpeq_epi16_mask(answer_mask[0], answer, boardf);
count_match += _popcnt32(temp_mask as i32);

View file

@ -1,7 +1,7 @@
[assign]
[assign.owners]
"*" = ["@Amanieu"]
"*" = ["@Amanieu", "@folkertdev", "@sayantn"]
[ping.windows]
message = """\