Update the intrinsic checker tool (#1258)

This commit is contained in:
Amanieu d'Antras 2021-12-04 13:03:30 +00:00 committed by GitHub
parent 972030f2b2
commit 937978eeef
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 1204 additions and 794 deletions

View file

@ -116,7 +116,6 @@ jobs:
os: ubuntu-latest
- target: armv7-unknown-linux-gnueabihf
os: ubuntu-latest
rustflags: -C target-feature=+neon
- target: mips-unknown-linux-gnu
os: ubuntu-latest
norun: true

View file

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:21.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
g++ \
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang-12 \
clang-13 \
lld
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \

View file

@ -1,13 +1,17 @@
FROM ubuntu:18.04
FROM ubuntu:21.10
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
g++ \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
g++-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
file \
clang-13 \
lld
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

View file

@ -25,7 +25,7 @@ run() {
--env NORUN \
--env RUSTFLAGS \
--env STDARCH_TEST_NORUN \
--volume "$(dirname "$(dirname "$(command -v cargo)")")":/cargo \
--volume "${HOME}/.cargo":/cargo \
--volume "$(rustc --print sysroot)":/rust:ro \
--volume "$(pwd)":/checkout:ro \
--volume "$(pwd)"/target:/checkout/target \

View file

@ -37,6 +37,13 @@ case ${TARGET} in
mips-* | mipsel-*)
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
;;
# Some of our test dependencies use the deprecated `gcc` crates which is
# missing a fix from https://github.com/alexcrichton/cc-rs/pull/627. Apply
# the workaround manually here.
armv7-*eabihf | thumbv7-*eabihf)
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
export TARGET_CFLAGS="-mfpu=vfpv3-d16"
;;
esac
echo "RUSTFLAGS=${RUSTFLAGS}"
@ -122,7 +129,10 @@ esac
if [ "${TARGET}" = "aarch64-unknown-linux-gnu" ]; then
export CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-12" --skip crates/intrinsic-test/missing.txt
RUST_LOG=warn cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-13" --skip crates/intrinsic-test/missing_aarch64.txt
elif [ "${TARGET}" = "armv7-unknown-linux-gnueabihf" ]; then
export CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
RUST_LOG=warn cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" --cppcompiler "clang++-13" --skip crates/intrinsic-test/missing_arm.txt --a32
fi
if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then

View file

@ -4455,7 +4455,7 @@ pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(neg))]
pub unsafe fn vnegd_s64(a: i64) -> i64 {
-a
a.wrapping_neg()
}
/// Negate
@ -5213,7 +5213,7 @@ pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64")]
fn vld2q_s64_(ptr: *const int64x2_t) -> int64x2x2_t;
}
vld2q_s64_(a.cast())
vld2q_s64_(a as _)
}
/// Load multiple 2-element structures to two registers
@ -5242,7 +5242,7 @@ pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64")]
fn vld2_f64_(ptr: *const float64x1_t) -> float64x1x2_t;
}
vld2_f64_(a.cast())
vld2_f64_(a as _)
}
/// Load multiple 2-element structures to two registers
@ -5255,7 +5255,7 @@ pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64")]
fn vld2q_f64_(ptr: *const float64x2_t) -> float64x2x2_t;
}
vld2q_f64_(a.cast())
vld2q_f64_(a as _)
}
/// Load single 2-element structure and replicate to all lanes of two registers
@ -5268,7 +5268,7 @@ pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64")]
fn vld2q_dup_s64_(ptr: *const i64) -> int64x2x2_t;
}
vld2q_dup_s64_(a.cast())
vld2q_dup_s64_(a as _)
}
/// Load single 2-element structure and replicate to all lanes of two registers
@ -5297,7 +5297,7 @@ pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64")]
fn vld2_dup_f64_(ptr: *const f64) -> float64x1x2_t;
}
vld2_dup_f64_(a.cast())
vld2_dup_f64_(a as _)
}
/// Load single 2-element structure and replicate to all lanes of two registers
@ -5310,7 +5310,7 @@ pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64")]
fn vld2q_dup_f64_(ptr: *const f64) -> float64x2x2_t;
}
vld2q_dup_f64_(a.cast())
vld2q_dup_f64_(a as _)
}
/// Load multiple 2-element structures to two registers
@ -5325,7 +5325,7 @@ pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> in
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8")]
fn vld2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t;
}
vld2q_lane_s8_(b.0, b.1, LANE as i64, a.cast())
vld2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
}
/// Load multiple 2-element structures to two registers
@ -5340,7 +5340,7 @@ pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> i
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8")]
fn vld2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t;
}
vld2_lane_s64_(b.0, b.1, LANE as i64, a.cast())
vld2_lane_s64_(b.0, b.1, LANE as i64, a as _)
}
/// Load multiple 2-element structures to two registers
@ -5355,7 +5355,7 @@ pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8")]
fn vld2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t;
}
vld2q_lane_s64_(b.0, b.1, LANE as i64, a.cast())
vld2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
}
/// Load multiple 2-element structures to two registers
@ -5430,7 +5430,7 @@ pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8")]
fn vld2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t;
}
vld2_lane_f64_(b.0, b.1, LANE as i64, a.cast())
vld2_lane_f64_(b.0, b.1, LANE as i64, a as _)
}
/// Load multiple 2-element structures to two registers
@ -5445,7 +5445,7 @@ pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8")]
fn vld2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8) -> float64x2x2_t;
}
vld2q_lane_f64_(b.0, b.1, LANE as i64, a.cast())
vld2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
}
/// Load multiple 3-element structures to three registers
@ -5458,7 +5458,7 @@ pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64")]
fn vld3q_s64_(ptr: *const int64x2_t) -> int64x2x3_t;
}
vld3q_s64_(a.cast())
vld3q_s64_(a as _)
}
/// Load multiple 3-element structures to three registers
@ -5487,7 +5487,7 @@ pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64")]
fn vld3_f64_(ptr: *const float64x1_t) -> float64x1x3_t;
}
vld3_f64_(a.cast())
vld3_f64_(a as _)
}
/// Load multiple 3-element structures to three registers
@ -5500,7 +5500,7 @@ pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64")]
fn vld3q_f64_(ptr: *const float64x2_t) -> float64x2x3_t;
}
vld3q_f64_(a.cast())
vld3q_f64_(a as _)
}
/// Load single 3-element structure and replicate to all lanes of three registers
@ -5513,7 +5513,7 @@ pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64")]
fn vld3q_dup_s64_(ptr: *const i64) -> int64x2x3_t;
}
vld3q_dup_s64_(a.cast())
vld3q_dup_s64_(a as _)
}
/// Load single 3-element structure and replicate to all lanes of three registers
@ -5542,7 +5542,7 @@ pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64")]
fn vld3_dup_f64_(ptr: *const f64) -> float64x1x3_t;
}
vld3_dup_f64_(a.cast())
vld3_dup_f64_(a as _)
}
/// Load single 3-element structure and replicate to all lanes of three registers
@ -5555,7 +5555,7 @@ pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64")]
fn vld3q_dup_f64_(ptr: *const f64) -> float64x2x3_t;
}
vld3q_dup_f64_(a.cast())
vld3q_dup_f64_(a as _)
}
/// Load multiple 3-element structures to two registers
@ -5570,7 +5570,7 @@ pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> in
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8")]
fn vld3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *const i8) -> int8x16x3_t;
}
vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a.cast())
vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Load multiple 3-element structures to two registers
@ -5585,7 +5585,7 @@ pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> i
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8")]
fn vld3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *const i8) -> int64x1x3_t;
}
vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Load multiple 3-element structures to two registers
@ -5600,7 +5600,7 @@ pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8")]
fn vld3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *const i8) -> int64x2x3_t;
}
vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Load multiple 3-element structures to three registers
@ -5675,7 +5675,7 @@ pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8")]
fn vld3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *const i8) -> float64x1x3_t;
}
vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Load multiple 3-element structures to three registers
@ -5690,7 +5690,7 @@ pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8")]
fn vld3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *const i8) -> float64x2x3_t;
}
vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Load multiple 4-element structures to four registers
@ -5703,7 +5703,7 @@ pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64")]
fn vld4q_s64_(ptr: *const int64x2_t) -> int64x2x4_t;
}
vld4q_s64_(a.cast())
vld4q_s64_(a as _)
}
/// Load multiple 4-element structures to four registers
@ -5732,7 +5732,7 @@ pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64")]
fn vld4_f64_(ptr: *const float64x1_t) -> float64x1x4_t;
}
vld4_f64_(a.cast())
vld4_f64_(a as _)
}
/// Load multiple 4-element structures to four registers
@ -5745,7 +5745,7 @@ pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64")]
fn vld4q_f64_(ptr: *const float64x2_t) -> float64x2x4_t;
}
vld4q_f64_(a.cast())
vld4q_f64_(a as _)
}
/// Load single 4-element structure and replicate to all lanes of four registers
@ -5758,7 +5758,7 @@ pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64")]
fn vld4q_dup_s64_(ptr: *const i64) -> int64x2x4_t;
}
vld4q_dup_s64_(a.cast())
vld4q_dup_s64_(a as _)
}
/// Load single 4-element structure and replicate to all lanes of four registers
@ -5787,7 +5787,7 @@ pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64")]
fn vld4_dup_f64_(ptr: *const f64) -> float64x1x4_t;
}
vld4_dup_f64_(a.cast())
vld4_dup_f64_(a as _)
}
/// Load single 4-element structure and replicate to all lanes of four registers
@ -5800,7 +5800,7 @@ pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64")]
fn vld4q_dup_f64_(ptr: *const f64) -> float64x2x4_t;
}
vld4q_dup_f64_(a.cast())
vld4q_dup_f64_(a as _)
}
/// Load multiple 4-element structures to four registers
@ -5815,7 +5815,7 @@ pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> in
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8")]
fn vld4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *const i8) -> int8x16x4_t;
}
vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Load multiple 4-element structures to four registers
@ -5830,7 +5830,7 @@ pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> i
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8")]
fn vld4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *const i8) -> int64x1x4_t;
}
vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Load multiple 4-element structures to four registers
@ -5845,7 +5845,7 @@ pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8")]
fn vld4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *const i8) -> int64x2x4_t;
}
vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Load multiple 4-element structures to four registers
@ -5920,7 +5920,7 @@ pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) ->
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8")]
fn vld4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *const i8) -> float64x1x4_t;
}
vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Load multiple 4-element structures to four registers
@ -5935,7 +5935,7 @@ pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8")]
fn vld4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *const i8) -> float64x2x4_t;
}
vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Store multiple single-element structures from one, two, three, or four registers
@ -6046,7 +6046,7 @@ pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2i64.p0i8")]
fn vst2q_s64_(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
}
vst2q_s64_(b.0, b.1, a.cast())
vst2q_s64_(b.0, b.1, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6075,7 +6075,7 @@ pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v1f64.p0i8")]
fn vst2_f64_(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
}
vst2_f64_(b.0, b.1, a.cast())
vst2_f64_(b.0, b.1, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6088,7 +6088,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2f64.p0i8")]
fn vst2q_f64_(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
}
vst2q_f64_(b.0, b.1, a.cast())
vst2q_f64_(b.0, b.1, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6103,7 +6103,7 @@ pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8")]
fn vst2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8);
}
vst2q_lane_s8_(b.0, b.1, LANE as i64, a.cast())
vst2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6118,7 +6118,7 @@ pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8")]
fn vst2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8);
}
vst2_lane_s64_(b.0, b.1, LANE as i64, a.cast())
vst2_lane_s64_(b.0, b.1, LANE as i64, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6133,7 +6133,7 @@ pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8")]
fn vst2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8);
}
vst2q_lane_s64_(b.0, b.1, LANE as i64, a.cast())
vst2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6208,7 +6208,7 @@ pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8")]
fn vst2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8);
}
vst2_lane_f64_(b.0, b.1, LANE as i64, a.cast())
vst2_lane_f64_(b.0, b.1, LANE as i64, a as _)
}
/// Store multiple 2-element structures from two registers
@ -6223,7 +6223,7 @@ pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8")]
fn vst2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8);
}
vst2q_lane_f64_(b.0, b.1, LANE as i64, a.cast())
vst2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6236,7 +6236,7 @@ pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2i64.p0i8")]
fn vst3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
}
vst3q_s64_(b.0, b.1, b.2, a.cast())
vst3q_s64_(b.0, b.1, b.2, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6265,7 +6265,7 @@ pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v1f64.p0i8")]
fn vst3_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
}
vst3_f64_(b.0, b.1, b.2, a.cast())
vst3_f64_(b.0, b.1, b.2, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6278,7 +6278,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2f64.p0i8")]
fn vst3q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
}
vst3q_f64_(b.0, b.1, b.2, a.cast())
vst3q_f64_(b.0, b.1, b.2, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6293,7 +6293,7 @@ pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8")]
fn vst3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8);
}
vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a.cast())
vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6308,7 +6308,7 @@ pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8")]
fn vst3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8);
}
vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6323,7 +6323,7 @@ pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8")]
fn vst3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8);
}
vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6398,7 +6398,7 @@ pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8")]
fn vst3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8);
}
vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Store multiple 3-element structures from three registers
@ -6413,7 +6413,7 @@ pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8")]
fn vst3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8);
}
vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6426,7 +6426,7 @@ pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2i64.p0i8")]
fn vst4q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
}
vst4q_s64_(b.0, b.1, b.2, b.3, a.cast())
vst4q_s64_(b.0, b.1, b.2, b.3, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6455,7 +6455,7 @@ pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v1f64.p0i8")]
fn vst4_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
}
vst4_f64_(b.0, b.1, b.2, b.3, a.cast())
vst4_f64_(b.0, b.1, b.2, b.3, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6468,7 +6468,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2f64.p0i8")]
fn vst4q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
}
vst4q_f64_(b.0, b.1, b.2, b.3, a.cast())
vst4q_f64_(b.0, b.1, b.2, b.3, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6483,7 +6483,7 @@ pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8")]
fn vst4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *mut i8);
}
vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6498,7 +6498,7 @@ pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8")]
fn vst4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *mut i8);
}
vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6513,7 +6513,7 @@ pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8")]
fn vst4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *mut i8);
}
vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6588,7 +6588,7 @@ pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8")]
fn vst4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *mut i8);
}
vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Store multiple 4-element structures from four registers
@ -6603,7 +6603,7 @@ pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8")]
fn vst4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *mut i8);
}
vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
}
/// Multiply
@ -7512,7 +7512,7 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
a - b
a.wrapping_sub(b)
}
/// Subtract
@ -7520,7 +7520,7 @@ pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
a - b
a.wrapping_sub(b)
}
/// Add
@ -7528,7 +7528,7 @@ pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
a + b
a.wrapping_add(b)
}
/// Add
@ -7536,7 +7536,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
a + b
a.wrapping_add(b)
}
/// Floating-point add across vector
@ -11536,7 +11536,7 @@ pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> u
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
let b: i64 = vrshrd_n_s64::<N>(b);
a + b
a.wrapping_add(b)
}
/// Ungisned rounding shift right and accumulate.
@ -11547,7 +11547,7 @@ pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
let b: u64 = vrshrd_n_u64::<N>(b);
a + b
a.wrapping_add(b)
}
/// Rounding subtract returning high narrow
@ -17802,7 +17802,7 @@ mod test {
let a: [f64; 2] = [0., 1.];
let e: [f64; 1] = [1.];
let mut r: [f64; 1] = [0f64; 1];
vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17811,7 +17811,7 @@ mod test {
let a: [f64; 3] = [0., 1., 2.];
let e: [f64; 2] = [1., 0.];
let mut r: [f64; 2] = [0f64; 2];
vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17820,7 +17820,7 @@ mod test {
let a: [f64; 3] = [0., 1., 2.];
let e: [f64; 2] = [1., 2.];
let mut r: [f64; 2] = [0f64; 2];
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17829,7 +17829,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 3., 4.];
let e: [f64; 4] = [1., 2., 3., 4.];
let mut r: [f64; 4] = [0f64; 4];
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17838,7 +17838,7 @@ mod test {
let a: [f64; 4] = [0., 1., 2., 3.];
let e: [f64; 3] = [1., 2., 3.];
let mut r: [f64; 3] = [0f64; 3];
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17847,7 +17847,7 @@ mod test {
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
let mut r: [f64; 6] = [0f64; 6];
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17856,7 +17856,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 3., 4.];
let e: [f64; 4] = [1., 2., 3., 4.];
let mut r: [f64; 4] = [0f64; 4];
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17865,7 +17865,7 @@ mod test {
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
let mut r: [f64; 8] = [0f64; 8];
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17874,7 +17874,7 @@ mod test {
let a: [i64; 5] = [0, 1, 2, 2, 3];
let e: [i64; 4] = [1, 2, 2, 3];
let mut r: [i64; 4] = [0i64; 4];
vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17883,7 +17883,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 3];
let e: [u64; 4] = [1, 2, 2, 3];
let mut r: [u64; 4] = [0u64; 4];
vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17892,7 +17892,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 3];
let e: [u64; 4] = [1, 2, 2, 3];
let mut r: [u64; 4] = [0u64; 4];
vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17901,7 +17901,7 @@ mod test {
let a: [f64; 3] = [0., 1., 2.];
let e: [f64; 2] = [1., 2.];
let mut r: [f64; 2] = [0f64; 2];
vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17910,7 +17910,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 2., 3.];
let e: [f64; 4] = [1., 2., 2., 3.];
let mut r: [f64; 4] = [0f64; 4];
vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17919,7 +17919,7 @@ mod test {
let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [i8; 32] = [0i8; 32];
vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17928,7 +17928,7 @@ mod test {
let a: [i64; 3] = [0, 1, 2];
let e: [i64; 2] = [1, 2];
let mut r: [i64; 2] = [0i64; 2];
vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17937,7 +17937,7 @@ mod test {
let a: [i64; 5] = [0, 1, 2, 2, 3];
let e: [i64; 4] = [1, 2, 0, 0];
let mut r: [i64; 4] = [0i64; 4];
vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17946,7 +17946,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 32] = [0u8; 32];
vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17955,7 +17955,7 @@ mod test {
let a: [u64; 3] = [0, 1, 2];
let e: [u64; 2] = [1, 2];
let mut r: [u64; 2] = [0u64; 2];
vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17964,7 +17964,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 3];
let e: [u64; 4] = [1, 2, 0, 0];
let mut r: [u64; 4] = [0u64; 4];
vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17973,7 +17973,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 32] = [0u8; 32];
vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17982,7 +17982,7 @@ mod test {
let a: [u64; 3] = [0, 1, 2];
let e: [u64; 2] = [1, 2];
let mut r: [u64; 2] = [0u64; 2];
vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -17991,7 +17991,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 3];
let e: [u64; 4] = [1, 2, 0, 0];
let mut r: [u64; 4] = [0u64; 4];
vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18000,7 +18000,7 @@ mod test {
let a: [f64; 3] = [0., 1., 2.];
let e: [f64; 2] = [1., 2.];
let mut r: [f64; 2] = [0f64; 2];
vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18009,7 +18009,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 2., 3.];
let e: [f64; 4] = [1., 2., 0., 0.];
let mut r: [f64; 4] = [0f64; 4];
vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18018,7 +18018,7 @@ mod test {
let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [i64; 6] = [1, 2, 2, 2, 4, 4];
let mut r: [i64; 6] = [0i64; 6];
vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18027,7 +18027,7 @@ mod test {
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
let mut r: [u64; 6] = [0u64; 6];
vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18036,7 +18036,7 @@ mod test {
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
let mut r: [u64; 6] = [0u64; 6];
vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18045,7 +18045,7 @@ mod test {
let a: [f64; 4] = [0., 1., 2., 2.];
let e: [f64; 3] = [1., 2., 2.];
let mut r: [f64; 3] = [0f64; 3];
vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18054,7 +18054,7 @@ mod test {
let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.];
let e: [f64; 6] = [1., 2., 2., 2., 4., 4.];
let mut r: [f64; 6] = [0f64; 6];
vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18063,7 +18063,7 @@ mod test {
let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
let e: [i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [i8; 48] = [0i8; 48];
vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18072,7 +18072,7 @@ mod test {
let a: [i64; 4] = [0, 1, 2, 2];
let e: [i64; 3] = [1, 2, 2];
let mut r: [i64; 3] = [0i64; 3];
vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18081,7 +18081,7 @@ mod test {
let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [i64; 6] = [1, 2, 2, 0, 0, 0];
let mut r: [i64; 6] = [0i64; 6];
vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18090,7 +18090,7 @@ mod test {
let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 48] = [0u8; 48];
vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18099,7 +18099,7 @@ mod test {
let a: [u64; 4] = [0, 1, 2, 2];
let e: [u64; 3] = [1, 2, 2];
let mut r: [u64; 3] = [0u64; 3];
vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18108,7 +18108,7 @@ mod test {
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
let mut r: [u64; 6] = [0u64; 6];
vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18117,7 +18117,7 @@ mod test {
let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 48] = [0u8; 48];
vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18126,7 +18126,7 @@ mod test {
let a: [u64; 4] = [0, 1, 2, 2];
let e: [u64; 3] = [1, 2, 2];
let mut r: [u64; 3] = [0u64; 3];
vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18135,7 +18135,7 @@ mod test {
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
let mut r: [u64; 6] = [0u64; 6];
vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18144,7 +18144,7 @@ mod test {
let a: [f64; 4] = [0., 1., 2., 2.];
let e: [f64; 3] = [1., 2., 2.];
let mut r: [f64; 3] = [0f64; 3];
vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18153,7 +18153,7 @@ mod test {
let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.];
let e: [f64; 6] = [1., 2., 2., 0., 0., 0.];
let mut r: [f64; 6] = [0f64; 6];
vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18162,7 +18162,7 @@ mod test {
let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
let mut r: [i64; 8] = [0i64; 8];
vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18171,7 +18171,7 @@ mod test {
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
let mut r: [u64; 8] = [0u64; 8];
vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18180,7 +18180,7 @@ mod test {
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
let mut r: [u64; 8] = [0u64; 8];
vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18189,7 +18189,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 2., 6.];
let e: [f64; 4] = [1., 2., 2., 6.];
let mut r: [f64; 4] = [0f64; 4];
vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18198,7 +18198,7 @@ mod test {
let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.];
let mut r: [f64; 8] = [0f64; 8];
vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18207,7 +18207,7 @@ mod test {
let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [i8; 64] = [0i8; 64];
vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18216,7 +18216,7 @@ mod test {
let a: [i64; 5] = [0, 1, 2, 2, 6];
let e: [i64; 4] = [1, 2, 2, 6];
let mut r: [i64; 4] = [0i64; 4];
vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18225,7 +18225,7 @@ mod test {
let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
let mut r: [i64; 8] = [0i64; 8];
vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18234,7 +18234,7 @@ mod test {
let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 64] = [0u8; 64];
vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18243,7 +18243,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 6];
let e: [u64; 4] = [1, 2, 2, 6];
let mut r: [u64; 4] = [0u64; 4];
vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18252,7 +18252,7 @@ mod test {
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
let mut r: [u64; 8] = [0u64; 8];
vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18261,7 +18261,7 @@ mod test {
let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let mut r: [u8; 64] = [0u8; 64];
vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18270,7 +18270,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 2, 6];
let e: [u64; 4] = [1, 2, 2, 6];
let mut r: [u64; 4] = [0u64; 4];
vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18279,7 +18279,7 @@ mod test {
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
let mut r: [u64; 8] = [0u64; 8];
vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18288,7 +18288,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 2., 6.];
let e: [f64; 4] = [1., 2., 2., 6.];
let mut r: [f64; 4] = [0f64; 4];
vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}
@ -18297,7 +18297,7 @@ mod test {
let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.];
let mut r: [f64; 8] = [0f64; 8];
vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
}

View file

@ -2814,7 +2814,7 @@ pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
a + vshrd_n_s64::<N>(b)
a.wrapping_add(vshrd_n_s64::<N>(b))
}
/// Unsigned shift right and accumulate
@ -2824,7 +2824,7 @@ pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
a + vshrd_n_u64::<N>(b)
a.wrapping_add(vshrd_n_u64::<N>(b))
}
/// Shift Left and Insert (immediate)

View file

@ -6,8 +6,7 @@ mod generated;
pub use self::generated::*;
use crate::{
convert::TryInto, core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked,
mem::transmute,
core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute,
};
#[cfg(test)]
use stdarch_test::assert_instr;

View file

@ -10,7 +10,7 @@ macro_rules! simd_ty {
#[allow(clippy::use_self)]
impl $id {
#[inline]
#[inline(always)]
pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self {
$id($($elem_name),*)
}
@ -43,12 +43,12 @@ macro_rules! simd_m_ty {
#[allow(clippy::use_self)]
impl $id {
#[inline]
#[inline(always)]
const fn bool_to_internal(x: bool) -> $ety {
[0 as $ety, !(0 as $ety)][x as usize]
}
#[inline]
#[inline(always)]
pub(crate) const fn new($($elem_name: bool),*) -> Self {
$id($(Self::bool_to_internal($elem_name)),*)
}

View file

@ -1,115 +0,0 @@
vmmlaq_s32
vmmlaq_u32
vrnd32x_f64
vrnd32xq_f64
vrnd32z_f64
vrnd32zq_f64
vrnd64x_f64
vrnd64z_f64
vrnd64zq_f64
vsm3partw1q_u32
vsm3partw2q_u32
vsm3tt1bq_u32
vsm3tt2aq_u32
vsm3tt2bq_u32
vsm4ekeyq_u32
vsm4eq_u32
vsudot_lane_s32
vsudot_laneq_s32
vsudotq_lane_s32
vsudotq_laneq_s32
vusdot_lane_s32
vusdot_laneq_s32
vusdot_s32
vusdotq_lane_s32
vusdotq_laneq_s32
vusdotq_s32
vcls_u16
vcls_u32
vcls_u8
vclsq_u16
vclsq_u32
vclsq_u8
vcreate_s16
vcreate_u16
vpaddq_s64
vpaddq_u64
vqshlu_n_s16
vqshlu_n_s32
vqshlu_n_s64
vqshlu_n_s8
vqshlub_n_s8
vqshlud_n_s64
vqshluh_n_s16
vqshluq_n_s16
vqshluq_n_s32
vqshluq_n_s64
vqshluq_n_s8
vqshlus_n_s32
vrax1q_u64
vreinterpretq_p128_f32
vreinterpretq_p128_f64
vreinterpretq_p128_p16
vreinterpretq_p128_p8
vreinterpretq_p128_s16
vreinterpretq_p128_s32
vreinterpretq_p128_s64
vreinterpretq_p128_s8
vreinterpretq_p128_u16
vreinterpretq_p128_u32
vreinterpretq_p128_u64
vreinterpretq_p128_u8
vrnd32x_f32
vrnd32xq_f32
vrnd32z_f32
vrnd32zq_f32
vrnd64x_f32
vrnd64xq_f32
vrnd64xq_f64
vrnd64z_f32
vrnd64zq_f32
vsha512h2q_u64
vsha512hq_u64
vsha512su0q_u64
vsha512su1q_u64
vslid_n_s64
vslid_n_u64
vsm3ss1q_u32
vsm3tt1aq_u32
vsrid_n_s64
vsrid_n_u64
vusmmlaq_s32
vxarq_u64
vadd_p16
vadd_p64
vadd_p8
vaddq_p16
vaddq_p64
vaddq_p8
vbcaxq_s16
vbcaxq_s32
vbcaxq_s64
vbcaxq_s8
vbcaxq_u16
vbcaxq_u32
vbcaxq_u64
vbcaxq_u8
veor3q_s16
veor3q_s32
veor3q_s64
veor3q_s8
veor3q_u16
veor3q_u32
veor3q_u64
veor3q_u8
vshld_s64
vshld_u64
vcopyq_laneq_u8
vcopyq_laneq_s8
vcopyq_laneq_p8
vcopyq_lane_u8
vcopyq_lane_s8
vcopyq_lane_p8
vcopy_laneq_u8
vcopy_laneq_s8
vcopy_laneq_p8

View file

@ -0,0 +1,133 @@
# Not implemented in stdarch yet
vbfdot_f32
vbfdot_lane_f32
vbfdot_laneq_f32
vbfdotq_f32
vbfdotq_lane_f32
vbfdotq_laneq_f32
vbfmlalbq_f32
vbfmlalbq_lane_f32
vbfmlalbq_laneq_f32
vbfmlaltq_f32
vbfmlaltq_lane_f32
vbfmlaltq_laneq_f32
vbfmmlaq_f32
vsudot_laneq_s32
vsudot_lane_s32
vsudotq_laneq_s32
vsudotq_lane_s32
vusdot_laneq_s32
vusdot_lane_s32
vusdotq_laneq_s32
vusdotq_lane_s32
vusdotq_s32
vusdot_s32
# Implemented in Clang but missing from CSV
vcmla_f64
vcmla_lane_f64
vcmla_laneq_f64
vcmlaq_lane_f64
vcmlaq_laneq_f64
vcmlaq_rot180_lane_f64
vcmlaq_rot180_laneq_f64
vcmlaq_rot270_lane_f64
vcmlaq_rot270_laneq_f64
vcmlaq_rot90_lane_f64
vcmlaq_rot90_laneq_f64
vcmla_rot180_f64
vcmla_rot180_lane_f64
vcmla_rot180_laneq_f64
vcmla_rot270_f64
vcmla_rot270_lane_f64
vcmla_rot270_laneq_f64
vcmla_rot90_f64
vcmla_rot90_lane_f64
vcmla_rot90_laneq_f64
# Implemented in Clang and stdarch but missing from CSV
vmov_n_p64
vmovq_n_p64
vreinterpret_f32_p64
vreinterpret_p64_s64
vreinterpretq_f32_p128
vreinterpretq_f32_p64
vreinterpretq_p128_p64
vreinterpretq_p64_p128
vtst_p16
vtstq_p16
# Missing from both Clang and stdarch
vrnd32x_f64
vrnd32xq_f64
vrnd32z_f64
vrnd32zq_f64
vrnd64x_f64
vrnd64xq_f64
vrnd64z_f64
vrnd64zq_f64
# Takes too long to compile tests
vcopyq_laneq_u8
vcopyq_laneq_s8
vcopyq_laneq_p8
vcopyq_lane_u8
vcopyq_lane_s8
vcopyq_lane_p8
vcopy_laneq_u8
vcopy_laneq_s8
vcopy_laneq_p8
vcopy_lane_u8
vcopy_lane_s8
vcopy_lane_p8
# QEMU 6.0 doesn't support these instructions
vmmlaq_s32
vmmlaq_u32
vsm3partw1q_u32
vsm3partw2q_u32
vsm3ss1q_u32
vsm3tt1aq_u32
vsm3tt1bq_u32
vsm3tt2aq_u32
vsm3tt2bq_u32
vsm4ekeyq_u32
vsm4eq_u32
vusmmlaq_s32
# LLVM select error in debug builds
vqshlu_n_s16
vqshlu_n_s32
vqshlu_n_s64
vqshlu_n_s8
vqshlub_n_s8
vqshlud_n_s64
vqshluh_n_s16
vqshluq_n_s16
vqshluq_n_s32
vqshluq_n_s64
vqshluq_n_s8
vqshlus_n_s32
# These tests produce a different result from C but only in debug builds of
# stdarch. This likely both a bug in stdarch (expanding to a different LLVM
# intrinsic) and a bug in LLVM (incorrect optimization changing the behavior of
# integer operations).
vqrdmlah_lane_s16
vqrdmlah_lane_s32
vqrdmlah_laneq_s16
vqrdmlah_laneq_s32
vqrdmlah_s16
vqrdmlah_s32
vqrdmlahh_lane_s16
vqrdmlahh_laneq_s16
vqrdmlahh_s16
vqrdmlahq_lane_s16
vqrdmlahq_lane_s32
vqrdmlahq_laneq_s16
vqrdmlahq_laneq_s32
vqrdmlahq_s16
vqrdmlahq_s32
vqrdmlahs_lane_s32
vqrdmlahs_laneq_s32
vqrdmlahs_s32

View file

@ -0,0 +1,334 @@
# Not implemented in stdarch yet
vbfdot_f32
vbfdot_lane_f32
vbfdot_laneq_f32
vbfdotq_f32
vbfdotq_lane_f32
vbfdotq_laneq_f32
vbfmlalbq_f32
vbfmlalbq_lane_f32
vbfmlalbq_laneq_f32
vbfmlaltq_f32
vbfmlaltq_lane_f32
vbfmlaltq_laneq_f32
vbfmmlaq_f32
vsudot_laneq_s32
vsudot_lane_s32
vsudotq_laneq_s32
vsudotq_lane_s32
vusdot_laneq_s32
vusdot_lane_s32
vusdotq_laneq_s32
vusdotq_lane_s32
vusdotq_s32
vusdot_s32
# Implemented in Clang and stdarch but missing from CSV
vtst_p16
vtstq_p16
# QEMU 6.0 doesn't support these instructions
vmmlaq_s32
vmmlaq_u32
vusmmlaq_s32
# Implemented in Clang and stdarch for A64 only even though CSV claims A32 support
__crc32d
__crc32cd
vaddq_p64
vbsl_p64
vbslq_p64
vceq_p64
vceqq_p64
vceqz_p64
vceqzq_p64
vcombine_p64
vcopy_lane_p64
vcopy_laneq_p64
vcopyq_lane_p64
vcopyq_laneq_p64
vcreate_p64
vdup_lane_p64
vdup_n_p64
vdupq_lane_p64
vdupq_n_p64
vext_p64
vextq_p64
vget_high_p64
vget_lane_p64
vget_low_p64
vgetq_lane_p64
vmovn_high_s16
vmovn_high_s32
vmovn_high_s64
vmovn_high_u16
vmovn_high_u32
vmovn_high_u64
vmull_high_p64
vmull_p64
vreinterpret_p16_p64
vreinterpret_p64_f32
vreinterpret_p64_p16
vreinterpret_p64_p8
vreinterpret_p64_s16
vreinterpret_p64_s32
vreinterpret_p64_s8
vreinterpret_p64_u16
vreinterpret_p64_u32
vreinterpret_p64_u64
vreinterpret_p64_u8
vreinterpret_p8_p64
vreinterpretq_f64_u64
vreinterpretq_p128_f32
vreinterpretq_p128_p16
vreinterpretq_p128_p8
vreinterpretq_p128_s16
vreinterpretq_p128_s32
vreinterpretq_p128_s64
vreinterpretq_p128_s8
vreinterpretq_p128_u16
vreinterpretq_p128_u32
vreinterpretq_p128_u64
vreinterpretq_p128_u8
vreinterpretq_p16_p64
vreinterpretq_p64_f32
vreinterpretq_p64_p16
vreinterpretq_p64_p8
vreinterpretq_p64_s16
vreinterpretq_p64_s32
vreinterpretq_p64_s64
vreinterpretq_p64_s8
vreinterpretq_p64_u16
vreinterpretq_p64_u32
vreinterpretq_p64_u64
vreinterpretq_p64_u8
vreinterpretq_p8_p64
vreinterpretq_s16_p64
vreinterpretq_s32_p64
vreinterpretq_s64_p64
vreinterpretq_s8_p64
vreinterpretq_u16_p64
vreinterpretq_u32_p64
vreinterpretq_u64_p64
vreinterpretq_u8_p64
vreinterpret_s16_p64
vreinterpret_s32_p64
vreinterpret_s64_p64
vreinterpret_s8_p64
vreinterpret_u16_p64
vreinterpret_u32_p64
vreinterpret_u64_p64
vreinterpret_u8_p64
vrndn_f64
vrndnq_f64
vset_lane_p64
vsetq_lane_p64
vsli_n_p64
vsliq_n_p64
vsri_n_p64
vsriq_n_p64
vtst_p64
vtstq_p64
# Present in Clang header but triggers an ICE due to lack of backend support.
vcmla_f32
vcmla_lane_f32
vcmla_laneq_f32
vcmla_rot180_f32
vcmla_rot180_lane_f32
vcmla_rot180_laneq_f32
vcmla_rot270_f32
vcmla_rot270_lane_f32
vcmla_rot270_laneq_f32
vcmla_rot90_f32
vcmla_rot90_lane_f32
vcmla_rot90_laneq_f32
vcmlaq_f32
vcmlaq_lane_f32
vcmlaq_laneq_f32
vcmlaq_rot180_f32
vcmlaq_rot180_lane_f32
vcmlaq_rot180_laneq_f32
vcmlaq_rot270_f32
vcmlaq_rot270_lane_f32
vcmlaq_rot270_laneq_f32
vcmlaq_rot90_f32
vcmlaq_rot90_lane_f32
vcmlaq_rot90_laneq_f32
# Implemented in stdarch for A64 only, Clang support both A32/A64
vadd_s64
vadd_u64
vcaddq_rot270_f32
vcaddq_rot90_f32
vcadd_rot270_f32
vcadd_rot90_f32
vcombine_f32
vcombine_p16
vcombine_p8
vcombine_s16
vcombine_s32
vcombine_s64
vcombine_s8
vcombine_u16
vcombine_u32
vcombine_u64
vcombine_u8
vcvtaq_s32_f32
vcvtaq_u32_f32
vcvta_s32_f32
vcvta_u32_f32
vcvtmq_s32_f32
vcvtmq_u32_f32
vcvtm_s32_f32
vcvtm_u32_f32
vcvtnq_s32_f32
vcvtnq_u32_f32
vcvtn_s32_f32
vcvtn_u32_f32
vcvtpq_s32_f32
vcvtpq_u32_f32
vcvtp_s32_f32
vcvtp_u32_f32
vdot_lane_s32
vdot_lane_u32
vdotq_lane_s32
vdotq_lane_u32
vdotq_s32
vdotq_u32
vdot_s32
vdot_u32
vqdmulh_lane_s16
vqdmulh_lane_s32
vqdmulhq_lane_s16
vqdmulhq_lane_s32
vrnda_f32
vrnda_f32
vrndaq_f32
vrndaq_f32
vrnd_f32
vrnd_f32
vrndi_f32
vrndi_f32
vrndiq_f32
vrndiq_f32
vrndm_f32
vrndm_f32
vrndmq_f32
vrndmq_f32
vrndns_f32
vrndp_f32
vrndpq_f32
vrndq_f32
vrndq_f32
vrndx_f32
vrndxq_f32
# LLVM select error in debug builds
vqrshrn_n_s16
vqrshrn_n_s32
vqrshrn_n_s64
vqrshrn_n_u16
vqrshrn_n_u32
vqrshrn_n_u64
vqrshrun_n_s16
vqrshrun_n_s32
vqrshrun_n_s64
vqshrn_n_s16
vqshrn_n_s32
vqshrn_n_s64
vqshrn_n_u16
vqshrn_n_u32
vqshrn_n_u64
vqshrun_n_s16
vqshrun_n_s32
vqshrun_n_s64
vrshrn_n_s16
vrshrn_n_s32
vrshrn_n_s64
vrshrn_n_u16
vrshrn_n_u32
vrshrn_n_u64
vshrq_n_u64
vshr_n_u64
# Failing tests: stdarch has incorrect results compared to Clang
vqshlu_n_s16
vqshlu_n_s32
vqshlu_n_s64
vqshlu_n_s8
vqshluq_n_s16
vqshluq_n_s32
vqshluq_n_s64
vqshluq_n_s8
vsli_n_p16
vsli_n_p8
vsli_n_s16
vsli_n_s32
vsli_n_s64
vsli_n_s8
vsli_n_u16
vsli_n_u32
vsli_n_u64
vsli_n_u8
vsliq_n_p16
vsliq_n_p8
vsliq_n_s16
vsliq_n_s32
vsliq_n_s64
vsliq_n_s8
vsliq_n_u16
vsliq_n_u32
vsliq_n_u64
vsliq_n_u8
vsri_n_p16
vsri_n_p8
vsri_n_s16
vsri_n_s32
vsri_n_s64
vsri_n_s8
vsri_n_u16
vsri_n_u32
vsri_n_u64
vsri_n_u8
vsriq_n_p16
vsriq_n_p8
vsriq_n_s16
vsriq_n_s32
vsriq_n_s64
vsriq_n_s8
vsriq_n_u16
vsriq_n_u32
vsriq_n_u64
vsriq_n_u8
# These produce a different result on Clang depending on the optimization level.
# This is definitely a bug in LLVM.
vadd_f32
vaddq_f32
vcvt_s32_f32
vcvt_u32_f32
vcvtq_s32_f32
vcvtq_u32_f32
vfma_f32
vfma_n_f32
vfmaq_f32
vfmaq_n_f32
vfms_f32
vfmsq_f32
vmla_f32
vmla_lane_f32
vmla_n_f32
vmlaq_f32
vmlaq_lane_f32
vmlaq_n_f32
vmls_f32
vmls_lane_f32
vmls_n_f32
vmlsq_f32
vmlsq_lane_f32
vmlsq_n_f32
vmul_lane_f32
vmul_n_f32
vmulq_lane_f32
vmulq_n_f32

View file

@ -82,11 +82,17 @@ impl Into<Intrinsic> for ACLEIntrinsicLine {
})
.collect();
let arguments = ArgumentList { args };
let a64_only = match &*self.supported_architectures {
"A64" => true,
"v7/A32/A64" | "A32/A64" => false,
_ => panic!("Invalid supported architectures"),
};
Intrinsic {
name: name.to_string(),
arguments,
results,
a64_only,
}
}
}

View file

@ -13,6 +13,9 @@ pub struct Intrinsic {
/// The return type of this intrinsic.
pub results: IntrinsicType,
/// Whether this intrinsic is only available on A64.
pub a64_only: bool,
}
impl Intrinsic {

View file

@ -72,12 +72,15 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
#include <cstring>
#include <iomanip>
#include <sstream>
template<typename T1, typename T2> T1 cast(T2 x) {{
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
T1 ret = 0;
memcpy(&ret, &x, sizeof(T1));
return ret;
}}
#ifdef __aarch64__
std::ostream& operator<<(std::ostream& os, poly128_t value) {{
std::stringstream temp;
do {{
@ -90,6 +93,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{
os << res;
return os;
}}
#endif
int main(int argc, char **argv) {{
{passes}
return 0;
@ -133,7 +138,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String)
}
}
fn generate_rust_program(intrinsic: &Intrinsic) -> String {
fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String {
let constraints = intrinsic
.arguments
.iter()
@ -146,25 +151,26 @@ fn generate_rust_program(intrinsic: &Intrinsic) -> String {
#![feature(stdsimd)]
#![allow(overflowing_literals)]
#![allow(non_upper_case_globals)]
use core_arch::arch::aarch64::*;
use core_arch::arch::{target_arch}::*;
fn main() {{
{passes}
}}
"#,
target_arch = if a32 { "arm" } else { "aarch64" },
passes = gen_code_rust(intrinsic, &constraints, Default::default())
)
}
fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str, a32: bool) -> bool {
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
let output = Command::new("sh")
.arg("-c")
.arg(format!(
"{cpp} {cppflags} {arch_flags} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
target = "aarch64-unknown-linux-gnu",
arch_flags = "-march=armv8.6-a+crypto+sha3+crc+dotprod",
target = if a32 { "armv7-unknown-linux-gnueabihf" } else { "aarch64-unknown-linux-gnu" },
arch_flags = if a32 { "-march=armv8.6-a+crypto+crc+dotprod" } else { "-march=armv8.6-a+crypto+sha3+crc+dotprod" },
filename = c_filename,
intrinsic = intrinsic.name,
cpp = compiler,
@ -175,19 +181,13 @@ fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
if output.status.success() {
true
} else {
let stderr = std::str::from_utf8(&output.stderr).unwrap_or("");
if stderr.contains("error: use of undeclared identifier") {
warn!("Skipping intrinsic due to no support: {}", intrinsic.name);
true
} else {
error!(
"Failed to compile code for intrinsic: {}\n\nstdout:\n{}\n\nstderr:\n{}",
intrinsic.name,
std::str::from_utf8(&output.stdout).unwrap_or(""),
std::str::from_utf8(&output.stderr).unwrap_or("")
);
false
}
error!(
"Failed to compile code for intrinsic: {}\n\nstdout:\n{}\n\nstderr:\n{}",
intrinsic.name,
std::str::from_utf8(&output.stdout).unwrap_or(""),
std::str::from_utf8(&output.stderr).unwrap_or("")
);
false
}
} else {
error!("Command failed: {:#?}", output);
@ -195,7 +195,7 @@ fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
}
}
fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool {
let _ = std::fs::create_dir("c_programs");
intrinsics
.par_iter()
@ -205,20 +205,20 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
file.write_all(c_code.into_bytes().as_slice()).unwrap();
compile_c(&c_filename, &i, compiler)
compile_c(&c_filename, &i, compiler, a32)
})
.find_any(|x| !x)
.is_none()
}
fn build_rust(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
fn build_rust(intrinsics: &Vec<Intrinsic>, toolchain: &str, a32: bool) -> bool {
intrinsics.iter().for_each(|i| {
let rust_dir = format!(r#"rust_programs/{}"#, i.name);
let _ = std::fs::create_dir_all(&rust_dir);
let rust_filename = format!(r#"{}/main.rs"#, rust_dir);
let mut file = File::create(&rust_filename).unwrap();
let c_code = generate_rust_program(&i);
let c_code = generate_rust_program(&i, a32);
file.write_all(c_code.into_bytes().as_slice()).unwrap();
});
@ -259,10 +259,15 @@ path = "{intrinsic}/main.rs""#,
.current_dir("rust_programs")
.arg("-c")
.arg(format!(
"cargo {toolchain} build --release --target {target}",
"cargo {toolchain} build --target {target}",
toolchain = toolchain,
target = "aarch64-unknown-linux-gnu",
target = if a32 {
"armv7-unknown-linux-gnueabihf"
} else {
"aarch64-unknown-linux-gnu"
},
))
.env("RUSTFLAGS", "-Cdebuginfo=0")
.output();
if let Ok(output) = output {
if output.status.success() {
@ -317,6 +322,12 @@ fn main() {
.long("skip")
.help("Filename for a list of intrinsics to skip (one per line)"),
)
.arg(
Arg::with_name("A32")
.takes_value(false)
.long("a32")
.help("Run tests for A32 instrinsics instead of A64"),
)
.get_matches();
let filename = matches.value_of("INPUT").unwrap();
@ -328,10 +339,15 @@ fn main() {
let c_runner = matches.value_of("RUNNER").unwrap_or("");
let skip = if let Some(filename) = matches.value_of("SKIP") {
let data = std::fs::read_to_string(&filename).expect("Failed to open file");
data.lines().map(String::from).collect_vec()
data.lines()
.map(str::trim)
.filter(|s| !s.contains('#'))
.map(String::from)
.collect_vec()
} else {
Default::default()
};
let a32 = matches.is_present("A32");
let intrinsics = get_acle_intrinsics(filename);
@ -352,18 +368,19 @@ fn main() {
.filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
.filter(|i| !skip.contains(&i.name))
.filter(|i| !(a32 && i.a64_only))
.collect::<Vec<_>>();
intrinsics.dedup();
if !build_c(&intrinsics, cpp_compiler) {
if !build_c(&intrinsics, cpp_compiler, a32) {
std::process::exit(2);
}
if !build_rust(&intrinsics, &toolchain) {
if !build_rust(&intrinsics, &toolchain, a32) {
std::process::exit(3);
}
if !compare_outputs(&intrinsics, &toolchain, &c_runner) {
if !compare_outputs(&intrinsics, &toolchain, &c_runner, a32) {
std::process::exit(1)
}
}
@ -374,7 +391,7 @@ enum FailureReason {
Difference(String, String, String),
}
fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str) -> bool {
fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a32: bool) -> bool {
let intrinsics = intrinsics
.par_iter()
.filter_map(|intrinsic| {
@ -390,11 +407,16 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str) -
.current_dir("rust_programs")
.arg("-c")
.arg(format!(
"cargo {toolchain} run --release --target {target} --bin {intrinsic}",
"cargo {toolchain} run --target {target} --bin {intrinsic}",
intrinsic = intrinsic.name,
toolchain = toolchain,
target = "aarch64-unknown-linux-gnu",
target = if a32 {
"armv7-unknown-linux-gnueabihf"
} else {
"aarch64-unknown-linux-gnu"
},
))
.env("RUSTFLAGS", "-Cdebuginfo=0")
.output();
let (c, rust) = match (c, rust) {

View file

@ -258,6 +258,9 @@ impl IntrinsicType {
/// This is required for 8 bit types due to printing as the 8 bit types use
/// a char and when using that in `std::cout` it will print as a character,
/// which means value of 0 will be printed as a null byte.
///
/// This is also needed for polynomial types because we want them to be
/// printed as unsigned integers to match Rust's `Debug` impl.
pub fn c_promotion(&self) -> &str {
match *self {
IntrinsicType::Type {
@ -267,9 +270,21 @@ impl IntrinsicType {
} if bit_len == 8 => match kind {
TypeKind::Int => "(int)",
TypeKind::UInt => "(unsigned int)",
TypeKind::Poly => "(unsigned int)",
TypeKind::Poly => "(unsigned int)(uint8_t)",
_ => "",
},
IntrinsicType::Type {
kind: TypeKind::Poly,
bit_len: Some(bit_len),
..
} => match bit_len {
8 => unreachable!("handled above"),
16 => "(uint16_t)",
32 => "(uint32_t)",
64 => "(uint64_t)",
128 => "",
_ => panic!("invalid bit_len"),
},
_ => "",
}
}

View file

@ -896,7 +896,7 @@ validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1,
arm = vcls
aarch64 = cls
generate uint*_t
generate uint8x8_t:int8x8_t, uint8x16_t:int8x16_t, uint16x4_t:int16x4_t, uint16x8_t:int16x8_t, uint32x2_t:int32x2_t, uint32x4_t:int32x4_t
/// Count leading zero bits
name = vclz
@ -2058,7 +2058,7 @@ generate int*_t
/// Negate
name = vneg
multi_fn = -a
multi_fn = a.wrapping_neg()
a = 1
validate -1
@ -4055,7 +4055,7 @@ generate float*_t
/// Subtract
name = vsub
multi_fn = a - b
multi_fn = a.wrapping_sub(b)
a = 3
b = 2
validate 1
@ -4065,7 +4065,7 @@ generate i64, u64
/// Add
name = vadd
multi_fn = a + b
multi_fn = a.wrapping_add(b)
a = 1
b = 2
validate 3
@ -5894,7 +5894,7 @@ name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N.try_into().unwrap()}
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
@ -5921,7 +5921,7 @@ name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N.try_into().unwrap()}
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
@ -6480,7 +6480,7 @@ name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N).try_into().unwrap()}
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N) as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@ -6507,7 +6507,7 @@ name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N).try_into().unwrap()}
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N) as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@ -6613,7 +6613,7 @@ n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a + b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
@ -6628,7 +6628,7 @@ n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a + b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
@ -6804,7 +6804,7 @@ name = vshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_shl, a, {vdup-nself-noext, N.try_into().unwrap()}
multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
@ -6818,7 +6818,7 @@ name = vshll
n-suffix
constn = N
multi_fn = static_assert-N-0-bits
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N.try_into().unwrap()}
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32
@ -6851,7 +6851,7 @@ n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = fix_right_shift_imm-N-bits
multi_fn = simd_shr, a, {vdup-nself-noext, n.try_into().unwrap()}
multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@ -6867,7 +6867,7 @@ name = vshrn_n
no-q
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}}
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

View file

@ -1304,7 +1304,7 @@ fn gen_aarch64(
};
format!(
r#"{}
{}{}({}, {} as i64, a.cast())"#,
{}{}({}, {} as i64, a as _)"#,
multi_calls,
ext_c,
current_fn,
@ -1327,7 +1327,7 @@ fn gen_aarch64(
}
}
} else if link_aarch64.is_some() && matches!(fn_type, Fntype::Store) {
let cast = if is_vstx(&name) { ".cast()" } else { "" };
let cast = if is_vstx(&name) { " as _" } else { "" };
match type_sub_len(in_t[1]) {
1 => format!(r#"{}{}(b, a{})"#, ext_c, current_fn, cast),
2 => format!(r#"{}{}(b.0, b.1, a{})"#, ext_c, current_fn, cast),
@ -1336,7 +1336,7 @@ fn gen_aarch64(
_ => panic!("unsupported type: {}", in_t[1]),
}
} else if link_aarch64.is_some() && is_vldx(&name) {
format!(r#"{}{}(a.cast())"#, ext_c, current_fn,)
format!(r#"{}{}(a as _)"#, ext_c, current_fn,)
} else {
let trans: [&str; 2] = if link_t[3] != out_t {
["transmute(", ")"]
@ -1553,7 +1553,7 @@ fn gen_store_test(
let a: [{}; {}] = {};
let e: [{}; {}] = {};
let mut r: [{}; {}] = [0{}; {}];
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
assert_eq!(r, e);
"#,
type_to_native_type(in_t[1]),
@ -2196,7 +2196,7 @@ fn gen_arm(
_ => "",
};
format!(
"{}(a.cast(), {}, {}, {})",
"{}(a as _, {}, {}, {})",
current_fn,
subs,
constn.as_deref().unwrap(),
@ -2235,7 +2235,7 @@ fn gen_arm(
} else if matches!(fn_type, Fntype::Store) {
let (cast, size) = if is_vstx(&name) {
(
".cast()",
" as _",
format!(", {}", type_bits(&type_to_sub_type(in_t[1])) / 8),
)
} else {
@ -2276,7 +2276,7 @@ fn gen_arm(
_ => "",
};
format!(
"{}({}, {} as i64, a.cast())",
"{}({}, {} as i64, a as _)",
current_fn,
subs,
constn.as_deref().unwrap()
@ -2307,7 +2307,7 @@ fn gen_arm(
_ => String::new(),
}
} else if matches!(fn_type, Fntype::Store) {
let cast = if is_vstx(&name) { ".cast()" } else { "" };
let cast = if is_vstx(&name) { " as _" } else { "" };
match type_sub_len(in_t[1]) {
1 => format!("{}(b, a{})", current_fn, cast),
2 => format!("{}(b.0, b.1, a{})", current_fn, cast),
@ -2316,7 +2316,7 @@ fn gen_arm(
_ => String::new(),
}
} else if link_aarch64.is_some() && is_vldx(&name) {
format!("{}(a.cast())", current_fn)
format!("{}(a as _)", current_fn)
} else {
String::new()
};

View file