Update the intrinsic checker tool (#1258)
This commit is contained in:
parent
972030f2b2
commit
937978eeef
20 changed files with 1204 additions and 794 deletions
1
library/stdarch/.github/workflows/main.yml
vendored
1
library/stdarch/.github/workflows/main.yml
vendored
|
|
@ -116,7 +116,6 @@ jobs:
|
|||
os: ubuntu-latest
|
||||
- target: armv7-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
rustflags: -C target-feature=+neon
|
||||
- target: mips-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
norun: true
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:20.04
|
||||
FROM ubuntu:21.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
|
|
@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang-12 \
|
||||
clang-13 \
|
||||
lld
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
FROM ubuntu:18.04
|
||||
FROM ubuntu:21.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
g++-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
file \
|
||||
clang-13 \
|
||||
lld
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ run() {
|
|||
--env NORUN \
|
||||
--env RUSTFLAGS \
|
||||
--env STDARCH_TEST_NORUN \
|
||||
--volume "$(dirname "$(dirname "$(command -v cargo)")")":/cargo \
|
||||
--volume "${HOME}/.cargo":/cargo \
|
||||
--volume "$(rustc --print sysroot)":/rust:ro \
|
||||
--volume "$(pwd)":/checkout:ro \
|
||||
--volume "$(pwd)"/target:/checkout/target \
|
||||
|
|
|
|||
|
|
@ -37,6 +37,13 @@ case ${TARGET} in
|
|||
mips-* | mipsel-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
|
||||
;;
|
||||
# Some of our test dependencies use the deprecated `gcc` crates which is
|
||||
# missing a fix from https://github.com/alexcrichton/cc-rs/pull/627. Apply
|
||||
# the workaround manually here.
|
||||
armv7-*eabihf | thumbv7-*eabihf)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
|
||||
export TARGET_CFLAGS="-mfpu=vfpv3-d16"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
|
|
@ -122,7 +129,10 @@ esac
|
|||
|
||||
if [ "${TARGET}" = "aarch64-unknown-linux-gnu" ]; then
|
||||
export CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
|
||||
cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-12" --skip crates/intrinsic-test/missing.txt
|
||||
RUST_LOG=warn cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-13" --skip crates/intrinsic-test/missing_aarch64.txt
|
||||
elif [ "${TARGET}" = "armv7-unknown-linux-gnueabihf" ]; then
|
||||
export CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
|
||||
RUST_LOG=warn cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/acle/tools/intrinsic_db/advsimd.csv --runner "${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" --cppcompiler "clang++-13" --skip crates/intrinsic-test/missing_arm.txt --a32
|
||||
fi
|
||||
|
||||
if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
|
||||
|
|
|
|||
|
|
@ -4455,7 +4455,7 @@ pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(neg))]
|
||||
pub unsafe fn vnegd_s64(a: i64) -> i64 {
|
||||
-a
|
||||
a.wrapping_neg()
|
||||
}
|
||||
|
||||
/// Negate
|
||||
|
|
@ -5213,7 +5213,7 @@ pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64")]
|
||||
fn vld2q_s64_(ptr: *const int64x2_t) -> int64x2x2_t;
|
||||
}
|
||||
vld2q_s64_(a.cast())
|
||||
vld2q_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5242,7 +5242,7 @@ pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64")]
|
||||
fn vld2_f64_(ptr: *const float64x1_t) -> float64x1x2_t;
|
||||
}
|
||||
vld2_f64_(a.cast())
|
||||
vld2_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5255,7 +5255,7 @@ pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64")]
|
||||
fn vld2q_f64_(ptr: *const float64x2_t) -> float64x2x2_t;
|
||||
}
|
||||
vld2q_f64_(a.cast())
|
||||
vld2q_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 2-element structure and replicate to all lanes of two registers
|
||||
|
|
@ -5268,7 +5268,7 @@ pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64")]
|
||||
fn vld2q_dup_s64_(ptr: *const i64) -> int64x2x2_t;
|
||||
}
|
||||
vld2q_dup_s64_(a.cast())
|
||||
vld2q_dup_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 2-element structure and replicate to all lanes of two registers
|
||||
|
|
@ -5297,7 +5297,7 @@ pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64")]
|
||||
fn vld2_dup_f64_(ptr: *const f64) -> float64x1x2_t;
|
||||
}
|
||||
vld2_dup_f64_(a.cast())
|
||||
vld2_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 2-element structure and replicate to all lanes of two registers
|
||||
|
|
@ -5310,7 +5310,7 @@ pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64")]
|
||||
fn vld2q_dup_f64_(ptr: *const f64) -> float64x2x2_t;
|
||||
}
|
||||
vld2q_dup_f64_(a.cast())
|
||||
vld2q_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5325,7 +5325,7 @@ pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> in
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8")]
|
||||
fn vld2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t;
|
||||
}
|
||||
vld2q_lane_s8_(b.0, b.1, LANE as i64, a.cast())
|
||||
vld2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5340,7 +5340,7 @@ pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> i
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8")]
|
||||
fn vld2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t;
|
||||
}
|
||||
vld2_lane_s64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vld2_lane_s64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5355,7 +5355,7 @@ pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8")]
|
||||
fn vld2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t;
|
||||
}
|
||||
vld2q_lane_s64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vld2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5430,7 +5430,7 @@ pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8")]
|
||||
fn vld2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t;
|
||||
}
|
||||
vld2_lane_f64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vld2_lane_f64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
|
|
@ -5445,7 +5445,7 @@ pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8")]
|
||||
fn vld2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8) -> float64x2x2_t;
|
||||
}
|
||||
vld2q_lane_f64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vld2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
|
|
@ -5458,7 +5458,7 @@ pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64")]
|
||||
fn vld3q_s64_(ptr: *const int64x2_t) -> int64x2x3_t;
|
||||
}
|
||||
vld3q_s64_(a.cast())
|
||||
vld3q_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
|
|
@ -5487,7 +5487,7 @@ pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64")]
|
||||
fn vld3_f64_(ptr: *const float64x1_t) -> float64x1x3_t;
|
||||
}
|
||||
vld3_f64_(a.cast())
|
||||
vld3_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
|
|
@ -5500,7 +5500,7 @@ pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64")]
|
||||
fn vld3q_f64_(ptr: *const float64x2_t) -> float64x2x3_t;
|
||||
}
|
||||
vld3q_f64_(a.cast())
|
||||
vld3q_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
|
|
@ -5513,7 +5513,7 @@ pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64")]
|
||||
fn vld3q_dup_s64_(ptr: *const i64) -> int64x2x3_t;
|
||||
}
|
||||
vld3q_dup_s64_(a.cast())
|
||||
vld3q_dup_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
|
|
@ -5542,7 +5542,7 @@ pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64")]
|
||||
fn vld3_dup_f64_(ptr: *const f64) -> float64x1x3_t;
|
||||
}
|
||||
vld3_dup_f64_(a.cast())
|
||||
vld3_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
|
|
@ -5555,7 +5555,7 @@ pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64")]
|
||||
fn vld3q_dup_f64_(ptr: *const f64) -> float64x2x3_t;
|
||||
}
|
||||
vld3q_dup_f64_(a.cast())
|
||||
vld3q_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to two registers
|
||||
|
|
@ -5570,7 +5570,7 @@ pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> in
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8")]
|
||||
fn vld3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *const i8) -> int8x16x3_t;
|
||||
}
|
||||
vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to two registers
|
||||
|
|
@ -5585,7 +5585,7 @@ pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> i
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8")]
|
||||
fn vld3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *const i8) -> int64x1x3_t;
|
||||
}
|
||||
vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to two registers
|
||||
|
|
@ -5600,7 +5600,7 @@ pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8")]
|
||||
fn vld3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *const i8) -> int64x2x3_t;
|
||||
}
|
||||
vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
|
|
@ -5675,7 +5675,7 @@ pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8")]
|
||||
fn vld3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *const i8) -> float64x1x3_t;
|
||||
}
|
||||
vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
|
|
@ -5690,7 +5690,7 @@ pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8")]
|
||||
fn vld3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *const i8) -> float64x2x3_t;
|
||||
}
|
||||
vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5703,7 +5703,7 @@ pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64")]
|
||||
fn vld4q_s64_(ptr: *const int64x2_t) -> int64x2x4_t;
|
||||
}
|
||||
vld4q_s64_(a.cast())
|
||||
vld4q_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5732,7 +5732,7 @@ pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64")]
|
||||
fn vld4_f64_(ptr: *const float64x1_t) -> float64x1x4_t;
|
||||
}
|
||||
vld4_f64_(a.cast())
|
||||
vld4_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5745,7 +5745,7 @@ pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64")]
|
||||
fn vld4q_f64_(ptr: *const float64x2_t) -> float64x2x4_t;
|
||||
}
|
||||
vld4q_f64_(a.cast())
|
||||
vld4q_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
|
|
@ -5758,7 +5758,7 @@ pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64")]
|
||||
fn vld4q_dup_s64_(ptr: *const i64) -> int64x2x4_t;
|
||||
}
|
||||
vld4q_dup_s64_(a.cast())
|
||||
vld4q_dup_s64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
|
|
@ -5787,7 +5787,7 @@ pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64")]
|
||||
fn vld4_dup_f64_(ptr: *const f64) -> float64x1x4_t;
|
||||
}
|
||||
vld4_dup_f64_(a.cast())
|
||||
vld4_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
|
|
@ -5800,7 +5800,7 @@ pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64")]
|
||||
fn vld4q_dup_f64_(ptr: *const f64) -> float64x2x4_t;
|
||||
}
|
||||
vld4q_dup_f64_(a.cast())
|
||||
vld4q_dup_f64_(a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5815,7 +5815,7 @@ pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> in
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8")]
|
||||
fn vld4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *const i8) -> int8x16x4_t;
|
||||
}
|
||||
vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5830,7 +5830,7 @@ pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> i
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8")]
|
||||
fn vld4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *const i8) -> int64x1x4_t;
|
||||
}
|
||||
vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5845,7 +5845,7 @@ pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8")]
|
||||
fn vld4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *const i8) -> int64x2x4_t;
|
||||
}
|
||||
vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5920,7 +5920,7 @@ pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) ->
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8")]
|
||||
fn vld4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *const i8) -> float64x1x4_t;
|
||||
}
|
||||
vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
|
|
@ -5935,7 +5935,7 @@ pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8")]
|
||||
fn vld4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *const i8) -> float64x2x4_t;
|
||||
}
|
||||
vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
|
||||
|
|
@ -6046,7 +6046,7 @@ pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2i64.p0i8")]
|
||||
fn vst2q_s64_(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst2q_s64_(b.0, b.1, a.cast())
|
||||
vst2q_s64_(b.0, b.1, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6075,7 +6075,7 @@ pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v1f64.p0i8")]
|
||||
fn vst2_f64_(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
|
||||
}
|
||||
vst2_f64_(b.0, b.1, a.cast())
|
||||
vst2_f64_(b.0, b.1, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6088,7 +6088,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2f64.p0i8")]
|
||||
fn vst2q_f64_(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst2q_f64_(b.0, b.1, a.cast())
|
||||
vst2q_f64_(b.0, b.1, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6103,7 +6103,7 @@ pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8")]
|
||||
fn vst2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst2q_lane_s8_(b.0, b.1, LANE as i64, a.cast())
|
||||
vst2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6118,7 +6118,7 @@ pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8")]
|
||||
fn vst2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst2_lane_s64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vst2_lane_s64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6133,7 +6133,7 @@ pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8")]
|
||||
fn vst2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst2q_lane_s64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vst2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6208,7 +6208,7 @@ pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8")]
|
||||
fn vst2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst2_lane_f64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vst2_lane_f64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
|
|
@ -6223,7 +6223,7 @@ pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8")]
|
||||
fn vst2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst2q_lane_f64_(b.0, b.1, LANE as i64, a.cast())
|
||||
vst2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6236,7 +6236,7 @@ pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2i64.p0i8")]
|
||||
fn vst3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst3q_s64_(b.0, b.1, b.2, a.cast())
|
||||
vst3q_s64_(b.0, b.1, b.2, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6265,7 +6265,7 @@ pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v1f64.p0i8")]
|
||||
fn vst3_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
|
||||
}
|
||||
vst3_f64_(b.0, b.1, b.2, a.cast())
|
||||
vst3_f64_(b.0, b.1, b.2, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6278,7 +6278,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2f64.p0i8")]
|
||||
fn vst3q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst3q_f64_(b.0, b.1, b.2, a.cast())
|
||||
vst3q_f64_(b.0, b.1, b.2, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6293,7 +6293,7 @@ pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8")]
|
||||
fn vst3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6308,7 +6308,7 @@ pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8")]
|
||||
fn vst3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6323,7 +6323,7 @@ pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8")]
|
||||
fn vst3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6398,7 +6398,7 @@ pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8")]
|
||||
fn vst3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
|
|
@ -6413,7 +6413,7 @@ pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8")]
|
||||
fn vst3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a.cast())
|
||||
vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6426,7 +6426,7 @@ pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2i64.p0i8")]
|
||||
fn vst4q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst4q_s64_(b.0, b.1, b.2, b.3, a.cast())
|
||||
vst4q_s64_(b.0, b.1, b.2, b.3, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6455,7 +6455,7 @@ pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v1f64.p0i8")]
|
||||
fn vst4_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
|
||||
}
|
||||
vst4_f64_(b.0, b.1, b.2, b.3, a.cast())
|
||||
vst4_f64_(b.0, b.1, b.2, b.3, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6468,7 +6468,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2f64.p0i8")]
|
||||
fn vst4q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
|
||||
}
|
||||
vst4q_f64_(b.0, b.1, b.2, b.3, a.cast())
|
||||
vst4q_f64_(b.0, b.1, b.2, b.3, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6483,7 +6483,7 @@ pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8")]
|
||||
fn vst4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6498,7 +6498,7 @@ pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8")]
|
||||
fn vst4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6513,7 +6513,7 @@ pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8")]
|
||||
fn vst4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6588,7 +6588,7 @@ pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8")]
|
||||
fn vst4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
|
|
@ -6603,7 +6603,7 @@ pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
|
|||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8")]
|
||||
fn vst4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *mut i8);
|
||||
}
|
||||
vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a.cast())
|
||||
vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
|
|
@ -7512,7 +7512,7 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
|
||||
a - b
|
||||
a.wrapping_sub(b)
|
||||
}
|
||||
|
||||
/// Subtract
|
||||
|
|
@ -7520,7 +7520,7 @@ pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
|
||||
a - b
|
||||
a.wrapping_sub(b)
|
||||
}
|
||||
|
||||
/// Add
|
||||
|
|
@ -7528,7 +7528,7 @@ pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
|
||||
a + b
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Add
|
||||
|
|
@ -7536,7 +7536,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
|
||||
a + b
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Floating-point add across vector
|
||||
|
|
@ -11536,7 +11536,7 @@ pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> u
|
|||
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
let b: i64 = vrshrd_n_s64::<N>(b);
|
||||
a + b
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Ungisned rounding shift right and accumulate.
|
||||
|
|
@ -11547,7 +11547,7 @@ pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
|||
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
let b: u64 = vrshrd_n_u64::<N>(b);
|
||||
a + b
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Rounding subtract returning high narrow
|
||||
|
|
@ -17802,7 +17802,7 @@ mod test {
|
|||
let a: [f64; 2] = [0., 1.];
|
||||
let e: [f64; 1] = [1.];
|
||||
let mut r: [f64; 1] = [0f64; 1];
|
||||
vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17811,7 +17811,7 @@ mod test {
|
|||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e: [f64; 2] = [1., 0.];
|
||||
let mut r: [f64; 2] = [0f64; 2];
|
||||
vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17820,7 +17820,7 @@ mod test {
|
|||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e: [f64; 2] = [1., 2.];
|
||||
let mut r: [f64; 2] = [0f64; 2];
|
||||
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17829,7 +17829,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 3., 4.];
|
||||
let e: [f64; 4] = [1., 2., 3., 4.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17838,7 +17838,7 @@ mod test {
|
|||
let a: [f64; 4] = [0., 1., 2., 3.];
|
||||
let e: [f64; 3] = [1., 2., 3.];
|
||||
let mut r: [f64; 3] = [0f64; 3];
|
||||
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17847,7 +17847,7 @@ mod test {
|
|||
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
|
||||
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
|
||||
let mut r: [f64; 6] = [0f64; 6];
|
||||
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17856,7 +17856,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 3., 4.];
|
||||
let e: [f64; 4] = [1., 2., 3., 4.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17865,7 +17865,7 @@ mod test {
|
|||
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let mut r: [f64; 8] = [0f64; 8];
|
||||
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17874,7 +17874,7 @@ mod test {
|
|||
let a: [i64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [i64; 4] = [1, 2, 2, 3];
|
||||
let mut r: [i64; 4] = [0i64; 4];
|
||||
vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17883,7 +17883,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [u64; 4] = [1, 2, 2, 3];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17892,7 +17892,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [u64; 4] = [1, 2, 2, 3];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17901,7 +17901,7 @@ mod test {
|
|||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e: [f64; 2] = [1., 2.];
|
||||
let mut r: [f64; 2] = [0f64; 2];
|
||||
vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17910,7 +17910,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 2., 3.];
|
||||
let e: [f64; 4] = [1., 2., 2., 3.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17919,7 +17919,7 @@ mod test {
|
|||
let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
|
||||
let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [i8; 32] = [0i8; 32];
|
||||
vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17928,7 +17928,7 @@ mod test {
|
|||
let a: [i64; 3] = [0, 1, 2];
|
||||
let e: [i64; 2] = [1, 2];
|
||||
let mut r: [i64; 2] = [0i64; 2];
|
||||
vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17937,7 +17937,7 @@ mod test {
|
|||
let a: [i64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [i64; 4] = [1, 2, 0, 0];
|
||||
let mut r: [i64; 4] = [0i64; 4];
|
||||
vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17946,7 +17946,7 @@ mod test {
|
|||
let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
|
||||
let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17955,7 +17955,7 @@ mod test {
|
|||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e: [u64; 2] = [1, 2];
|
||||
let mut r: [u64; 2] = [0u64; 2];
|
||||
vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17964,7 +17964,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [u64; 4] = [1, 2, 0, 0];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17973,7 +17973,7 @@ mod test {
|
|||
let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
|
||||
let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17982,7 +17982,7 @@ mod test {
|
|||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e: [u64; 2] = [1, 2];
|
||||
let mut r: [u64; 2] = [0u64; 2];
|
||||
vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -17991,7 +17991,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 3];
|
||||
let e: [u64; 4] = [1, 2, 0, 0];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18000,7 +18000,7 @@ mod test {
|
|||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e: [f64; 2] = [1., 2.];
|
||||
let mut r: [f64; 2] = [0f64; 2];
|
||||
vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18009,7 +18009,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 2., 3.];
|
||||
let e: [f64; 4] = [1., 2., 0., 0.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18018,7 +18018,7 @@ mod test {
|
|||
let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [i64; 6] = [1, 2, 2, 2, 4, 4];
|
||||
let mut r: [i64; 6] = [0i64; 6];
|
||||
vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18027,7 +18027,7 @@ mod test {
|
|||
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18036,7 +18036,7 @@ mod test {
|
|||
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18045,7 +18045,7 @@ mod test {
|
|||
let a: [f64; 4] = [0., 1., 2., 2.];
|
||||
let e: [f64; 3] = [1., 2., 2.];
|
||||
let mut r: [f64; 3] = [0f64; 3];
|
||||
vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18054,7 +18054,7 @@ mod test {
|
|||
let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.];
|
||||
let e: [f64; 6] = [1., 2., 2., 2., 4., 4.];
|
||||
let mut r: [f64; 6] = [0f64; 6];
|
||||
vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18063,7 +18063,7 @@ mod test {
|
|||
let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
|
||||
let e: [i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [i8; 48] = [0i8; 48];
|
||||
vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18072,7 +18072,7 @@ mod test {
|
|||
let a: [i64; 4] = [0, 1, 2, 2];
|
||||
let e: [i64; 3] = [1, 2, 2];
|
||||
let mut r: [i64; 3] = [0i64; 3];
|
||||
vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18081,7 +18081,7 @@ mod test {
|
|||
let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [i64; 6] = [1, 2, 2, 0, 0, 0];
|
||||
let mut r: [i64; 6] = [0i64; 6];
|
||||
vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18090,7 +18090,7 @@ mod test {
|
|||
let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
|
||||
let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 48] = [0u8; 48];
|
||||
vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18099,7 +18099,7 @@ mod test {
|
|||
let a: [u64; 4] = [0, 1, 2, 2];
|
||||
let e: [u64; 3] = [1, 2, 2];
|
||||
let mut r: [u64; 3] = [0u64; 3];
|
||||
vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18108,7 +18108,7 @@ mod test {
|
|||
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18117,7 +18117,7 @@ mod test {
|
|||
let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
|
||||
let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 48] = [0u8; 48];
|
||||
vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18126,7 +18126,7 @@ mod test {
|
|||
let a: [u64; 4] = [0, 1, 2, 2];
|
||||
let e: [u64; 3] = [1, 2, 2];
|
||||
let mut r: [u64; 3] = [0u64; 3];
|
||||
vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18135,7 +18135,7 @@ mod test {
|
|||
let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
|
||||
let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18144,7 +18144,7 @@ mod test {
|
|||
let a: [f64; 4] = [0., 1., 2., 2.];
|
||||
let e: [f64; 3] = [1., 2., 2.];
|
||||
let mut r: [f64; 3] = [0f64; 3];
|
||||
vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18153,7 +18153,7 @@ mod test {
|
|||
let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.];
|
||||
let e: [f64; 6] = [1., 2., 2., 0., 0., 0.];
|
||||
let mut r: [f64; 6] = [0f64; 6];
|
||||
vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18162,7 +18162,7 @@ mod test {
|
|||
let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let mut r: [i64; 8] = [0i64; 8];
|
||||
vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18171,7 +18171,7 @@ mod test {
|
|||
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18180,7 +18180,7 @@ mod test {
|
|||
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18189,7 +18189,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 2., 6.];
|
||||
let e: [f64; 4] = [1., 2., 2., 6.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18198,7 +18198,7 @@ mod test {
|
|||
let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
|
||||
let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.];
|
||||
let mut r: [f64; 8] = [0f64; 8];
|
||||
vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18207,7 +18207,7 @@ mod test {
|
|||
let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
|
||||
let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [i8; 64] = [0i8; 64];
|
||||
vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18216,7 +18216,7 @@ mod test {
|
|||
let a: [i64; 5] = [0, 1, 2, 2, 6];
|
||||
let e: [i64; 4] = [1, 2, 2, 6];
|
||||
let mut r: [i64; 4] = [0i64; 4];
|
||||
vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18225,7 +18225,7 @@ mod test {
|
|||
let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
|
||||
let mut r: [i64; 8] = [0i64; 8];
|
||||
vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18234,7 +18234,7 @@ mod test {
|
|||
let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
|
||||
let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 64] = [0u8; 64];
|
||||
vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18243,7 +18243,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 6];
|
||||
let e: [u64; 4] = [1, 2, 2, 6];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18252,7 +18252,7 @@ mod test {
|
|||
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18261,7 +18261,7 @@ mod test {
|
|||
let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
|
||||
let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut r: [u8; 64] = [0u8; 64];
|
||||
vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18270,7 +18270,7 @@ mod test {
|
|||
let a: [u64; 5] = [0, 1, 2, 2, 6];
|
||||
let e: [u64; 4] = [1, 2, 2, 6];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18279,7 +18279,7 @@ mod test {
|
|||
let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
|
||||
let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18288,7 +18288,7 @@ mod test {
|
|||
let a: [f64; 5] = [0., 1., 2., 2., 6.];
|
||||
let e: [f64; 4] = [1., 2., 2., 6.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -18297,7 +18297,7 @@ mod test {
|
|||
let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
|
||||
let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.];
|
||||
let mut r: [f64; 8] = [0f64; 8];
|
||||
vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2814,7 +2814,7 @@ pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
|
|||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a + vshrd_n_s64::<N>(b)
|
||||
a.wrapping_add(vshrd_n_s64::<N>(b))
|
||||
}
|
||||
|
||||
/// Unsigned shift right and accumulate
|
||||
|
|
@ -2824,7 +2824,7 @@ pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
|||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a + vshrd_n_u64::<N>(b)
|
||||
a.wrapping_add(vshrd_n_u64::<N>(b))
|
||||
}
|
||||
|
||||
/// Shift Left and Insert (immediate)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -6,8 +6,7 @@ mod generated;
|
|||
pub use self::generated::*;
|
||||
|
||||
use crate::{
|
||||
convert::TryInto, core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked,
|
||||
mem::transmute,
|
||||
core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute,
|
||||
};
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ macro_rules! simd_ty {
|
|||
|
||||
#[allow(clippy::use_self)]
|
||||
impl $id {
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self {
|
||||
$id($($elem_name),*)
|
||||
}
|
||||
|
|
@ -43,12 +43,12 @@ macro_rules! simd_m_ty {
|
|||
|
||||
#[allow(clippy::use_self)]
|
||||
impl $id {
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
const fn bool_to_internal(x: bool) -> $ety {
|
||||
[0 as $ety, !(0 as $ety)][x as usize]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[inline(always)]
|
||||
pub(crate) const fn new($($elem_name: bool),*) -> Self {
|
||||
$id($(Self::bool_to_internal($elem_name)),*)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,115 +0,0 @@
|
|||
vmmlaq_s32
|
||||
vmmlaq_u32
|
||||
vrnd32x_f64
|
||||
vrnd32xq_f64
|
||||
vrnd32z_f64
|
||||
vrnd32zq_f64
|
||||
vrnd64x_f64
|
||||
vrnd64z_f64
|
||||
vrnd64zq_f64
|
||||
vsm3partw1q_u32
|
||||
vsm3partw2q_u32
|
||||
vsm3tt1bq_u32
|
||||
vsm3tt2aq_u32
|
||||
vsm3tt2bq_u32
|
||||
vsm4ekeyq_u32
|
||||
vsm4eq_u32
|
||||
vsudot_lane_s32
|
||||
vsudot_laneq_s32
|
||||
vsudotq_lane_s32
|
||||
vsudotq_laneq_s32
|
||||
vusdot_lane_s32
|
||||
vusdot_laneq_s32
|
||||
vusdot_s32
|
||||
vusdotq_lane_s32
|
||||
vusdotq_laneq_s32
|
||||
vusdotq_s32
|
||||
vcls_u16
|
||||
vcls_u32
|
||||
vcls_u8
|
||||
vclsq_u16
|
||||
vclsq_u32
|
||||
vclsq_u8
|
||||
vcreate_s16
|
||||
vcreate_u16
|
||||
vpaddq_s64
|
||||
vpaddq_u64
|
||||
vqshlu_n_s16
|
||||
vqshlu_n_s32
|
||||
vqshlu_n_s64
|
||||
vqshlu_n_s8
|
||||
vqshlub_n_s8
|
||||
vqshlud_n_s64
|
||||
vqshluh_n_s16
|
||||
vqshluq_n_s16
|
||||
vqshluq_n_s32
|
||||
vqshluq_n_s64
|
||||
vqshluq_n_s8
|
||||
vqshlus_n_s32
|
||||
vrax1q_u64
|
||||
vreinterpretq_p128_f32
|
||||
vreinterpretq_p128_f64
|
||||
vreinterpretq_p128_p16
|
||||
vreinterpretq_p128_p8
|
||||
vreinterpretq_p128_s16
|
||||
vreinterpretq_p128_s32
|
||||
vreinterpretq_p128_s64
|
||||
vreinterpretq_p128_s8
|
||||
vreinterpretq_p128_u16
|
||||
vreinterpretq_p128_u32
|
||||
vreinterpretq_p128_u64
|
||||
vreinterpretq_p128_u8
|
||||
vrnd32x_f32
|
||||
vrnd32xq_f32
|
||||
vrnd32z_f32
|
||||
vrnd32zq_f32
|
||||
vrnd64x_f32
|
||||
vrnd64xq_f32
|
||||
vrnd64xq_f64
|
||||
vrnd64z_f32
|
||||
vrnd64zq_f32
|
||||
vsha512h2q_u64
|
||||
vsha512hq_u64
|
||||
vsha512su0q_u64
|
||||
vsha512su1q_u64
|
||||
vslid_n_s64
|
||||
vslid_n_u64
|
||||
vsm3ss1q_u32
|
||||
vsm3tt1aq_u32
|
||||
vsrid_n_s64
|
||||
vsrid_n_u64
|
||||
vusmmlaq_s32
|
||||
vxarq_u64
|
||||
vadd_p16
|
||||
vadd_p64
|
||||
vadd_p8
|
||||
vaddq_p16
|
||||
vaddq_p64
|
||||
vaddq_p8
|
||||
vbcaxq_s16
|
||||
vbcaxq_s32
|
||||
vbcaxq_s64
|
||||
vbcaxq_s8
|
||||
vbcaxq_u16
|
||||
vbcaxq_u32
|
||||
vbcaxq_u64
|
||||
vbcaxq_u8
|
||||
veor3q_s16
|
||||
veor3q_s32
|
||||
veor3q_s64
|
||||
veor3q_s8
|
||||
veor3q_u16
|
||||
veor3q_u32
|
||||
veor3q_u64
|
||||
veor3q_u8
|
||||
vshld_s64
|
||||
vshld_u64
|
||||
vcopyq_laneq_u8
|
||||
vcopyq_laneq_s8
|
||||
vcopyq_laneq_p8
|
||||
vcopyq_lane_u8
|
||||
vcopyq_lane_s8
|
||||
vcopyq_lane_p8
|
||||
vcopy_laneq_u8
|
||||
vcopy_laneq_s8
|
||||
vcopy_laneq_p8
|
||||
133
library/stdarch/crates/intrinsic-test/missing_aarch64.txt
Normal file
133
library/stdarch/crates/intrinsic-test/missing_aarch64.txt
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
# Not implemented in stdarch yet
|
||||
vbfdot_f32
|
||||
vbfdot_lane_f32
|
||||
vbfdot_laneq_f32
|
||||
vbfdotq_f32
|
||||
vbfdotq_lane_f32
|
||||
vbfdotq_laneq_f32
|
||||
vbfmlalbq_f32
|
||||
vbfmlalbq_lane_f32
|
||||
vbfmlalbq_laneq_f32
|
||||
vbfmlaltq_f32
|
||||
vbfmlaltq_lane_f32
|
||||
vbfmlaltq_laneq_f32
|
||||
vbfmmlaq_f32
|
||||
vsudot_laneq_s32
|
||||
vsudot_lane_s32
|
||||
vsudotq_laneq_s32
|
||||
vsudotq_lane_s32
|
||||
vusdot_laneq_s32
|
||||
vusdot_lane_s32
|
||||
vusdotq_laneq_s32
|
||||
vusdotq_lane_s32
|
||||
vusdotq_s32
|
||||
vusdot_s32
|
||||
|
||||
# Implemented in Clang but missing from CSV
|
||||
vcmla_f64
|
||||
vcmla_lane_f64
|
||||
vcmla_laneq_f64
|
||||
vcmlaq_lane_f64
|
||||
vcmlaq_laneq_f64
|
||||
vcmlaq_rot180_lane_f64
|
||||
vcmlaq_rot180_laneq_f64
|
||||
vcmlaq_rot270_lane_f64
|
||||
vcmlaq_rot270_laneq_f64
|
||||
vcmlaq_rot90_lane_f64
|
||||
vcmlaq_rot90_laneq_f64
|
||||
vcmla_rot180_f64
|
||||
vcmla_rot180_lane_f64
|
||||
vcmla_rot180_laneq_f64
|
||||
vcmla_rot270_f64
|
||||
vcmla_rot270_lane_f64
|
||||
vcmla_rot270_laneq_f64
|
||||
vcmla_rot90_f64
|
||||
vcmla_rot90_lane_f64
|
||||
vcmla_rot90_laneq_f64
|
||||
|
||||
# Implemented in Clang and stdarch but missing from CSV
|
||||
vmov_n_p64
|
||||
vmovq_n_p64
|
||||
vreinterpret_f32_p64
|
||||
vreinterpret_p64_s64
|
||||
vreinterpretq_f32_p128
|
||||
vreinterpretq_f32_p64
|
||||
vreinterpretq_p128_p64
|
||||
vreinterpretq_p64_p128
|
||||
vtst_p16
|
||||
vtstq_p16
|
||||
|
||||
# Missing from both Clang and stdarch
|
||||
vrnd32x_f64
|
||||
vrnd32xq_f64
|
||||
vrnd32z_f64
|
||||
vrnd32zq_f64
|
||||
vrnd64x_f64
|
||||
vrnd64xq_f64
|
||||
vrnd64z_f64
|
||||
vrnd64zq_f64
|
||||
|
||||
# Takes too long to compile tests
|
||||
vcopyq_laneq_u8
|
||||
vcopyq_laneq_s8
|
||||
vcopyq_laneq_p8
|
||||
vcopyq_lane_u8
|
||||
vcopyq_lane_s8
|
||||
vcopyq_lane_p8
|
||||
vcopy_laneq_u8
|
||||
vcopy_laneq_s8
|
||||
vcopy_laneq_p8
|
||||
vcopy_lane_u8
|
||||
vcopy_lane_s8
|
||||
vcopy_lane_p8
|
||||
|
||||
# QEMU 6.0 doesn't support these instructions
|
||||
vmmlaq_s32
|
||||
vmmlaq_u32
|
||||
vsm3partw1q_u32
|
||||
vsm3partw2q_u32
|
||||
vsm3ss1q_u32
|
||||
vsm3tt1aq_u32
|
||||
vsm3tt1bq_u32
|
||||
vsm3tt2aq_u32
|
||||
vsm3tt2bq_u32
|
||||
vsm4ekeyq_u32
|
||||
vsm4eq_u32
|
||||
vusmmlaq_s32
|
||||
|
||||
# LLVM select error in debug builds
|
||||
vqshlu_n_s16
|
||||
vqshlu_n_s32
|
||||
vqshlu_n_s64
|
||||
vqshlu_n_s8
|
||||
vqshlub_n_s8
|
||||
vqshlud_n_s64
|
||||
vqshluh_n_s16
|
||||
vqshluq_n_s16
|
||||
vqshluq_n_s32
|
||||
vqshluq_n_s64
|
||||
vqshluq_n_s8
|
||||
vqshlus_n_s32
|
||||
|
||||
# These tests produce a different result from C but only in debug builds of
|
||||
# stdarch. This likely both a bug in stdarch (expanding to a different LLVM
|
||||
# intrinsic) and a bug in LLVM (incorrect optimization changing the behavior of
|
||||
# integer operations).
|
||||
vqrdmlah_lane_s16
|
||||
vqrdmlah_lane_s32
|
||||
vqrdmlah_laneq_s16
|
||||
vqrdmlah_laneq_s32
|
||||
vqrdmlah_s16
|
||||
vqrdmlah_s32
|
||||
vqrdmlahh_lane_s16
|
||||
vqrdmlahh_laneq_s16
|
||||
vqrdmlahh_s16
|
||||
vqrdmlahq_lane_s16
|
||||
vqrdmlahq_lane_s32
|
||||
vqrdmlahq_laneq_s16
|
||||
vqrdmlahq_laneq_s32
|
||||
vqrdmlahq_s16
|
||||
vqrdmlahq_s32
|
||||
vqrdmlahs_lane_s32
|
||||
vqrdmlahs_laneq_s32
|
||||
vqrdmlahs_s32
|
||||
334
library/stdarch/crates/intrinsic-test/missing_arm.txt
Normal file
334
library/stdarch/crates/intrinsic-test/missing_arm.txt
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
# Not implemented in stdarch yet
|
||||
vbfdot_f32
|
||||
vbfdot_lane_f32
|
||||
vbfdot_laneq_f32
|
||||
vbfdotq_f32
|
||||
vbfdotq_lane_f32
|
||||
vbfdotq_laneq_f32
|
||||
vbfmlalbq_f32
|
||||
vbfmlalbq_lane_f32
|
||||
vbfmlalbq_laneq_f32
|
||||
vbfmlaltq_f32
|
||||
vbfmlaltq_lane_f32
|
||||
vbfmlaltq_laneq_f32
|
||||
vbfmmlaq_f32
|
||||
vsudot_laneq_s32
|
||||
vsudot_lane_s32
|
||||
vsudotq_laneq_s32
|
||||
vsudotq_lane_s32
|
||||
vusdot_laneq_s32
|
||||
vusdot_lane_s32
|
||||
vusdotq_laneq_s32
|
||||
vusdotq_lane_s32
|
||||
vusdotq_s32
|
||||
vusdot_s32
|
||||
|
||||
# Implemented in Clang and stdarch but missing from CSV
|
||||
vtst_p16
|
||||
vtstq_p16
|
||||
|
||||
# QEMU 6.0 doesn't support these instructions
|
||||
vmmlaq_s32
|
||||
vmmlaq_u32
|
||||
vusmmlaq_s32
|
||||
|
||||
# Implemented in Clang and stdarch for A64 only even though CSV claims A32 support
|
||||
__crc32d
|
||||
__crc32cd
|
||||
vaddq_p64
|
||||
vbsl_p64
|
||||
vbslq_p64
|
||||
vceq_p64
|
||||
vceqq_p64
|
||||
vceqz_p64
|
||||
vceqzq_p64
|
||||
vcombine_p64
|
||||
vcopy_lane_p64
|
||||
vcopy_laneq_p64
|
||||
vcopyq_lane_p64
|
||||
vcopyq_laneq_p64
|
||||
vcreate_p64
|
||||
vdup_lane_p64
|
||||
vdup_n_p64
|
||||
vdupq_lane_p64
|
||||
vdupq_n_p64
|
||||
vext_p64
|
||||
vextq_p64
|
||||
vget_high_p64
|
||||
vget_lane_p64
|
||||
vget_low_p64
|
||||
vgetq_lane_p64
|
||||
vmovn_high_s16
|
||||
vmovn_high_s32
|
||||
vmovn_high_s64
|
||||
vmovn_high_u16
|
||||
vmovn_high_u32
|
||||
vmovn_high_u64
|
||||
vmull_high_p64
|
||||
vmull_p64
|
||||
vreinterpret_p16_p64
|
||||
vreinterpret_p64_f32
|
||||
vreinterpret_p64_p16
|
||||
vreinterpret_p64_p8
|
||||
vreinterpret_p64_s16
|
||||
vreinterpret_p64_s32
|
||||
vreinterpret_p64_s8
|
||||
vreinterpret_p64_u16
|
||||
vreinterpret_p64_u32
|
||||
vreinterpret_p64_u64
|
||||
vreinterpret_p64_u8
|
||||
vreinterpret_p8_p64
|
||||
vreinterpretq_f64_u64
|
||||
vreinterpretq_p128_f32
|
||||
vreinterpretq_p128_p16
|
||||
vreinterpretq_p128_p8
|
||||
vreinterpretq_p128_s16
|
||||
vreinterpretq_p128_s32
|
||||
vreinterpretq_p128_s64
|
||||
vreinterpretq_p128_s8
|
||||
vreinterpretq_p128_u16
|
||||
vreinterpretq_p128_u32
|
||||
vreinterpretq_p128_u64
|
||||
vreinterpretq_p128_u8
|
||||
vreinterpretq_p16_p64
|
||||
vreinterpretq_p64_f32
|
||||
vreinterpretq_p64_p16
|
||||
vreinterpretq_p64_p8
|
||||
vreinterpretq_p64_s16
|
||||
vreinterpretq_p64_s32
|
||||
vreinterpretq_p64_s64
|
||||
vreinterpretq_p64_s8
|
||||
vreinterpretq_p64_u16
|
||||
vreinterpretq_p64_u32
|
||||
vreinterpretq_p64_u64
|
||||
vreinterpretq_p64_u8
|
||||
vreinterpretq_p8_p64
|
||||
vreinterpretq_s16_p64
|
||||
vreinterpretq_s32_p64
|
||||
vreinterpretq_s64_p64
|
||||
vreinterpretq_s8_p64
|
||||
vreinterpretq_u16_p64
|
||||
vreinterpretq_u32_p64
|
||||
vreinterpretq_u64_p64
|
||||
vreinterpretq_u8_p64
|
||||
vreinterpret_s16_p64
|
||||
vreinterpret_s32_p64
|
||||
vreinterpret_s64_p64
|
||||
vreinterpret_s8_p64
|
||||
vreinterpret_u16_p64
|
||||
vreinterpret_u32_p64
|
||||
vreinterpret_u64_p64
|
||||
vreinterpret_u8_p64
|
||||
vrndn_f64
|
||||
vrndnq_f64
|
||||
vset_lane_p64
|
||||
vsetq_lane_p64
|
||||
vsli_n_p64
|
||||
vsliq_n_p64
|
||||
vsri_n_p64
|
||||
vsriq_n_p64
|
||||
vtst_p64
|
||||
vtstq_p64
|
||||
|
||||
# Present in Clang header but triggers an ICE due to lack of backend support.
|
||||
vcmla_f32
|
||||
vcmla_lane_f32
|
||||
vcmla_laneq_f32
|
||||
vcmla_rot180_f32
|
||||
vcmla_rot180_lane_f32
|
||||
vcmla_rot180_laneq_f32
|
||||
vcmla_rot270_f32
|
||||
vcmla_rot270_lane_f32
|
||||
vcmla_rot270_laneq_f32
|
||||
vcmla_rot90_f32
|
||||
vcmla_rot90_lane_f32
|
||||
vcmla_rot90_laneq_f32
|
||||
vcmlaq_f32
|
||||
vcmlaq_lane_f32
|
||||
vcmlaq_laneq_f32
|
||||
vcmlaq_rot180_f32
|
||||
vcmlaq_rot180_lane_f32
|
||||
vcmlaq_rot180_laneq_f32
|
||||
vcmlaq_rot270_f32
|
||||
vcmlaq_rot270_lane_f32
|
||||
vcmlaq_rot270_laneq_f32
|
||||
vcmlaq_rot90_f32
|
||||
vcmlaq_rot90_lane_f32
|
||||
vcmlaq_rot90_laneq_f32
|
||||
|
||||
# Implemented in stdarch for A64 only, Clang support both A32/A64
|
||||
vadd_s64
|
||||
vadd_u64
|
||||
vcaddq_rot270_f32
|
||||
vcaddq_rot90_f32
|
||||
vcadd_rot270_f32
|
||||
vcadd_rot90_f32
|
||||
vcombine_f32
|
||||
vcombine_p16
|
||||
vcombine_p8
|
||||
vcombine_s16
|
||||
vcombine_s32
|
||||
vcombine_s64
|
||||
vcombine_s8
|
||||
vcombine_u16
|
||||
vcombine_u32
|
||||
vcombine_u64
|
||||
vcombine_u8
|
||||
vcvtaq_s32_f32
|
||||
vcvtaq_u32_f32
|
||||
vcvta_s32_f32
|
||||
vcvta_u32_f32
|
||||
vcvtmq_s32_f32
|
||||
vcvtmq_u32_f32
|
||||
vcvtm_s32_f32
|
||||
vcvtm_u32_f32
|
||||
vcvtnq_s32_f32
|
||||
vcvtnq_u32_f32
|
||||
vcvtn_s32_f32
|
||||
vcvtn_u32_f32
|
||||
vcvtpq_s32_f32
|
||||
vcvtpq_u32_f32
|
||||
vcvtp_s32_f32
|
||||
vcvtp_u32_f32
|
||||
vdot_lane_s32
|
||||
vdot_lane_u32
|
||||
vdotq_lane_s32
|
||||
vdotq_lane_u32
|
||||
vdotq_s32
|
||||
vdotq_u32
|
||||
vdot_s32
|
||||
vdot_u32
|
||||
vqdmulh_lane_s16
|
||||
vqdmulh_lane_s32
|
||||
vqdmulhq_lane_s16
|
||||
vqdmulhq_lane_s32
|
||||
vrnda_f32
|
||||
vrnda_f32
|
||||
vrndaq_f32
|
||||
vrndaq_f32
|
||||
vrnd_f32
|
||||
vrnd_f32
|
||||
vrndi_f32
|
||||
vrndi_f32
|
||||
vrndiq_f32
|
||||
vrndiq_f32
|
||||
vrndm_f32
|
||||
vrndm_f32
|
||||
vrndmq_f32
|
||||
vrndmq_f32
|
||||
vrndns_f32
|
||||
vrndp_f32
|
||||
vrndpq_f32
|
||||
vrndq_f32
|
||||
vrndq_f32
|
||||
vrndx_f32
|
||||
vrndxq_f32
|
||||
|
||||
# LLVM select error in debug builds
|
||||
vqrshrn_n_s16
|
||||
vqrshrn_n_s32
|
||||
vqrshrn_n_s64
|
||||
vqrshrn_n_u16
|
||||
vqrshrn_n_u32
|
||||
vqrshrn_n_u64
|
||||
vqrshrun_n_s16
|
||||
vqrshrun_n_s32
|
||||
vqrshrun_n_s64
|
||||
vqshrn_n_s16
|
||||
vqshrn_n_s32
|
||||
vqshrn_n_s64
|
||||
vqshrn_n_u16
|
||||
vqshrn_n_u32
|
||||
vqshrn_n_u64
|
||||
vqshrun_n_s16
|
||||
vqshrun_n_s32
|
||||
vqshrun_n_s64
|
||||
vrshrn_n_s16
|
||||
vrshrn_n_s32
|
||||
vrshrn_n_s64
|
||||
vrshrn_n_u16
|
||||
vrshrn_n_u32
|
||||
vrshrn_n_u64
|
||||
vshrq_n_u64
|
||||
vshr_n_u64
|
||||
|
||||
# Failing tests: stdarch has incorrect results compared to Clang
|
||||
vqshlu_n_s16
|
||||
vqshlu_n_s32
|
||||
vqshlu_n_s64
|
||||
vqshlu_n_s8
|
||||
vqshluq_n_s16
|
||||
vqshluq_n_s32
|
||||
vqshluq_n_s64
|
||||
vqshluq_n_s8
|
||||
vsli_n_p16
|
||||
vsli_n_p8
|
||||
vsli_n_s16
|
||||
vsli_n_s32
|
||||
vsli_n_s64
|
||||
vsli_n_s8
|
||||
vsli_n_u16
|
||||
vsli_n_u32
|
||||
vsli_n_u64
|
||||
vsli_n_u8
|
||||
vsliq_n_p16
|
||||
vsliq_n_p8
|
||||
vsliq_n_s16
|
||||
vsliq_n_s32
|
||||
vsliq_n_s64
|
||||
vsliq_n_s8
|
||||
vsliq_n_u16
|
||||
vsliq_n_u32
|
||||
vsliq_n_u64
|
||||
vsliq_n_u8
|
||||
vsri_n_p16
|
||||
vsri_n_p8
|
||||
vsri_n_s16
|
||||
vsri_n_s32
|
||||
vsri_n_s64
|
||||
vsri_n_s8
|
||||
vsri_n_u16
|
||||
vsri_n_u32
|
||||
vsri_n_u64
|
||||
vsri_n_u8
|
||||
vsriq_n_p16
|
||||
vsriq_n_p8
|
||||
vsriq_n_s16
|
||||
vsriq_n_s32
|
||||
vsriq_n_s64
|
||||
vsriq_n_s8
|
||||
vsriq_n_u16
|
||||
vsriq_n_u32
|
||||
vsriq_n_u64
|
||||
vsriq_n_u8
|
||||
|
||||
# These produce a different result on Clang depending on the optimization level.
|
||||
# This is definitely a bug in LLVM.
|
||||
vadd_f32
|
||||
vaddq_f32
|
||||
vcvt_s32_f32
|
||||
vcvt_u32_f32
|
||||
vcvtq_s32_f32
|
||||
vcvtq_u32_f32
|
||||
vfma_f32
|
||||
vfma_n_f32
|
||||
vfmaq_f32
|
||||
vfmaq_n_f32
|
||||
vfms_f32
|
||||
vfmsq_f32
|
||||
vmla_f32
|
||||
vmla_lane_f32
|
||||
vmla_n_f32
|
||||
vmlaq_f32
|
||||
vmlaq_lane_f32
|
||||
vmlaq_n_f32
|
||||
vmls_f32
|
||||
vmls_lane_f32
|
||||
vmls_n_f32
|
||||
vmlsq_f32
|
||||
vmlsq_lane_f32
|
||||
vmlsq_n_f32
|
||||
vmul_lane_f32
|
||||
vmul_n_f32
|
||||
vmulq_lane_f32
|
||||
vmulq_n_f32
|
||||
|
|
@ -82,11 +82,17 @@ impl Into<Intrinsic> for ACLEIntrinsicLine {
|
|||
})
|
||||
.collect();
|
||||
let arguments = ArgumentList { args };
|
||||
let a64_only = match &*self.supported_architectures {
|
||||
"A64" => true,
|
||||
"v7/A32/A64" | "A32/A64" => false,
|
||||
_ => panic!("Invalid supported architectures"),
|
||||
};
|
||||
|
||||
Intrinsic {
|
||||
name: name.to_string(),
|
||||
arguments,
|
||||
results,
|
||||
a64_only,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@ pub struct Intrinsic {
|
|||
|
||||
/// The return type of this intrinsic.
|
||||
pub results: IntrinsicType,
|
||||
|
||||
/// Whether this intrinsic is only available on A64.
|
||||
pub a64_only: bool,
|
||||
}
|
||||
|
||||
impl Intrinsic {
|
||||
|
|
|
|||
|
|
@ -72,12 +72,15 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
|
|||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
template<typename T1, typename T2> T1 cast(T2 x) {{
|
||||
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
|
||||
T1 ret = 0;
|
||||
memcpy(&ret, &x, sizeof(T1));
|
||||
return ret;
|
||||
}}
|
||||
|
||||
#ifdef __aarch64__
|
||||
std::ostream& operator<<(std::ostream& os, poly128_t value) {{
|
||||
std::stringstream temp;
|
||||
do {{
|
||||
|
|
@ -90,6 +93,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{
|
|||
os << res;
|
||||
return os;
|
||||
}}
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv) {{
|
||||
{passes}
|
||||
return 0;
|
||||
|
|
@ -133,7 +138,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String)
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_rust_program(intrinsic: &Intrinsic) -> String {
|
||||
fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String {
|
||||
let constraints = intrinsic
|
||||
.arguments
|
||||
.iter()
|
||||
|
|
@ -146,25 +151,26 @@ fn generate_rust_program(intrinsic: &Intrinsic) -> String {
|
|||
#![feature(stdsimd)]
|
||||
#![allow(overflowing_literals)]
|
||||
#![allow(non_upper_case_globals)]
|
||||
use core_arch::arch::aarch64::*;
|
||||
use core_arch::arch::{target_arch}::*;
|
||||
|
||||
fn main() {{
|
||||
{passes}
|
||||
}}
|
||||
"#,
|
||||
target_arch = if a32 { "arm" } else { "aarch64" },
|
||||
passes = gen_code_rust(intrinsic, &constraints, Default::default())
|
||||
)
|
||||
}
|
||||
|
||||
fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
|
||||
fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str, a32: bool) -> bool {
|
||||
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
|
||||
|
||||
let output = Command::new("sh")
|
||||
.arg("-c")
|
||||
.arg(format!(
|
||||
"{cpp} {cppflags} {arch_flags} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
|
||||
target = "aarch64-unknown-linux-gnu",
|
||||
arch_flags = "-march=armv8.6-a+crypto+sha3+crc+dotprod",
|
||||
target = if a32 { "armv7-unknown-linux-gnueabihf" } else { "aarch64-unknown-linux-gnu" },
|
||||
arch_flags = if a32 { "-march=armv8.6-a+crypto+crc+dotprod" } else { "-march=armv8.6-a+crypto+sha3+crc+dotprod" },
|
||||
filename = c_filename,
|
||||
intrinsic = intrinsic.name,
|
||||
cpp = compiler,
|
||||
|
|
@ -175,19 +181,13 @@ fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
|
|||
if output.status.success() {
|
||||
true
|
||||
} else {
|
||||
let stderr = std::str::from_utf8(&output.stderr).unwrap_or("");
|
||||
if stderr.contains("error: use of undeclared identifier") {
|
||||
warn!("Skipping intrinsic due to no support: {}", intrinsic.name);
|
||||
true
|
||||
} else {
|
||||
error!(
|
||||
"Failed to compile code for intrinsic: {}\n\nstdout:\n{}\n\nstderr:\n{}",
|
||||
intrinsic.name,
|
||||
std::str::from_utf8(&output.stdout).unwrap_or(""),
|
||||
std::str::from_utf8(&output.stderr).unwrap_or("")
|
||||
);
|
||||
false
|
||||
}
|
||||
error!(
|
||||
"Failed to compile code for intrinsic: {}\n\nstdout:\n{}\n\nstderr:\n{}",
|
||||
intrinsic.name,
|
||||
std::str::from_utf8(&output.stdout).unwrap_or(""),
|
||||
std::str::from_utf8(&output.stderr).unwrap_or("")
|
||||
);
|
||||
false
|
||||
}
|
||||
} else {
|
||||
error!("Command failed: {:#?}", output);
|
||||
|
|
@ -195,7 +195,7 @@ fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
|
||||
fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool {
|
||||
let _ = std::fs::create_dir("c_programs");
|
||||
intrinsics
|
||||
.par_iter()
|
||||
|
|
@ -205,20 +205,20 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
|
|||
|
||||
let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
|
||||
file.write_all(c_code.into_bytes().as_slice()).unwrap();
|
||||
compile_c(&c_filename, &i, compiler)
|
||||
compile_c(&c_filename, &i, compiler, a32)
|
||||
})
|
||||
.find_any(|x| !x)
|
||||
.is_none()
|
||||
}
|
||||
|
||||
fn build_rust(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
|
||||
fn build_rust(intrinsics: &Vec<Intrinsic>, toolchain: &str, a32: bool) -> bool {
|
||||
intrinsics.iter().for_each(|i| {
|
||||
let rust_dir = format!(r#"rust_programs/{}"#, i.name);
|
||||
let _ = std::fs::create_dir_all(&rust_dir);
|
||||
let rust_filename = format!(r#"{}/main.rs"#, rust_dir);
|
||||
let mut file = File::create(&rust_filename).unwrap();
|
||||
|
||||
let c_code = generate_rust_program(&i);
|
||||
let c_code = generate_rust_program(&i, a32);
|
||||
file.write_all(c_code.into_bytes().as_slice()).unwrap();
|
||||
});
|
||||
|
||||
|
|
@ -259,10 +259,15 @@ path = "{intrinsic}/main.rs""#,
|
|||
.current_dir("rust_programs")
|
||||
.arg("-c")
|
||||
.arg(format!(
|
||||
"cargo {toolchain} build --release --target {target}",
|
||||
"cargo {toolchain} build --target {target}",
|
||||
toolchain = toolchain,
|
||||
target = "aarch64-unknown-linux-gnu",
|
||||
target = if a32 {
|
||||
"armv7-unknown-linux-gnueabihf"
|
||||
} else {
|
||||
"aarch64-unknown-linux-gnu"
|
||||
},
|
||||
))
|
||||
.env("RUSTFLAGS", "-Cdebuginfo=0")
|
||||
.output();
|
||||
if let Ok(output) = output {
|
||||
if output.status.success() {
|
||||
|
|
@ -317,6 +322,12 @@ fn main() {
|
|||
.long("skip")
|
||||
.help("Filename for a list of intrinsics to skip (one per line)"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("A32")
|
||||
.takes_value(false)
|
||||
.long("a32")
|
||||
.help("Run tests for A32 instrinsics instead of A64"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let filename = matches.value_of("INPUT").unwrap();
|
||||
|
|
@ -328,10 +339,15 @@ fn main() {
|
|||
let c_runner = matches.value_of("RUNNER").unwrap_or("");
|
||||
let skip = if let Some(filename) = matches.value_of("SKIP") {
|
||||
let data = std::fs::read_to_string(&filename).expect("Failed to open file");
|
||||
data.lines().map(String::from).collect_vec()
|
||||
data.lines()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.contains('#'))
|
||||
.map(String::from)
|
||||
.collect_vec()
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
let a32 = matches.is_present("A32");
|
||||
|
||||
let intrinsics = get_acle_intrinsics(filename);
|
||||
|
||||
|
|
@ -352,18 +368,19 @@ fn main() {
|
|||
.filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
|
||||
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
|
||||
.filter(|i| !skip.contains(&i.name))
|
||||
.filter(|i| !(a32 && i.a64_only))
|
||||
.collect::<Vec<_>>();
|
||||
intrinsics.dedup();
|
||||
|
||||
if !build_c(&intrinsics, cpp_compiler) {
|
||||
if !build_c(&intrinsics, cpp_compiler, a32) {
|
||||
std::process::exit(2);
|
||||
}
|
||||
|
||||
if !build_rust(&intrinsics, &toolchain) {
|
||||
if !build_rust(&intrinsics, &toolchain, a32) {
|
||||
std::process::exit(3);
|
||||
}
|
||||
|
||||
if !compare_outputs(&intrinsics, &toolchain, &c_runner) {
|
||||
if !compare_outputs(&intrinsics, &toolchain, &c_runner, a32) {
|
||||
std::process::exit(1)
|
||||
}
|
||||
}
|
||||
|
|
@ -374,7 +391,7 @@ enum FailureReason {
|
|||
Difference(String, String, String),
|
||||
}
|
||||
|
||||
fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str) -> bool {
|
||||
fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a32: bool) -> bool {
|
||||
let intrinsics = intrinsics
|
||||
.par_iter()
|
||||
.filter_map(|intrinsic| {
|
||||
|
|
@ -390,11 +407,16 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str) -
|
|||
.current_dir("rust_programs")
|
||||
.arg("-c")
|
||||
.arg(format!(
|
||||
"cargo {toolchain} run --release --target {target} --bin {intrinsic}",
|
||||
"cargo {toolchain} run --target {target} --bin {intrinsic}",
|
||||
intrinsic = intrinsic.name,
|
||||
toolchain = toolchain,
|
||||
target = "aarch64-unknown-linux-gnu",
|
||||
target = if a32 {
|
||||
"armv7-unknown-linux-gnueabihf"
|
||||
} else {
|
||||
"aarch64-unknown-linux-gnu"
|
||||
},
|
||||
))
|
||||
.env("RUSTFLAGS", "-Cdebuginfo=0")
|
||||
.output();
|
||||
|
||||
let (c, rust) = match (c, rust) {
|
||||
|
|
|
|||
|
|
@ -258,6 +258,9 @@ impl IntrinsicType {
|
|||
/// This is required for 8 bit types due to printing as the 8 bit types use
|
||||
/// a char and when using that in `std::cout` it will print as a character,
|
||||
/// which means value of 0 will be printed as a null byte.
|
||||
///
|
||||
/// This is also needed for polynomial types because we want them to be
|
||||
/// printed as unsigned integers to match Rust's `Debug` impl.
|
||||
pub fn c_promotion(&self) -> &str {
|
||||
match *self {
|
||||
IntrinsicType::Type {
|
||||
|
|
@ -267,9 +270,21 @@ impl IntrinsicType {
|
|||
} if bit_len == 8 => match kind {
|
||||
TypeKind::Int => "(int)",
|
||||
TypeKind::UInt => "(unsigned int)",
|
||||
TypeKind::Poly => "(unsigned int)",
|
||||
TypeKind::Poly => "(unsigned int)(uint8_t)",
|
||||
_ => "",
|
||||
},
|
||||
IntrinsicType::Type {
|
||||
kind: TypeKind::Poly,
|
||||
bit_len: Some(bit_len),
|
||||
..
|
||||
} => match bit_len {
|
||||
8 => unreachable!("handled above"),
|
||||
16 => "(uint16_t)",
|
||||
32 => "(uint32_t)",
|
||||
64 => "(uint64_t)",
|
||||
128 => "",
|
||||
_ => panic!("invalid bit_len"),
|
||||
},
|
||||
_ => "",
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -896,7 +896,7 @@ validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1,
|
|||
|
||||
arm = vcls
|
||||
aarch64 = cls
|
||||
generate uint*_t
|
||||
generate uint8x8_t:int8x8_t, uint8x16_t:int8x16_t, uint16x4_t:int16x4_t, uint16x8_t:int16x8_t, uint32x2_t:int32x2_t, uint32x4_t:int32x4_t
|
||||
|
||||
/// Count leading zero bits
|
||||
name = vclz
|
||||
|
|
@ -2058,7 +2058,7 @@ generate int*_t
|
|||
|
||||
/// Negate
|
||||
name = vneg
|
||||
multi_fn = -a
|
||||
multi_fn = a.wrapping_neg()
|
||||
a = 1
|
||||
validate -1
|
||||
|
||||
|
|
@ -4055,7 +4055,7 @@ generate float*_t
|
|||
|
||||
/// Subtract
|
||||
name = vsub
|
||||
multi_fn = a - b
|
||||
multi_fn = a.wrapping_sub(b)
|
||||
a = 3
|
||||
b = 2
|
||||
validate 1
|
||||
|
|
@ -4065,7 +4065,7 @@ generate i64, u64
|
|||
|
||||
/// Add
|
||||
name = vadd
|
||||
multi_fn = a + b
|
||||
multi_fn = a.wrapping_add(b)
|
||||
a = 1
|
||||
b = 2
|
||||
validate 3
|
||||
|
|
@ -5894,7 +5894,7 @@ name = vqshl
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_bits_exp_len-N
|
||||
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N.try_into().unwrap()}
|
||||
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
n = 2
|
||||
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
|
|
@ -5921,7 +5921,7 @@ name = vqshl
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_bits_exp_len-N
|
||||
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N.try_into().unwrap()}
|
||||
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
n = 2
|
||||
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
|
|
@ -6480,7 +6480,7 @@ name = vrshr
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N).try_into().unwrap()}
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N) as _}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -6507,7 +6507,7 @@ name = vrshr
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N).try_into().unwrap()}
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N) as _}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -6613,7 +6613,7 @@ n-suffix
|
|||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr-nself-::<N>, b:in_t, b
|
||||
multi_fn = a + b
|
||||
multi_fn = a.wrapping_add(b)
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
|
|
@ -6628,7 +6628,7 @@ n-suffix
|
|||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr-nself-::<N>, b:in_t, b
|
||||
multi_fn = a + b
|
||||
multi_fn = a.wrapping_add(b)
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
|
|
@ -6804,7 +6804,7 @@ name = vshl
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-out_bits_exp_len-N
|
||||
multi_fn = simd_shl, a, {vdup-nself-noext, N.try_into().unwrap()}
|
||||
multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
n = 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
|
@ -6818,7 +6818,7 @@ name = vshll
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-0-bits
|
||||
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N.try_into().unwrap()}
|
||||
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8
|
||||
n = 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32
|
||||
|
|
@ -6851,7 +6851,7 @@ n-suffix
|
|||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = fix_right_shift_imm-N-bits
|
||||
multi_fn = simd_shr, a, {vdup-nself-noext, n.try_into().unwrap()}
|
||||
multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -6867,7 +6867,7 @@ name = vshrn_n
|
|||
no-q
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}}
|
||||
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
|
|||
|
|
@ -1304,7 +1304,7 @@ fn gen_aarch64(
|
|||
};
|
||||
format!(
|
||||
r#"{}
|
||||
{}{}({}, {} as i64, a.cast())"#,
|
||||
{}{}({}, {} as i64, a as _)"#,
|
||||
multi_calls,
|
||||
ext_c,
|
||||
current_fn,
|
||||
|
|
@ -1327,7 +1327,7 @@ fn gen_aarch64(
|
|||
}
|
||||
}
|
||||
} else if link_aarch64.is_some() && matches!(fn_type, Fntype::Store) {
|
||||
let cast = if is_vstx(&name) { ".cast()" } else { "" };
|
||||
let cast = if is_vstx(&name) { " as _" } else { "" };
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!(r#"{}{}(b, a{})"#, ext_c, current_fn, cast),
|
||||
2 => format!(r#"{}{}(b.0, b.1, a{})"#, ext_c, current_fn, cast),
|
||||
|
|
@ -1336,7 +1336,7 @@ fn gen_aarch64(
|
|||
_ => panic!("unsupported type: {}", in_t[1]),
|
||||
}
|
||||
} else if link_aarch64.is_some() && is_vldx(&name) {
|
||||
format!(r#"{}{}(a.cast())"#, ext_c, current_fn,)
|
||||
format!(r#"{}{}(a as _)"#, ext_c, current_fn,)
|
||||
} else {
|
||||
let trans: [&str; 2] = if link_t[3] != out_t {
|
||||
["transmute(", ")"]
|
||||
|
|
@ -1553,7 +1553,7 @@ fn gen_store_test(
|
|||
let a: [{}; {}] = {};
|
||||
let e: [{}; {}] = {};
|
||||
let mut r: [{}; {}] = [0{}; {}];
|
||||
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
|
||||
assert_eq!(r, e);
|
||||
"#,
|
||||
type_to_native_type(in_t[1]),
|
||||
|
|
@ -2196,7 +2196,7 @@ fn gen_arm(
|
|||
_ => "",
|
||||
};
|
||||
format!(
|
||||
"{}(a.cast(), {}, {}, {})",
|
||||
"{}(a as _, {}, {}, {})",
|
||||
current_fn,
|
||||
subs,
|
||||
constn.as_deref().unwrap(),
|
||||
|
|
@ -2235,7 +2235,7 @@ fn gen_arm(
|
|||
} else if matches!(fn_type, Fntype::Store) {
|
||||
let (cast, size) = if is_vstx(&name) {
|
||||
(
|
||||
".cast()",
|
||||
" as _",
|
||||
format!(", {}", type_bits(&type_to_sub_type(in_t[1])) / 8),
|
||||
)
|
||||
} else {
|
||||
|
|
@ -2276,7 +2276,7 @@ fn gen_arm(
|
|||
_ => "",
|
||||
};
|
||||
format!(
|
||||
"{}({}, {} as i64, a.cast())",
|
||||
"{}({}, {} as i64, a as _)",
|
||||
current_fn,
|
||||
subs,
|
||||
constn.as_deref().unwrap()
|
||||
|
|
@ -2307,7 +2307,7 @@ fn gen_arm(
|
|||
_ => String::new(),
|
||||
}
|
||||
} else if matches!(fn_type, Fntype::Store) {
|
||||
let cast = if is_vstx(&name) { ".cast()" } else { "" };
|
||||
let cast = if is_vstx(&name) { " as _" } else { "" };
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!("{}(b, a{})", current_fn, cast),
|
||||
2 => format!("{}(b.0, b.1, a{})", current_fn, cast),
|
||||
|
|
@ -2316,7 +2316,7 @@ fn gen_arm(
|
|||
_ => String::new(),
|
||||
}
|
||||
} else if link_aarch64.is_some() && is_vldx(&name) {
|
||||
format!("{}(a.cast())", current_fn)
|
||||
format!("{}(a as _)", current_fn)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
|
|
|||
0
library/stdarch/rustfmt.toml
Normal file
0
library/stdarch/rustfmt.toml
Normal file
Loading…
Add table
Add a link
Reference in a new issue