Provides a workaround for the vreinterpret inlining failure

This commit is contained in:
reucru01 2025-11-24 14:34:03 +00:00
parent 1c2a0aaa31
commit 73ad94ab86
2 changed files with 300 additions and 24 deletions

View file

@ -9183,6 +9183,7 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@ -9201,15 +9202,48 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
)]
pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
static_assert_uimm_bits!(LANE, 1);
let c: int32x2_t = vreinterpret_s32_s8(c);
unsafe {
let c: int32x2_t = transmute(c);
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
vdot_s32(a, b, vreinterpret_s8_s32(c))
vdot_s32(a, b, transmute(c))
}
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let lhs_rev: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_rev: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let rhs_words: int32x2_t = transmute(rhs_rev);
        // Replicate the 32-bit group selected by LANE into both lanes.
        let rhs_splat: int32x2_t = simd_shuffle!(rhs_words, rhs_words, [LANE as u32, LANE as u32]);
        // The transmute target type is inferred from `vdot_s32`'s third parameter.
        let dot: int32x2_t = vdot_s32(acc_rev, lhs_rev, transmute(rhs_splat));
        simd_shuffle!(dot, dot, [1, 0])
    }
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@ -9228,16 +9262,51 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
)]
pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 1);
let c: int32x2_t = vreinterpret_s32_s8(c);
unsafe {
let c: int32x2_t = transmute(c);
let c: int32x4_t =
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
vdotq_s32(a, b, transmute(c))
}
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let lhs_rev: int8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_rev: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let rhs_words: int32x2_t = transmute(rhs_rev);
        // Widen to four lanes, each a copy of the 32-bit group selected by LANE.
        let rhs_splat: int32x4_t = simd_shuffle!(
            rhs_words,
            rhs_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        // The transmute target type is inferred from `vdotq_s32`'s third parameter.
        let dot: int32x4_t = vdotq_s32(acc_rev, lhs_rev, transmute(rhs_splat));
        simd_shuffle!(dot, dot, [3, 2, 1, 0])
    }
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@ -9256,15 +9325,48 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
)]
pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
static_assert_uimm_bits!(LANE, 1);
let c: uint32x2_t = vreinterpret_u32_u8(c);
unsafe {
let c: uint32x2_t = transmute(c);
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
vdot_u32(a, b, vreinterpret_u8_u32(c))
vdot_u32(a, b, transmute(c))
}
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(udot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
        let lhs_rev: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_rev: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let rhs_words: uint32x2_t = transmute(rhs_rev);
        // Replicate the 32-bit group selected by LANE into both lanes.
        let rhs_splat: uint32x2_t =
            simd_shuffle!(rhs_words, rhs_words, [LANE as u32, LANE as u32]);
        // The transmute target type is inferred from `vdot_u32`'s third parameter.
        let dot: uint32x2_t = vdot_u32(acc_rev, lhs_rev, transmute(rhs_splat));
        simd_shuffle!(dot, dot, [1, 0])
    }
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
#[inline]
#[cfg(target_endian = "little")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@ -9283,11 +9385,45 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
)]
pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 1);
let c: uint32x2_t = vreinterpret_u32_u8(c);
unsafe {
let c: uint32x2_t = transmute(c);
let c: uint32x4_t =
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vdotq_u32(a, b, vreinterpretq_u8_u32(c))
vdotq_u32(a, b, transmute(c))
}
}
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(udot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let lhs_rev: uint8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_rev: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let rhs_words: uint32x2_t = transmute(rhs_rev);
        // Widen to four lanes, each a copy of the 32-bit group selected by LANE.
        let rhs_splat: uint32x4_t = simd_shuffle!(
            rhs_words,
            rhs_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        // The transmute target type is inferred from `vdotq_u32`'s third parameter.
        let dot: uint32x4_t = vdotq_u32(acc_rev, lhs_rev, transmute(rhs_splat));
        simd_shuffle!(dot, dot, [3, 2, 1, 0])
    }
}
#[doc = "Dot product arithmetic (vector)"]
@ -71692,6 +71828,7 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
@ -71710,15 +71847,48 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
)]
pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
static_assert_uimm_bits!(LANE, 1);
let c: uint32x2_t = vreinterpret_u32_u8(c);
unsafe {
let c: uint32x2_t = transmute(c);
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
vusdot_s32(a, vreinterpret_u8_u32(c), b)
vusdot_s32(a, transmute(c), b)
}
}
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sudot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let signed_rev: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let unsigned_rev: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let unsigned_words: uint32x2_t = transmute(unsigned_rev);
        // Replicate the 32-bit group selected by LANE into both lanes.
        let unsigned_splat: uint32x2_t =
            simd_shuffle!(unsigned_words, unsigned_words, [LANE as u32, LANE as u32]);
        // Operand order is swapped for `vusdot_s32`: the splatted `c` is passed
        // second and `b` third. The transmute target is inferred from that slot.
        let dot: int32x2_t = vusdot_s32(acc_rev, transmute(unsigned_splat), signed_rev);
        simd_shuffle!(dot, dot, [1, 0])
    }
}
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
@ -71737,11 +71907,45 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
)]
pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 1);
let c: uint32x2_t = vreinterpret_u32_u8(c);
unsafe {
let c: uint32x2_t = transmute(c);
let c: uint32x4_t =
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
vusdotq_s32(a, transmute(c), b)
}
}
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sudot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let signed_rev: int8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let unsigned_rev: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let unsigned_words: uint32x2_t = transmute(unsigned_rev);
        // Widen to four lanes, each a copy of the 32-bit group selected by LANE.
        let unsigned_splat: uint32x4_t = simd_shuffle!(
            unsigned_words,
            unsigned_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        // Operand order is swapped for `vusdotq_s32`: the splatted `c` is passed
        // second and `b` third. The transmute target is inferred from that slot.
        let dot: int32x4_t = vusdotq_s32(acc_rev, transmute(unsigned_splat), signed_rev);
        simd_shuffle!(dot, dot, [3, 2, 1, 0])
    }
}
#[doc = "Table look-up"]
@ -73612,6 +73816,7 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
@ -73630,15 +73835,48 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
)]
pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
static_assert_uimm_bits!(LANE, 1);
let c: int32x2_t = vreinterpret_s32_s8(c);
unsafe {
let c: int32x2_t = transmute(c);
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
vusdot_s32(a, b, vreinterpret_s8_s32(c))
vusdot_s32(a, b, transmute(c))
}
}
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(usdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let unsigned_rev: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let signed_rev: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let signed_words: int32x2_t = transmute(signed_rev);
        // Replicate the 32-bit group selected by LANE into both lanes.
        let signed_splat: int32x2_t =
            simd_shuffle!(signed_words, signed_words, [LANE as u32, LANE as u32]);
        // The transmute target type is inferred from `vusdot_s32`'s third parameter.
        let dot: int32x2_t = vusdot_s32(acc_rev, unsigned_rev, transmute(signed_splat));
        simd_shuffle!(dot, dot, [1, 0])
    }
}
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
@ -73657,11 +73895,45 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
)]
pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 1);
let c: int32x2_t = vreinterpret_s32_s8(c);
unsafe {
let c: int32x2_t = transmute(c);
let c: int32x4_t =
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
vusdotq_s32(a, b, transmute(c))
}
}
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(usdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
    // Big-endian build of this intrinsic: each vector operand has its lanes
    // reversed on the way in, and the result is reversed again on the way out.
    // Compile-time check on LANE (1 unsigned immediate bit, going by the macro name).
    static_assert_uimm_bits!(LANE, 1);
    unsafe {
        let acc_rev: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let unsigned_rev: uint8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let signed_rev: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` stands in for the `vreinterpret*` helpers here; this is a
        // workaround for an LLVM inlining assertion failure
        // (https://github.com/llvm/llvm-project/pull/169337).
        let signed_words: int32x2_t = transmute(signed_rev);
        // Widen to four lanes, each a copy of the 32-bit group selected by LANE.
        let signed_splat: int32x4_t = simd_shuffle!(
            signed_words,
            signed_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        // The transmute target type is inferred from `vusdotq_s32`'s third parameter.
        let dot: int32x4_t = vusdotq_s32(acc_rev, unsigned_rev, transmute(signed_splat));
        simd_shuffle!(dot, dot, [3, 2, 1, 0])
    }
}
#[doc = "Dot product vector form with unsigned and signed integers"]

View file

@ -6259,6 +6259,7 @@ intrinsics:
- *neon-unstable-i8mm
- *neon-cfg-arm-unstable
static_defs: ["const LANE: i32"]
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; this works around an LLVM inlining issue that causes an assertion failure.
safety: safe
types:
- ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]','']
@ -6268,12 +6269,12 @@ intrinsics:
- Let:
- c
- int32x2_t
- FnCall: ['vreinterpret_s32_s8', [c]]
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]]
- Let:
- c
- "{type[1]}"
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
- FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
- FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32'
- name: "vsudot{neon_type[0].lane_nox}"
doc: "Dot product index form with signed and unsigned integers"
@ -6288,6 +6289,7 @@ intrinsics:
- *neon-unstable-i8mm
- *neon-cfg-arm-unstable
static_defs: ["const LANE: i32"]
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; this works around an LLVM inlining issue that causes an assertion failure.
safety: safe
types:
- [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,'']
@ -6297,7 +6299,7 @@ intrinsics:
- Let:
- c
- uint32x2_t
- FnCall: ['vreinterpret_u32_u8', [c]]
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]]
- Let:
- c
- "{type[4]}"
@ -6305,7 +6307,7 @@ intrinsics:
- FnCall:
- "vusdot{neon_type[0].no}"
- - a
- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
- b
- name: "vmul{neon_type[1].no}"
@ -7115,6 +7117,7 @@ intrinsics:
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
- *neon-cfg-arm-unstable
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; this works around an LLVM inlining issue that causes an assertion failure.
safety: safe
types:
- [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]','']
@ -7124,7 +7127,7 @@ intrinsics:
- Let:
- c
- "{neon_type[3]}"
- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
- FnCall: [transmute, [c]]
- Let:
- c
- "{neon_type[0]}"
@ -7133,7 +7136,7 @@ intrinsics:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- FnCall: [transmute, [c]]
- name: "vdot{neon_type[0].lane_nox}"
doc: Dot product arithmetic (indexed)
@ -7149,6 +7152,7 @@ intrinsics:
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
- *neon-cfg-arm-unstable
safety: safe
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; this works around an LLVM inlining issue that causes an assertion failure.
types:
- [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]','']
- [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
@ -7157,7 +7161,7 @@ intrinsics:
- Let:
- c
- "{neon_type[3]}"
- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
- Let:
- c
- "{neon_type[0]}"
@ -7166,7 +7170,7 @@ intrinsics:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- name: "vmax{neon_type.no}"
doc: Maximum (vector)