Provide a workaround for the vreinterpret inlining failure
This commit is contained in:
parent
1c2a0aaa31
commit
73ad94ab86
2 changed files with 300 additions and 24 deletions
|
|
@ -9183,6 +9183,7 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
|
|||
#[doc = "Dot product arithmetic (indexed)"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[target_feature(enable = "neon,dotprod")]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
|
||||
|
|
@ -9201,15 +9202,48 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
|
|||
)]
|
||||
pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: int32x2_t = vreinterpret_s32_s8(c);
|
||||
unsafe {
|
||||
let c: int32x2_t = transmute(c);
|
||||
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
|
||||
vdot_s32(a, b, vreinterpret_s8_s32(c))
|
||||
vdot_s32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let lhs: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_bytes: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let rhs_words: int32x2_t = transmute(rhs_bytes);
        // Copy the selected 32-bit lane into every output lane.
        let rhs_lane: int32x2_t =
            simd_shuffle!(rhs_words, rhs_words, [LANE as u32, LANE as u32]);
        let ret_val: int32x2_t = vdot_s32(acc, lhs, transmute(rhs_lane));
        simd_shuffle!(ret_val, ret_val, [1, 0])
    }
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[target_feature(enable = "neon,dotprod")]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
|
||||
|
|
@ -9228,16 +9262,51 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
|
|||
)]
|
||||
pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: int32x2_t = vreinterpret_s32_s8(c);
|
||||
unsafe {
|
||||
let c: int32x2_t = transmute(c);
|
||||
let c: int32x4_t =
|
||||
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
|
||||
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
|
||||
vdotq_s32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let lhs: int8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_bytes: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let rhs_words: int32x2_t = transmute(rhs_bytes);
        // Copy the selected 32-bit lane into all four output lanes.
        let rhs_lane: int32x4_t = simd_shuffle!(
            rhs_words,
            rhs_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        let ret_val: int32x4_t = vdotq_s32(acc, lhs, transmute(rhs_lane));
        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
    }
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[target_feature(enable = "neon,dotprod")]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
|
||||
|
|
@ -9256,15 +9325,48 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
|
|||
)]
|
||||
pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: uint32x2_t = vreinterpret_u32_u8(c);
|
||||
unsafe {
|
||||
let c: uint32x2_t = transmute(c);
|
||||
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
|
||||
vdot_u32(a, b, vreinterpret_u8_u32(c))
|
||||
vdot_u32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(udot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
        let lhs: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_bytes: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let rhs_words: uint32x2_t = transmute(rhs_bytes);
        // Copy the selected 32-bit lane into every output lane.
        let rhs_lane: uint32x2_t =
            simd_shuffle!(rhs_words, rhs_words, [LANE as u32, LANE as u32]);
        let ret_val: uint32x2_t = vdot_u32(acc, lhs, transmute(rhs_lane));
        simd_shuffle!(ret_val, ret_val, [1, 0])
    }
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[target_feature(enable = "neon,dotprod")]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
|
||||
|
|
@ -9283,11 +9385,45 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
|
|||
)]
|
||||
pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: uint32x2_t = vreinterpret_u32_u8(c);
|
||||
unsafe {
|
||||
let c: uint32x2_t = transmute(c);
|
||||
let c: uint32x4_t =
|
||||
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
|
||||
vdotq_u32(a, b, vreinterpretq_u8_u32(c))
|
||||
vdotq_u32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product arithmetic (indexed)"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
#[inline]
#[cfg(target_endian = "big")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[target_feature(enable = "neon,dotprod")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(udot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let lhs: uint8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let rhs_bytes: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let rhs_words: uint32x2_t = transmute(rhs_bytes);
        // Copy the selected 32-bit lane into all four output lanes.
        let rhs_lane: uint32x4_t = simd_shuffle!(
            rhs_words,
            rhs_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        let ret_val: uint32x4_t = vdotq_u32(acc, lhs, transmute(rhs_lane));
        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
    }
}
|
||||
#[doc = "Dot product arithmetic (vector)"]
|
||||
|
|
@ -71692,6 +71828,7 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
|
|||
#[doc = "Dot product index form with signed and unsigned integers"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[target_feature(enable = "neon,i8mm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
|
||||
|
|
@ -71710,15 +71847,48 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
|
|||
)]
|
||||
pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: uint32x2_t = vreinterpret_u32_u8(c);
|
||||
unsafe {
|
||||
let c: uint32x2_t = transmute(c);
|
||||
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
|
||||
vusdot_s32(a, vreinterpret_u8_u32(c), b)
|
||||
vusdot_s32(a, transmute(c), b)
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sudot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let signed_bytes: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let unsigned_bytes: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let unsigned_words: uint32x2_t = transmute(unsigned_bytes);
        // Copy the selected 32-bit lane into every output lane.
        let unsigned_lane: uint32x2_t =
            simd_shuffle!(unsigned_words, unsigned_words, [LANE as u32, LANE as u32]);
        // The unsigned operand is passed second and the signed operand third.
        let ret_val: int32x2_t = vusdot_s32(acc, transmute(unsigned_lane), signed_bytes);
        simd_shuffle!(ret_val, ret_val, [1, 0])
    }
}
|
||||
#[doc = "Dot product index form with signed and unsigned integers"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[target_feature(enable = "neon,i8mm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
|
||||
|
|
@ -71737,11 +71907,45 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
|
|||
)]
|
||||
pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: uint32x2_t = vreinterpret_u32_u8(c);
|
||||
unsafe {
|
||||
let c: uint32x2_t = transmute(c);
|
||||
let c: uint32x4_t =
|
||||
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
|
||||
vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
|
||||
vusdotq_s32(a, transmute(c), b)
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product index form with signed and unsigned integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(sudot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let signed_bytes: int8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let unsigned_bytes: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let unsigned_words: uint32x2_t = transmute(unsigned_bytes);
        // Copy the selected 32-bit lane into all four output lanes.
        let unsigned_lane: uint32x4_t = simd_shuffle!(
            unsigned_words,
            unsigned_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        // The unsigned operand is passed second and the signed operand third.
        let ret_val: int32x4_t = vusdotq_s32(acc, transmute(unsigned_lane), signed_bytes);
        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
    }
}
|
||||
#[doc = "Table look-up"]
|
||||
|
|
@ -73612,6 +73816,7 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
|||
#[doc = "Dot product index form with unsigned and signed integers"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[target_feature(enable = "neon,i8mm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
|
||||
|
|
@ -73630,15 +73835,48 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
|||
)]
|
||||
pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: int32x2_t = vreinterpret_s32_s8(c);
|
||||
unsafe {
|
||||
let c: int32x2_t = transmute(c);
|
||||
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
|
||||
vusdot_s32(a, b, vreinterpret_s8_s32(c))
|
||||
vusdot_s32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(usdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x2_t = simd_shuffle!(a, a, [1, 0]);
        let unsigned_bytes: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
        let signed_bytes: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let signed_words: int32x2_t = transmute(signed_bytes);
        // Copy the selected 32-bit lane into every output lane.
        let signed_lane: int32x2_t =
            simd_shuffle!(signed_words, signed_words, [LANE as u32, LANE as u32]);
        let ret_val: int32x2_t = vusdot_s32(acc, unsigned_bytes, transmute(signed_lane));
        simd_shuffle!(ret_val, ret_val, [1, 0])
    }
}
|
||||
#[doc = "Dot product index form with unsigned and signed integers"]
|
||||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
|
||||
#[inline]
|
||||
#[cfg(target_endian = "little")]
|
||||
#[target_feature(enable = "neon,i8mm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
|
||||
|
|
@ -73657,11 +73895,45 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
|
|||
)]
|
||||
pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
|
||||
static_assert_uimm_bits!(LANE, 1);
|
||||
let c: int32x2_t = vreinterpret_s32_s8(c);
|
||||
unsafe {
|
||||
let c: int32x2_t = transmute(c);
|
||||
let c: int32x4_t =
|
||||
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
|
||||
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
|
||||
vusdotq_s32(a, b, transmute(c))
|
||||
}
|
||||
}
|
||||
#[doc = "Dot product index form with unsigned and signed integers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
#[inline]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon,i8mm")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(usdot, LANE = 0)
)]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(
    not(target_arch = "arm"),
    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
)]
#[cfg_attr(
    target_arch = "arm",
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
    static_assert_uimm_bits!(LANE, 1);
    // Big-endian variant: every input has its lane order reversed before the
    // computation, and the result is reversed again before it is returned.
    // NOTE(review): soundness of the transmutes relies on the source and
    // destination vector types having the same size — confirm.
    unsafe {
        let acc: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
        let unsigned_bytes: uint8x16_t =
            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
        let signed_bytes: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
        // `transmute` is used in place of the vreinterpret intrinsics here.
        let signed_words: int32x2_t = transmute(signed_bytes);
        // Copy the selected 32-bit lane into all four output lanes.
        let signed_lane: int32x4_t = simd_shuffle!(
            signed_words,
            signed_words,
            [LANE as u32, LANE as u32, LANE as u32, LANE as u32]
        );
        let ret_val: int32x4_t = vusdotq_s32(acc, unsigned_bytes, transmute(signed_lane));
        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
    }
}
|
||||
#[doc = "Dot product vector form with unsigned and signed integers"]
|
||||
|
|
|
|||
|
|
@ -6259,6 +6259,7 @@ intrinsics:
|
|||
- *neon-unstable-i8mm
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged. This works around an LLVM inlining issue that causes an assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]','']
|
||||
|
|
@ -6268,12 +6269,12 @@ intrinsics:
|
|||
- Let:
|
||||
- c
|
||||
- int32x2_t
|
||||
- FnCall: ['vreinterpret_s32_s8', [c]]
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[1]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
|
||||
- FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32'
|
||||
|
||||
- name: "vsudot{neon_type[0].lane_nox}"
|
||||
doc: "Dot product index form with signed and unsigned integers"
|
||||
|
|
@ -6288,6 +6289,7 @@ intrinsics:
|
|||
- *neon-unstable-i8mm
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged. This works around an LLVM inlining issue that causes an assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,'']
|
||||
|
|
@ -6297,7 +6299,7 @@ intrinsics:
|
|||
- Let:
|
||||
- c
|
||||
- uint32x2_t
|
||||
- FnCall: ['vreinterpret_u32_u8', [c]]
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[4]}"
|
||||
|
|
@ -6305,7 +6307,7 @@ intrinsics:
|
|||
- FnCall:
|
||||
- "vusdot{neon_type[0].no}"
|
||||
- - a
|
||||
- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
|
||||
- b
|
||||
|
||||
- name: "vmul{neon_type[1].no}"
|
||||
|
|
@ -7115,6 +7117,7 @@ intrinsics:
|
|||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged. This works around an LLVM inlining issue that causes an assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]','']
|
||||
|
|
@ -7124,7 +7127,7 @@ intrinsics:
|
|||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- FnCall: [transmute, [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
|
|
@ -7133,7 +7136,7 @@ intrinsics:
|
|||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
- FnCall: [transmute, [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].lane_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
|
|
@ -7149,6 +7152,7 @@ intrinsics:
|
|||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged. This works around an LLVM inlining issue that causes an assertion failure.
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]','']
|
||||
- [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
|
|
@ -7157,7 +7161,7 @@ intrinsics:
|
|||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
|
|
@ -7166,7 +7170,7 @@ intrinsics:
|
|||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
doc: Maximum (vector)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue