diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 7229f33f6142..d45454c696c7 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -9183,6 +9183,7 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9201,15 +9202,48 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, vreinterpret_s8_s32(c)) + vdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9228,16 +9262,51 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, vreinterpretq_s8_s32(c)) + vdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9256,15 +9325,48 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, vreinterpret_u8_u32(c)) + vdot_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9283,11 +9385,45 @@ pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, vreinterpretq_u8_u32(c)) + vdotq_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (vector)"] @@ -71692,6 +71828,7 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71710,15 +71847,48 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { )] pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, vreinterpret_u8_u32(c), b) + vusdot_s32(a, transmute(c), b) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71737,11 +71907,45 @@ pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) )] pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, vreinterpretq_u8_u32(c), b) + vusdotq_s32(a, transmute(c), b) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: int32x4_t = vusdotq_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Table look-up"] @@ -73612,6 +73816,7 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73630,15 +73835,48 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { )] pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, vreinterpret_s8_s32(c)) + vusdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73657,11 +73895,45 @@ pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) )] pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) + vusdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product vector form with unsigned and signed integers"] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index bb44aab66b40..9ebdc4334c25 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -6259,6 +6259,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] @@ -6268,12 +6269,12 @@ intrinsics: - Let: - c - int32x2_t - - FnCall: ['vreinterpret_s32_s8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - "{type[1]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] + - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32' - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6288,6 +6289,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] @@ -6297,7 +6299,7 @@ intrinsics: - Let: - c - uint32x2_t - - FnCall: ['vreinterpret_u32_u8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - "{type[4]}" @@ -6305,7 +6307,7 @@ intrinsics: - FnCall: - "vusdot{neon_type[0].no}" - - a - - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] - b - name: "vmul{neon_type[1].no}" @@ -7115,6 +7117,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['3']] - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] @@ -7124,7 +7127,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] - Let: - c - "{neon_type[0]}" @@ -7133,7 +7136,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7149,6 +7152,7 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. types: - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] @@ -7157,7 +7161,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7166,7 +7170,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector)