diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 03c56ff0cbd1..30512e11bbcc 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -195,6 +195,17 @@ pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 { pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 { simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } +#[doc = "Floating-point absolute difference"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fabd))] +pub unsafe fn vabdh_f16(a: f16, b: f16) -> f16 { + simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Signed Absolute difference Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"] #[doc = "## Safety"] @@ -1041,6 +1052,44 @@ pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x _vbcaxq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.vcadd.rot270.v4f16" + )] + fn _vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vcadd_rot270_f16(a, b) +} +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v8f16" + )] + fn _vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vcaddq_rot270_f16(a, b) +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1095,6 +1144,44 @@ pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { _vcaddq_rot270_f64(a, b) } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f16" + )] + fn _vcadd_rot90_f16(a: 
float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vcadd_rot90_f16(a, b) +} +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v8f16" + )] + fn _vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vcaddq_rot90_f16(a, b) +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1220,6 +1307,24 @@ pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 { } _vcages_f32(a, b).as_unsigned() } +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facge))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcageh_f16(a: f16, b: f16) -> u16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.i32.f16" + )] + fn _vcageh_f16(a: f16, b: f16) -> i32; + } + _vcageh_f16(a, b).as_unsigned() as u16 +} #[doc = "Floating-point absolute compare greater than"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f64)"] #[doc = "## Safety"] @@ -1292,6 +1397,24 @@ pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 { } _vcagts_f32(a, b).as_unsigned() } +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facgt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagth_f16(a: f16, b: f16) -> u16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.i32.f16" + )] + fn _vcagth_f16(a: f16, b: f16) -> i32; + } + _vcagth_f16(a, b).as_unsigned() as u16 +} #[doc = "Floating-point absolute compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f64)"] #[doc = "## Safety"] @@ -1336,6 +1459,17 @@ pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 { pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 { vcages_f32(b, a) } +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facge))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaleh_f16(a: f16, b: f16) -> u16 { + vcageh_f16(b, a) +} #[doc = "Floating-point absolute compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f64)"] #[doc = "## Safety"] @@ -1380,6 +1514,17 @@ pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 { pub unsafe fn vcalts_f32(a: f32, b: 
f32) -> u32 { vcagts_f32(b, a) } +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facgt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcalth_f16(a: f16, b: f16) -> u16 { + vcagth_f16(b, a) +} #[doc = "Floating-point compare equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f64)"] #[doc = "## Safety"] @@ -1512,6 +1657,41 @@ pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { transmute(vceq_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqh_f16(a: f16, b: f16) -> u16 { + simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmeq))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) +} +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmeq))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) +} #[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f32)"] #[doc = "## Safety"] @@ -1823,6 +2003,17 @@ pub unsafe fn vceqzd_u64(a: u64) -> u64 { transmute(vceqz_u64(transmute(a))) } #[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqzh_f16(a: f16) -> u16 { + simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1954,6 +2145,17 @@ pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { transmute(vcge_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgeh_f16(a: f16, b: f16) -> u16 { + 
simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare greater than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"] #[doc = "## Safety"] @@ -2131,6 +2333,17 @@ pub unsafe fn vcgezs_f32(a: f32) -> u32 { pub unsafe fn vcgezd_s64(a: i64) -> u64 { transmute(vcgez_s64(transmute(a))) } +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgezh_f16(a: f16) -> u16 { + simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"] #[doc = "## Safety"] @@ -2241,6 +2454,17 @@ pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { transmute(vcgt_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgth_f16(a: f16, b: f16) -> u16 { + simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare greater than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"] #[doc = "## Safety"] @@ -2418,6 +2642,17 @@ pub unsafe fn vcgtzs_f32(a: f32) -> u32 { pub unsafe fn vcgtzd_s64(a: 
i64) -> u64 { transmute(vcgtz_s64(transmute(a))) } +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtzh_f16(a: f16) -> u16 { + simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"] #[doc = "## Safety"] @@ -2528,6 +2763,17 @@ pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { transmute(vcle_s64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcleh_f16(a: f16, b: f16) -> u16 { + simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare less than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"] #[doc = "## Safety"] @@ -2705,6 +2951,17 @@ pub unsafe fn vclezs_f32(a: f32) -> u32 { pub unsafe fn vclezd_s64(a: i64) -> u64 { transmute(vclez_s64(transmute(a))) } +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclezh_f16(a: f16) -> u16 { + simd_extract!(vclez_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"] #[doc = "## Safety"] @@ -2794,6 +3051,17 @@ pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { transmute(vclt_s64(transmute(a), transmute(b))) } #[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclth_f16(a: f16, b: f16) -> u16 { + simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} +#[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2992,6 +3260,55 @@ pub unsafe fn vcltzs_f32(a: f32) -> u32 { pub unsafe fn vcltzd_s64(a: i64) -> u64 { transmute(vcltz_s64(transmute(a))) } +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltzh_f16(a: f16) -> u16 { + simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f16" + )] + fn _vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" + )] + fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_f16(a, b, c) +} #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] #[doc = "## Safety"] @@ -3047,6 +3364,66 @@ pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa _vcmlaq_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, 
LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3092,6 +3469,66 @@ pub unsafe fn vcmlaq_lane_f32( vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] 
+#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3137,6 +3574,44 @@ pub unsafe fn vcmlaq_laneq_f32( vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f16" + )] + fn _vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v8f16" + )] + fn _vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3191,6 +3666,66 @@ pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) _vcmlaq_rot180_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot180_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot180_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3236,6 +3771,66 @@ pub unsafe fn vcmlaq_rot180_lane_f32( vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] 
+#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot180_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot180_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3281,6 +3876,44 @@ pub unsafe fn vcmlaq_rot180_laneq_f32( vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f16" + )] + fn _vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16" + )] + fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3335,6 +3968,66 @@ pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) _vcmlaq_rot270_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, 
assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot270_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot270_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3380,6 +4073,66 @@ pub unsafe fn vcmlaq_rot270_lane_f32( vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = 
"neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot270_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot270_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3425,6 +4178,44 @@ pub unsafe fn vcmlaq_rot270_laneq_f32( vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f16" + )] + fn _vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v8f16" + )] + fn _vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3479,6 +4270,66 @@ pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) - _vcmlaq_rot90_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot90_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot90_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3524,6 +4375,66 @@ pub unsafe fn vcmlaq_rot90_lane_f32( vcmlaq_rot90_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot90_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot90_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6721,7 +7632,7 @@ pub unsafe fn vcopyq_laneq_p64( pub unsafe fn vcreate_f64(a: u64) -> float64x1_t { transmute(a) } -#[doc = "Floating-point convert to lower precision narrow"] +#[doc = "Floating-point convert"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6787,6 +7698,28 @@ pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t { pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t { simd_cast(a) } +#[doc = "Floating-point convert to lower precision"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtn2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t { + vcombine_f16(a, vcvt_f16_f32(b)) +} +#[doc = "Floating-point convert to higher precision"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtl2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t { + vcvt_f32_f16(vget_high_f16(a)) +} #[doc = "Floating-point convert to lower precision narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"] #[doc = "## Safety"] @@ -7043,6 +7976,42 @@ pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t { _vcvtq_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.v4i16.v4f16" + )] + fn _vcvta_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvta_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.v8i16.v8f16" + )] + fn _vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtaq_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7115,6 +8084,42 @@ pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtaq_s64_f64(a) } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch 
= "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.v4i16.v4f16" + )] + fn _vcvta_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvta_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtaq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.v8i16.v8f16" + )] + fn _vcvtaq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtaq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7187,6 +8192,100 @@ pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { _vcvtaq_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s16_f16(a: f16) -> i16 { + vcvtah_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i32.f16" + )] + fn _vcvtah_s32_f16(a: f16) -> i32; + } + _vcvtah_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i64.f16" + )] + fn _vcvtah_s64_f16(a: f16) -> i64; + } + _vcvtah_s64_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u16_f16(a: f16) -> u16 { + vcvtah_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i32.f16" + )] + fn _vcvtah_u32_f16(a: f16) -> i32; + } + _vcvtah_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i64.f16" + )] + fn _vcvtah_u64_f16(a: f16) -> i64; + } + _vcvtah_u64_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7280,6 +8379,386 @@ pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 { pub unsafe fn vcvts_f32_s32(a: i32) -> f32 { a as f32 } +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s16(a: i16) -> f16 { + a as f16 +} +#[doc = 
"Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s32(a: i32) -> f16 { + a as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s64(a: i64) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u16(a: u16) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u32(a: u32) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u64(a: u64) -> f16 { + a as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s16(a: i16) -> f16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_f16_s32::(a as i32) as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s32(a: i32) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f16.i32" + )] + fn _vcvth_n_f16_s32(a: i32, n: i32) -> f16; + } + _vcvth_n_f16_s32(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s64(a: i64) -> f16 { + 
static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f16.i64" + )] + fn _vcvth_n_f16_s64(a: i64, n: i32) -> f16; + } + _vcvth_n_f16_s64(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_u16(a: u16) -> f16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_f16_u32::(a as u32) as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_u32(a: u32) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f16.i32" + )] + fn _vcvth_n_f16_u32(a: i32, n: i32) -> f16; + } + _vcvth_n_f16_u32(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +pub unsafe fn vcvth_n_f16_u64(a: u64) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f16.i64" + )] + fn _vcvth_n_f16_u64(a: i64, n: i32) -> f16; + } + _vcvth_n_f16_u64(a.as_signed(), N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s16_f16(a: f16) -> i16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_s32_f16::(a) as i16 +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s32_f16(a: f16) -> i32 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f16" + )] + fn _vcvth_n_s32_f16(a: f16, n: i32) -> i32; + } + _vcvth_n_s32_f16(a, N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] 
+#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s64_f16(a: f16) -> i64 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f16" + )] + fn _vcvth_n_s64_f16(a: f16, n: i32) -> i64; + } + _vcvth_n_s64_f16(a, N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_u16_f16(a: f16) -> u16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_u32_f16::(a) as u16 +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_u32_f16(a: f16) -> u32 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f16" + )] + fn _vcvth_n_u32_f16(a: f16, n: i32) -> i32; + } + _vcvth_n_u32_f16(a, N).as_unsigned() +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u64_f16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_u64_f16(a: f16) -> u64 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f16" + )] + fn _vcvth_n_u64_f16(a: f16, n: i32) -> i64; + } + _vcvth_n_u64_f16(a, N).as_unsigned() +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s16_f16(a: f16) -> i16 { + a as i16 +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s32_f16(a: f16) -> i32 { + a as i32 +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s64_f16(a: f16) -> i64 { + a as i64 +} +#[doc = "Floating-point convert to unsigned fixed-point"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u16_f16(a: f16) -> u16 { + a as u16 +} +#[doc = "Floating-point convert to unsigned fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u32_f16(a: f16) -> u32 { + a as u32 +} +#[doc = "Floating-point convert to unsigned fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u64_f16(a: f16) -> u64 { + a as u64 +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v4i16.v4f16" + )] + fn _vcvtm_s16_f16(a: float16x4_t) -> int16x4_t; + } + 
_vcvtm_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v8i16.v8f16" + )] + fn _vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtmq_s16_f16(a) +} #[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"] #[doc = "## Safety"] @@ -7353,6 +8832,42 @@ pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtmq_s64_f64(a) } #[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.v4i16.v4f16" + )] + fn _vcvtm_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtm_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u16_f16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.v8i16.v8f16" + )] + fn _vcvtmq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtmq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7424,6 +8939,100 @@ pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { } _vcvtmq_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s16_f16(a: f16) -> i16 { + vcvtmh_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.fcvtms.i32.f16" + )] + fn _vcvtmh_s32_f16(a: f16) -> i32; + } + _vcvtmh_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i64.f16" + )] + fn _vcvtmh_s64_f16(a: f16) -> i64; + } + _vcvtmh_s64_f16(a) +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u16_f16(a: f16) -> u16 { + vcvtmh_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i32.f16" + )] + fn _vcvtmh_u32_f16(a: f16) -> i32; + } + _vcvtmh_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point 
convert to unsigned integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i64.f16" + )] + fn _vcvtmh_u64_f16(a: f16) -> i64; + } + _vcvtmh_u64_f16(a).as_unsigned() +} #[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)"] #[doc = "## Safety"] @@ -7497,6 +9106,42 @@ pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 { _vcvtmd_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v4i16.v4f16" + )] + fn _vcvtn_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtn_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v8i16.v8f16" + )] + fn _vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtnq_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7569,6 +9214,42 @@ pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtnq_s64_f64(a) } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v4i16.v4f16" + )] + fn _vcvtn_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtn_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe 
fn vcvtnq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v8i16.v8f16" + )] + fn _vcvtnq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtnq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7640,6 +9321,100 @@ pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { } _vcvtnq_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s16_f16(a: f16) -> i16 { + vcvtnh_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i32.f16" + )] + fn _vcvtnh_s32_f16(a: f16) -> i32; + } + _vcvtnh_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i64.f16" + )] + fn _vcvtnh_s64_f16(a: f16) -> i64; + } + _vcvtnh_s64_f16(a) +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u16_f16(a: f16) -> u16 { + vcvtnh_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i32.f16" + )] + fn _vcvtnh_u32_f16(a: f16) -> i32; + } + _vcvtnh_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i64.f16" + )] + fn _vcvtnh_u64_f16(a: f16) -> i64; + } + _vcvtnh_u64_f16(a).as_unsigned() +} #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)"] #[doc = "## Safety"] @@ -7712,6 +9487,42 @@ pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { } _vcvtnd_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to signed integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v4i16.v4f16" + )] + fn _vcvtp_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtp_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v8i16.v8f16" + )] + fn _vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtpq_s16_f16(a) +} #[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)"] #[doc = "## Safety"] @@ -7784,6 +9595,42 @@ pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { } _vcvtpq_s64_f64(a) } +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v4i16.v4f16" + )] + fn _vcvtp_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtp_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtpq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.fcvtpu.v8i16.v8f16" + )] + fn _vcvtpq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtpq_u16_f16(a).as_unsigned() +} #[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32)"] #[doc = "## Safety"] @@ -7856,6 +9703,100 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { } _vcvtpq_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s16_f16(a: f16) -> i16 { + vcvtph_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i32.f16" + )] + fn _vcvtph_s32_f16(a: f16) -> i32; + } + _vcvtph_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i64.f16" + )] + fn _vcvtph_s64_f16(a: f16) -> i64; + } + _vcvtph_s64_f16(a) +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_u16_f16(a: f16) -> u16 { + vcvtph_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i32.f16" + )] + fn _vcvtph_u32_f16(a: f16) -> i32; + } + _vcvtph_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_u64_f16(a: f16) -> u64 { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i64.f16" + )] + fn _vcvtph_u64_f16(a: f16) -> i64; + } + _vcvtph_u64_f16(a).as_unsigned() +} #[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)"] #[doc = "## Safety"] @@ -8195,6 +10136,28 @@ pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) } #[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_div(a, b) +} +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdivq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_div(a, b) +} +#[doc = "Divide"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8238,6 +10201,17 @@ pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_div(a, b) } +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vdivh_f16(a: f16, b: f16) -> f16 { + a / b +} #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"] #[doc = "## Safety"] @@ -8444,7 +10418,7 @@ pub unsafe fn vduph_laneq_p16(a: poly16x8_t) -> p16 { static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8457,7 +10431,7 @@ pub unsafe fn vdupb_laneq_s8(a: int8x16_t) -> i8 { static_assert_uimm_bits!(N, 4); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8470,7 +10444,7 @@ pub unsafe fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { static_assert_uimm_bits!(N, 4); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8523,6 +10497,32 @@ pub unsafe fn vdupd_lane_u64(a: uint64x1_t) -> u64 { simd_extract!(a, N as u32) } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(nop, N 
= 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vduph_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) +} +#[doc = "Extract an element from a vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vduph_laneq_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(N, 4); + simd_extract!(a, N as u32) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8927,6 +10927,74 @@ pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float6 _vfma_f64(b, c, a) } #[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f16)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9045,6 +11113,28 @@ pub unsafe 
fn vfma_laneq_f64( static_assert_uimm_bits!(LANE, 1); vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfma_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t { + vfma_f16(a, b, vdup_n_f16(c)) +} +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfmaq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t { + vfmaq_f16(a, b, vdupq_n_f16(c)) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64)"] #[doc = "## Safety"] @@ -9077,6 +11167,52 @@ pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> let c: f64 = simd_extract!(c, LANE as u32); _vfmad_lane_f64(b, c, a) } +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f16" + )] + fn _vfmah_f16(a: f16, b: f16, c: f16) -> f16; + } + _vfmah_f16(b, c, a) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_lane_f16(a: f16, b: f16, v: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let c: f16 = simd_extract!(v, LANE as u32); + vfmah_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let c: f16 = simd_extract!(v, LANE as u32); + vfmah_f16(a, b, c) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"] #[doc = "## Safety"] @@ -9186,6 +11322,446 @@ pub unsafe fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) - let c: f64 = simd_extract!(c, LANE as u32); _vfmad_laneq_f64(b, c, a) } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal2))] +pub unsafe fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v2f32.v4f16" + )] + fn _vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlal_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal2))] +pub unsafe fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v4f32.v8f16" + )] + fn _vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlalq_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +pub unsafe fn vfmlal_lane_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_laneq_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_lane_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_laneq_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_lane_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_laneq_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as 
u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_lane_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_laneq_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal))] +pub 
unsafe fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v2f32.v4f16" + )] + fn _vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlal_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal))] +pub unsafe fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v4f32.v8f16" + )] + fn _vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlalq_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub unsafe fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v2f32.v4f16" + )] + fn _vfmlsl_high_f16(r: 
float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlsl_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub unsafe fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + )] + fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlslq_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_lane_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_laneq_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_lane_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_laneq_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, 
LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_lane_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_laneq_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] 
+#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_lane_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_laneq_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl))] +pub unsafe fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v2f32.v4f16" + )] + fn _vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlsl_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from 
accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl))] +pub unsafe fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v4f32.v8f16" + )] + fn _vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlslq_low_f16(r, a, b) +} #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] #[doc = "## Safety"] @@ -9198,6 +11774,74 @@ pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float6 let b: float64x1_t = simd_neg(b); vfma_f64(a, b, c) } +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f16)"] +#[doc 
= "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"] #[doc = "## Safety"] @@ -9317,6 +11961,28 @@ pub unsafe fn 
vfms_laneq_f64( static_assert_uimm_bits!(LANE, 1); vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmls))] +pub unsafe fn vfms_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t { + vfms_f16(a, b, vdup_n_f16(c)) +} +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmls))] +pub unsafe fn vfmsq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t { + vfmsq_f16(a, b, vdupq_n_f16(c)) +} #[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)"] #[doc = "## Safety"] @@ -9329,6 +11995,45 @@ pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t vfms_f64(a, b, vdup_n_f64(c)) } #[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 { + vfmah_f16(a, -b, c) +} +#[doc = "Floating-point fused 
multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_lane_f16(a: f16, b: f16, v: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let c: f16 = simd_extract!(v, LANE as u32); + vfmsh_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let c: f16 = simd_extract!(v, LANE as u32); + vfmsh_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9417,6 +12122,28 @@ pub unsafe fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) - vfmad_laneq_f64::(a, -b, c) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(ldr))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vld1_f16(ptr: *const f16) -> float16x4_t { + crate::ptr::read_unaligned(ptr.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(ldr))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + crate::ptr::read_unaligned(ptr.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -11106,6 +13833,24 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vmaxq_f64(a, b) } +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmax))] +pub unsafe fn vmaxh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.f16" + )] + fn _vmaxh_f16(a: f16, b: f16) -> f16; + } + _vmaxh_f16(a, b) +} #[doc = "Floating-point Maximum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64)"] #[doc = "## Safety"] @@ -11142,6 +13887,60 @@ pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vmaxnmq_f64(a, b) } +#[doc = "Floating-point Maximum Number"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnmh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.f16" + )] + fn _vmaxnmh_f16(a: f16, b: f16) -> f16; + } + _vmaxnmh_f16(a, b) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmv))] +pub unsafe fn vmaxnmv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f16.v4f16" + )] + fn _vmaxnmv_f16(a: float16x4_t) -> f16; + } + _vmaxnmv_f16(a) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmv))] +pub unsafe fn vmaxnmvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f16.v8f16" + )] + fn _vmaxnmvq_f16(a: float16x8_t) -> f16; + } + _vmaxnmvq_f16(a) +} #[doc = "Floating-point maximum number across vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"] #[doc = "## Safety"] @@ -11196,6 +13995,42 @@ pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { } _vmaxnmvq_f32(a) } +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxv))] +pub unsafe fn vmaxv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f16.v4f16" + )] + fn _vmaxv_f16(a: float16x4_t) -> f16; + } + _vmaxv_f16(a) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxv))] +pub unsafe fn vmaxvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f16.v8f16" + )] + fn _vmaxvq_f16(a: float16x8_t) -> f16; + } + _vmaxvq_f16(a) +} #[doc = "Horizontal vector max."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f32)"] #[doc = "## Safety"] @@ -11502,6 +14337,24 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vminq_f64(a, b) } +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmin))] +pub unsafe fn vminh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.f16" + )] + fn _vminh_f16(a: f16, b: f16) -> f16; + } + _vminh_f16(a, b) +} #[doc = "Floating-point Minimum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64)"] #[doc = "## Safety"] @@ -11538,6 +14391,60 @@ pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vminnmq_f64(a, b) } +#[doc = "Floating-point Minimum Number"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnmh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.f16" + )] + fn _vminnmh_f16(a: f16, b: f16) -> f16; + } + _vminnmh_f16(a, b) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmv))] +pub unsafe fn vminnmv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f16.v4f16" + )] + fn 
_vminnmv_f16(a: float16x4_t) -> f16; + } + _vminnmv_f16(a) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmv))] +pub unsafe fn vminnmvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f16.v8f16" + )] + fn _vminnmvq_f16(a: float16x8_t) -> f16; + } + _vminnmvq_f16(a) +} #[doc = "Floating-point minimum number across vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"] #[doc = "## Safety"] @@ -11592,6 +14499,42 @@ pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { } _vminnmvq_f32(a) } +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminv))] +pub unsafe fn vminv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f16.v4f16" + )] + fn _vminv_f16(a: float16x4_t) -> f16; + } + _vminv_f16(a) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue 
= "136306")] +#[cfg_attr(test, assert_instr(fminv))] +pub unsafe fn vminvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f16.v8f16" + )] + fn _vminvq_f16(a: float16x8_t) -> f16; + } + _vminvq_f16(a) +} #[doc = "Horizontal vector min."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f32)"] #[doc = "## Safety"] @@ -12770,6 +15713,51 @@ pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } #[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = 
"Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -12818,6 +15806,45 @@ pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { let b: f64 = simd_extract!(b, LANE as u32); a * b } +#[doc = "Add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vmulh_f16(a: f16, b: f16) -> f16 { + a * b +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulh_lane_f16(a: f16, b: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let b: f16 = simd_extract!(b, LANE as u32); + a * b +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulh_laneq_f16(a: f16, b: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let b: f16 = simd_extract!(b, LANE as u32); + a * b +} #[doc = "Multiply long"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"] #[doc = "## Safety"] @@ -13231,6 +16258,42 @@ pub unsafe fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { a * b } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v4f16" + )] + fn _vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmulx_f16(a, b) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v8f16" + )] + fn _vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmulxq_f16(a, b) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13303,6 +16366,96 @@ pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { _vmulxq_f64(a, b) } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmulx_f16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmulx_f16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmulxq_f16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmulxq_f16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13399,6 +16552,28 @@ pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) - static_assert_uimm_bits!(LANE, 1); vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + vmulx_f16(a, vdup_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + vmulxq_f16(a, 
vdupq_n_f16(b)) +} #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64)"] #[doc = "## Safety"] @@ -13488,6 +16663,50 @@ pub unsafe fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { vmulxs_f32(a, simd_extract!(b, LANE as u32)) } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.f16" + )] + fn _vmulxh_f16(a: f16, b: f16) -> f16; + } + _vmulxh_f16(a, b) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxh_lane_f16(a: f16, b: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + vmulxh_f16(a, simd_extract!(b, LANE as u32)) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vmulxh_laneq_f16(a: f16, b: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + vmulxh_f16(a, simd_extract!(b, LANE as u32)) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13555,6 +16774,17 @@ pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t { pub unsafe fn vnegd_s64(a: i64) -> i64 { a.wrapping_neg() } +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fneg))] +pub unsafe fn vnegh_f16(a: f16) -> f16 { + -a +} #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64)"] #[doc = "## Safety"] @@ -13618,6 +16848,24 @@ pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 { vaddvq_u64(a) } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(faddp))] +pub unsafe fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v8f16" + )] + fn _vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpaddq_f16(a, b) +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] #[doc = 
"## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13837,6 +17085,78 @@ pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b))); simd_shuffle!(ret_val, ret_val, [1, 0]) } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v4f16" + )] + fn _vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpmax_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v8f16" + )] + fn _vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpmaxq_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, 
assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f16" + )] + fn _vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpmaxnm_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v8f16" + )] + fn _vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpmaxnmq_f16(a, b) +} #[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] #[doc = "## Safety"] @@ -14107,6 +17427,78 @@ pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 { } _vpmaxs_f32(a) } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f16" + )] + fn _vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + 
_vpmin_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v8f16" + )] + fn _vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpminq_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f16" + )] + fn _vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpminnm_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v8f16" 
+ )] + fn _vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpminnmq_f16(a, b) +} #[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] #[doc = "## Safety"] @@ -19520,6 +22912,24 @@ pub unsafe fn vrecpes_f32(a: f32) -> f32 { } _vrecpes_f32(a) } +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecpe))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpeh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.f16" + )] + fn _vrecpeh_f16(a: f16) -> f16; + } + _vrecpeh_f16(a) +} #[doc = "Floating-point reciprocal step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)"] #[doc = "## Safety"] @@ -19592,6 +23002,24 @@ pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 { } _vrecpss_f32(a, b) } +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.f16" + )] + fn _vrecpsh_f16(a: f16, b: f16) -> f16; + } + _vrecpsh_f16(a, b) +} #[doc = "Floating-point reciprocal exponent"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)"] #[doc = "## Safety"] @@ -19628,6 +23056,126 @@ pub unsafe fn vrecpxs_f32(a: f32) -> f32 { } _vrecpxs_f32(a) } +#[doc = "Floating-point reciprocal exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecpx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpxh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpx.f16" + )] + fn _vrecpxh_f16(a: f16) -> f16; + } + _vrecpxh_f16(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] 
+pub unsafe fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] #[doc = "## Safety"] @@ -21369,6 +24917,42 @@ pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { transmute(_vrnd64z_f64(simd_extract!(a, 0))) } #[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, 
assert_instr(frintz))] +pub unsafe fn vrnd_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v4f16" + )] + fn _vrnd_f16(a: float16x4_t) -> float16x4_t; + } + _vrnd_f16(a) +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrndq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v8f16" + )] + fn _vrndq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndq_f16(a) +} +#[doc = "Floating-point round to integral, toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21441,6 +25025,42 @@ pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { _vrndq_f64(a) } #[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrnda_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v4f16" + )] + fn _vrnda_f16(a: float16x4_t) -> float16x4_t; + } + _vrnda_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with 
ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndaq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v8f16" + )] + fn _vrndaq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndaq_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21512,6 +25132,78 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { } _vrndaq_f64(a) } +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndah_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.f16" + )] + fn _vrndah_f16(a: f16) -> f16; + } + _vrndah_f16(a) +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, 
assert_instr(frintz))] +pub unsafe fn vrndh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.f16" + )] + fn _vrndh_f16(a: f16) -> f16; + } + _vrndh_f16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndi_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f16" + )] + fn _vrndi_f16(a: float16x4_t) -> float16x4_t; + } + _vrndi_f16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndiq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v8f16" + )] + fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndiq_f16(a) +} #[doc = "Floating-point round to integral, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] #[doc = "## Safety"] @@ -21584,6 +25276,60 @@ pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { } _vrndiq_f64(a) } +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndih_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.f16" + )] + fn _vrndih_f16(a: f16) -> f16; + } + _vrndih_f16(a) +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndm_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.floor.v4f16" + )] + fn _vrndm_f16(a: float16x4_t) -> float16x4_t; + } + _vrndm_f16(a) +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndmq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.floor.v8f16" + )] + fn _vrndmq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndmq_f16(a) +} #[doc = "Floating-point round to integral, toward minus infinity"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] #[doc = "## Safety"] @@ -21656,6 +25402,24 @@ pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t { } _vrndmq_f64(a) } +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndmh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.floor.f16" + )] + fn _vrndmh_f16(a: f16) -> f16; + } + _vrndmh_f16(a) +} #[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] #[doc = "## Safety"] @@ -21692,6 +25456,24 @@ pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { } _vrndnq_f64(a) } +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintn))] +pub unsafe fn vrndnh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f16" + )] + fn _vrndnh_f16(a: f16) -> f16; + } + _vrndnh_f16(a) +} #[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] #[doc = "## 
Safety"] @@ -21711,6 +25493,42 @@ pub unsafe fn vrndns_f32(a: f32) -> f32 { _vrndns_f32(a) } #[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndp_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v4f16" + )] + fn _vrndp_f16(a: float16x4_t) -> float16x4_t; + } + _vrndp_f16(a) +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndpq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v8f16" + )] + fn _vrndpq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndpq_f16(a) +} +#[doc = "Floating-point round to integral, toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21782,6 +25600,60 @@ pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { } _vrndpq_f64(a) } +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndph_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.f16" + )] + fn _vrndph_f16(a: f16) -> f16; + } + _vrndph_f16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndx_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.rint.v4f16" + )] + fn _vrndx_f16(a: float16x4_t) -> float16x4_t; + } + _vrndx_f16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndxq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.rint.v8f16" + )] + fn _vrndxq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndxq_f16(a) +} #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] #[doc = "## Safety"] @@ -21854,6 +25726,24 @@ pub unsafe fn 
vrndxq_f64(a: float64x2_t) -> float64x2_t { } _vrndxq_f64(a) } +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndxh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.rint.f16" + )] + fn _vrndxh_f16(a: f16) -> f16; + } + _vrndxh_f16(a) +} #[doc = "Signed rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] #[doc = "## Safety"] @@ -22074,6 +25964,25 @@ pub unsafe fn vrsqrtes_f32(a: f32) -> f32 { } _vrsqrtes_f32(a) } +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrteh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f16" + )] + fn _vrsqrteh_f16(a: f16) -> f16; + } + _vrsqrteh_f16(a) +} #[doc = "Floating-point reciprocal square root step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] #[doc = "## Safety"] @@ -22146,6 +26055,24 @@ pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 { } _vrsqrtss_f32(a, b) } +#[doc = "Floating-point reciprocal square root step"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f16" + )] + fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; + } + _vrsqrtsh_f16(a, b) +} #[doc = "Signed rounding shift right and accumulate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] #[doc = "## Safety"] @@ -23293,6 +27220,28 @@ pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 { _vsqadds_u32(a.as_signed(), b).as_unsigned() } #[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsqrt_f16(a: float16x4_t) -> float16x4_t { + simd_fsqrt(a) +} +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { + simd_fsqrt(a) +} +#[doc = "Calculates the square root of each lane."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] #[doc = 
"## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -23336,6 +27285,24 @@ pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t { pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { simd_fsqrt(a) } +#[doc = "Floating-point square root"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fsqrt))] +pub unsafe fn vsqrth_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.sqrt.f16" + )] + fn _vsqrth_f16(a: f16) -> f16; + } + _vsqrth_f16(a) +} #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[doc = "## Safety"] @@ -23705,6 +27672,30 @@ pub unsafe fn vsrid_n_u64(a: u64, b: u64) -> u64 { transmute(vsri_n_u64::(transmute(a), transmute(b))) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -25003,6 +28994,17 @@ pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 { pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 { a.wrapping_sub(b) } +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vsubh_f16(a: f16, b: f16) -> f16 { + a - b +} #[doc = "Signed Subtract Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] #[doc = "## Safety"] @@ -25821,6 +29823,28 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } #[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] +pub unsafe fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 4, 2, 6]) +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] +pub unsafe fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) +} +#[doc = "Transpose vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26075,6 +30099,28 @@ pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } #[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] +pub unsafe fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [1, 5, 3, 7]) +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] +pub unsafe fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) +} +#[doc = "Transpose vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[doc = "## Safety"] #[doc 
= " * Neon instrinsic unsafe"] @@ -26669,6 +30715,28 @@ pub unsafe fn vusdotq_laneq_s32( vusdotq_s32(a, b, transmute(c)) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] +pub unsafe fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 2, 4, 6]) +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] +pub unsafe fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26923,6 +30991,28 @@ pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [1, 3, 5, 7]) +} +#[doc = "Unzip vectors"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -27197,6 +31287,28 @@ pub unsafe fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64 _vxarq_u64(a.as_signed(), b.as_signed(), IMM6 as i64).as_unsigned() } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[doc = "## Safety"] #[doc = 
" * Neon instrinsic unsafe"] @@ -27451,6 +31563,28 @@ pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { simd_shuffle!(a, b, [0, 2]) } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 868cb1937b55..522388b62703 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -594,6 +594,54 @@ pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2 simd_add(a, simd_cast(d)) } #[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f16)"] +#[doc = 
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v4f16" + )] + fn _vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vabd_f16(a, b) +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v8f16" + )] + fn _vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vabdq_f16(a, b) +} +#[doc = "Absolute difference between the arguments of Floating"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f32)"] #[doc = "## Safety"] #[doc = " * Neon 
instrinsic unsafe"] @@ -1169,6 +1217,38 @@ pub unsafe fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { simd_cast(vabd_u32(a, b)) } #[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabs_f16(a: float16x4_t) -> float16x4_t { + simd_fabs(a) +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabsq_f16(a: float16x8_t) -> float16x8_t { + simd_fabs(a) +} +#[doc = "Floating-point absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1400,6 +1480,54 @@ pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t { } _vabsq_s32(a) } +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabsh_f16(a: f16) -> f16 { + simd_extract!(vabs_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_add(a, b) +} +#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_add(a, b) +} #[doc = "Bitwise exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8)"] #[doc = "## Safety"] @@ -1538,6 +1666,22 @@ pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { pub unsafe fn vaddq_p64(a: 
poly64x2_t, b: poly64x2_t) -> poly64x2_t { simd_xor(a, b) } +#[doc = "Add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vaddh_f16(a: f16, b: f16) -> f16 { + a + b +} #[doc = "Bitwise exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p128)"] #[doc = "## Safety"] @@ -2038,6 +2182,54 @@ pub unsafe fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_and(a, b) } #[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v4i16.v4f16" + )] + fn _vcage_f16(a: float16x4_t, b: float16x4_t) -> int16x4_t; + } + _vcage_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare 
greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v8i16.v8f16" + )] + fn _vcageq_f16(a: float16x8_t, b: float16x8_t) -> int16x8_t; + } + _vcageq_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2100,6 +2292,54 @@ pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { _vcageq_f32(a, b).as_unsigned() } #[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { 
+ unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v4i16.v4f16" + )] + fn _vcagt_f16(a: float16x4_t, b: float16x4_t) -> int16x4_t; + } + _vcagt_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v8i16.v8f16" + )] + fn _vcagtq_f16(a: float16x8_t, b: float16x8_t) -> int16x8_t; + } + _vcagtq_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2162,6 +2402,38 @@ pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { _vcagtq_f32(a, b).as_unsigned() } #[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcage_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcageq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2208,6 +2480,38 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { vcageq_f32(b, a) } #[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcagt_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcagtq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2254,6 +2558,38 @@ pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { vcagtq_f32(b, a) } #[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_eq(a, b) +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f16)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_eq(a, b) +} +#[doc = "Floating-point compare equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2622,6 +2958,38 @@ pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { simd_eq(a, b) } #[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_ge(a, b) +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgeq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_ge(a, b) +} +#[doc = "Floating-point compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2943,6 +3311,72 @@ pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_ge(a, b) } +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_ge(a, transmute(b)) +} +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0); + simd_ge(a, transmute(b)) +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_gt(a, b) +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_gt(a, b) +} #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f32)"] #[doc = "## Safety"] @@ -3265,6 +3699,72 @@ pub unsafe fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_gt(a, b) } +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_gt(a, transmute(b)) +} +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_gt(a, transmute(b)) +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_le(a, b) +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_le(a, b) +} #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f32)"] #[doc = "## Safety"] @@ -3587,6 +4087,40 @@ pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_le(a, b) } +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_le(a, transmute(b)) +} +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_le(a, transmute(b)) +} #[doc = "Count leading sign bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s8)"] #[doc = "## Safety"] @@ -3912,6 +4446,38 @@ pub unsafe fn vclsq_u32(a: uint32x4_t) -> int32x4_t { vclsq_s32(transmute(a)) } #[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_lt(a, b) +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltq_f16(a: float16x8_t, b: 
float16x8_t) -> uint16x8_t { + simd_lt(a, b) +} +#[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -4233,6 +4799,40 @@ pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_lt(a, b) } +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_lt(a, transmute(b)) +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_lt(a, transmute(b)) +} #[doc = "Count leading zero bits"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"] #[doc = "## Safety"] @@ -4993,7 +5593,19 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] ) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) +} +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5012,7 +5624,7 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { pub unsafe fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5031,7 +5643,7 @@ pub unsafe fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { pub unsafe fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5050,7 +5662,7 @@ pub unsafe fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { pub unsafe fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5069,7 +5681,7 @@ pub unsafe fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { pub unsafe fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5088,7 +5700,7 @@ pub unsafe fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { pub unsafe fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { simd_shuffle!(a, b, [0, 1]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5107,7 +5719,7 @@ pub unsafe fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { pub unsafe fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] 
#[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5126,7 +5738,7 @@ pub unsafe fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { pub unsafe fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5145,7 +5757,7 @@ pub unsafe fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { pub unsafe fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5164,7 +5776,7 @@ pub unsafe fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { pub unsafe fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { simd_shuffle!(a, b, [0, 1]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5183,7 +5795,7 @@ pub unsafe fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { pub unsafe fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5202,7 +5814,7 @@ 
pub unsafe fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { pub unsafe fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5222,6 +5834,41 @@ pub unsafe fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { simd_shuffle!(a, b, [0, 1]) } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcreate_f16(a: u64) -> float16x4_t { + transmute(a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcreate_f16(a: u64) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5731,6 +6378,102 @@ pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t { pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t { transmute(a) } +#[doc = "Floating-point convert to lower precision narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +# [cfg_attr (all (test , target_arch = "arm") , assert_instr (vcvt . f16 . f32))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to higher precision long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtl) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f32_f16(a: float16x4_t) -> float32x4_t { + simd_cast(a) +} #[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"] #[doc = "## Safety"] @@ -5824,6 +6567,122 @@ pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { simd_cast(a) } #[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_f16_s16(a: int16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_s16(a: int16x4_t, n: i32) -> float16x4_t; + } + _vcvt_n_f16_s16(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_f16_s16(a: int16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_s16(a: int16x8_t, n: i32) -> float16x8_t; + } + _vcvtq_n_f16_s16(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_f16_u16(a: uint16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_u16(a: int16x4_t, n: i32) -> float16x4_t; + } + _vcvt_n_f16_u16(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_f16_u16(a: uint16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_u16(a: int16x8_t, n: i32) -> float16x8_t; + } + _vcvtq_n_f16_u16(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5991,6 +6850,64 @@ pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { } _vcvtq_n_f32_u32(a.as_signed(), N) } +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue 
= "136306")] +pub unsafe fn vcvt_n_s16_f16(a: float16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16" + )] + fn _vcvt_n_s16_f16(a: float16x4_t, n: i32) -> int16x4_t; + } + _vcvt_n_s16_f16(a, N) +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_s16_f16(a: float16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16" + )] + fn _vcvtq_n_s16_f16(a: float16x8_t, n: i32) -> int16x8_t; + } + _vcvtq_n_s16_f16(a, N) +} #[doc = "Floating-point convert to fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] #[doc = "## Safety"] @@ -6075,6 +6992,64 @@ pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { } _vcvtq_n_s32_f32(a, N) } +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_u16_f16(a: float16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16" + )] + fn _vcvt_n_u16_f16(a: float16x4_t, n: i32) -> int16x4_t; + } + _vcvt_n_u16_f16(a, N).as_unsigned() +} +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_u16_f16(a: float16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16" + )] + fn _vcvtq_n_u16_f16(a: float16x8_t, n: i32) -> int16x8_t; + } + _vcvtq_n_u16_f16(a, N).as_unsigned() +} #[doc = "Floating-point convert to fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] #[doc = "## Safety"] @@ -6160,6 +7135,38 @@ pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { _vcvtq_n_u32_f32(a, N).as_unsigned() } #[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t { + simd_cast(a) +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_s16_f16(a: float16x8_t) -> int16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6222,6 +7229,38 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { _vcvtq_s32_f32(a) } #[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t { + simd_cast(a) +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_u16_f16(a: float16x8_t) -> uint16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6528,6 +7567,46 @@ pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4 
_vdotq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7073,6 +8152,46 @@ pub unsafe fn vdup_lane_u64(a: uint64x1_t) 
-> uint64x1_t { a } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7617,6 +8736,38 @@ pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { static_assert_uimm_bits!(N, 
1); transmute::(simd_extract!(a, N as u32)) } +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_n_f16(a: f16) -> float16x4_t { + float16x4_t::splat(a) +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_n_f16(a: f16) -> float16x8_t { + float16x8_t::splat(a) +} #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[doc = "## Safety"] @@ -8086,6 +9237,30 @@ pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_xor(a, b) } #[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} +#[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8383,6 +9558,34 @@ pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_ } } #[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 
6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} +#[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8935,6 +10138,48 @@ pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t _ => unreachable_unchecked(), } } +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f16")] + fn _vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vfma_f16(b, c, a) +} +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v8f16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v8f16")] + fn _vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vfmaq_f16(b, c, a) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] #[doc = "## Safety"] @@ -9038,6 +10283,42 @@ pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) } #[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + let b: float16x4_t = simd_neg(b); + vfma_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + let b: float16x8_t = simd_neg(b); + vfmaq_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9131,6 +10412,66 @@ pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) } +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vget_high_f16(a: float16x8_t) -> float16x4_t { + simd_shuffle!(a, a, [4, 5, 6, 7]) +} +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vget_low_f16(a: float16x8_t) -> float16x4_t { + simd_shuffle!(a, a, [0, 1, 2, 3]) +} +#[doc = "Duplicate vector element to scalar"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + simd_extract!(a, LANE as u32) +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + simd_extract!(a, LANE as u32) +} #[doc = "Halving add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] #[doc = "## Safety"] @@ -9875,6 +11216,254 @@ pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } _vhsubq_u32(a.as_signed(), b.as_signed()).as_unsigned() } +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { + let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { + let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + transmute(vld1_v4f16( + ptr as 
*const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + let ret_val: float16x4_t = transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + let ret_val: float16x8_t = transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f16.p0f16")] + fn _vld1_f16_x2(a: *const f16) -> float16x4x2_t; + } + _vld1_f16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" 
{ + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f16.p0f16")] + fn _vld1_f16_x3(a: *const f16) -> float16x4x3_t; + } + _vld1_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f16.p0f16")] + fn _vld1_f16_x4(a: *const f16) -> float16x4x4_t; + } + _vld1_f16_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8f16.p0f16")] + fn _vld1q_f16_x2(a: *const f16) -> float16x8x2_t; + } + _vld1q_f16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8f16.p0f16")] + fn _vld1q_f16_x3(a: *const f16) -> float16x8x3_t; + } + _vld1q_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8f16.p0f16")] + fn _vld1q_f16_x4(a: *const f16) -> float16x8x4_t; + } + _vld1q_f16_x4(a) +} #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] #[doc = "## Safety"] @@ -10546,6 +12135,42 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { } _vld1q_f32_x4(a) } +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] #[doc = "## Safety"] @@ -13636,6 +15261,118 @@ unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t { } _vld1q_v8i16(a, b) } +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] + fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; + } + _vld1_v4f16(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1.v8f16")] + fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; + } + _vld1q_v8f16(a, b) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0f16")] + fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0f16")] + fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4f16.p0f16" + )] + fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_dup_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8f16.p0f16" + )] + fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _) +} #[doc = "Load single 2-element structure and replicate to all lanes of two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] #[doc = "## Safety"] @@ -14523,6 +16260,84 @@ pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0f16")] + fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0f16")] + fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4f16.p0f16" + )] + fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v8f16.p0f16" + )] + fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_f16(a as _) +} #[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] @@ -14804,6 +16619,110 @@ pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { _vld2q_s32(a as _) } #[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] + fn _vld2_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x2_t; + } + _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] + fn _vld2q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" + )] + fn 
_vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) + -> float16x4x2_t; + } + _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" + )] + fn _vld2q_lane_f16( + a: float16x8_t, + b: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -15921,6 +17840,84 @@ pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0f16")] + fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0f16")] + fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4f16.p0f16" + )] + fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; + } + _vld3_dup_f16(a as _) +} +#[doc = 
"Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8f16.p0f16" + )] + fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _) +} #[doc = "Load single 3-element structure and replicate to all lanes of three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] @@ -16826,6 +18823,84 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0f16")] + fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_f16(a as _, 
2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0f16")] + fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4f16.p0f16" + )] + fn _vld3_f16(ptr: *const f16) -> float16x4x3_t; + } + _vld3_f16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8f16.p0f16" + )] + fn _vld3q_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_f16(a as _) +} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] #[doc = "## Safety"] @@ -17106,6 +19181,118 @@ pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { } _vld3q_s32(a as *const i8, 4) } +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] + fn _vld3_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x3_t; + } + _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] 
+#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] + fn _vld3q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" + )] + fn _vld3_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x3_t; + } + _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" + )] + fn _vld3q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] #[doc = "## Safety"] @@ -18295,6 +20482,84 @@ pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) - } _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0f16")] + fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0f16")] + fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f16.p0f16" + )] + fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_dup_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8f16.p0f16" + )] + fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _) +} #[doc = "Load single 4-element structure and replicate to all lanes of four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] #[doc = "## Safety"] @@ -19218,6 +21483,84 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0f16")] + fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0f16")] + fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4f16.p0f16" + )] + fn _vld4_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8f16.p0f16" + )] + fn _vld4q_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_f16(a as _) +} #[doc = "Load 
multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] #[doc = "## Safety"] @@ -19498,6 +21841,122 @@ pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { } _vld4q_s32(a as *const i8, 4) } +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] + fn _vld4_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x4_t; + } + _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] + fn _vld4q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + )] + fn _vld4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x4_t; + } + _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + )] + fn _vld4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} #[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] #[doc = "## Safety"] @@ -20720,6 +23179,54 @@ pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { ret_val } #[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f16" + )] + fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmax_f16(a, b) +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) 
+)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v8f16" + )] + fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmaxq_f16(a, b) +} +#[doc = "Maximum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21154,6 +23661,54 @@ pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f16" + )] + fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmaxnm_f16(a, b) +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] 
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v8f16" + )] + fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmaxnmq_f16(a, b) +} +#[doc = "Floating-point Maximum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21216,6 +23771,54 @@ pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { _vmaxnmq_f32(a, b) } #[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.fmin.v4f16" + )] + fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmin_f16(a, b) +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v8f16" + )] + fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vminq_f16(a, b) +} +#[doc = "Minimum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21650,6 +24253,54 @@ pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vminq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +pub unsafe fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f16" + )] + fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vminnm_f16(a, b) +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v8f16" + )] + fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vminnmq_f16(a, b) +} +#[doc = "Floating-point Minimum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -25209,6 +27860,70 @@ pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x } _vmmlaq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmov_n_f16(a: f16) -> float16x4_t { + vdup_n_f16(a) +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmovq_n_f16(a: f16) -> float16x8_t { + vdupq_n_f16(a) +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_mul(a, b) +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_mul(a, b) +} #[doc = "Multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] #[doc = "## Safety"] @@ -25255,6 +27970,61 @@ pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { simd_mul(a, b) } +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] 
+#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!( + v, + v, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] #[doc = "## Safety"] @@ -25850,6 +28620,38 @@ pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> ) } #[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + simd_mul(a, vdup_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + 
simd_mul(a, vdupq_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26939,6 +29741,38 @@ pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { _vmull_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vneg_f16(a: float16x4_t) -> float16x4_t { + simd_neg(a) +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vnegq_f16(a: float16x8_t) -> float16x8_t { + simd_neg(a) +} +#[doc = "Negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -27875,6 +30709,30 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { x } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(faddp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v4f16" + )] + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpadd_f16(a, b) +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -34777,6 +37635,54 @@ pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { simd_shuffle!(ret_val, ret_val, [1, 0]) } #[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpe_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f16" + )] + fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; + } + _vrecpe_f16(a) +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v8f16" + )] + fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; + } + _vrecpeq_f16(a) +} +#[doc = "Reciprocal estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -34901,6 +37807,54 @@ pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { _vrecpeq_u32(a.as_signed()).as_unsigned() } #[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +pub unsafe fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f16" + )] + fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vrecps_f16(a, b) +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v8f16" + )] + fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vrecpsq_f16(a, b) +} +#[doc = "Floating-point reciprocal step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -34963,6 +37917,1810 @@ pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { _vrecpsq_f32(a, b) } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub 
unsafe fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = 
"big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 
3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: 
float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u8(a: uint8x16_t) -> 
float16x8_t { + let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + 
transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + let a: 
poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub 
unsafe fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -48776,6 +53534,38 @@ pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { let ret_val: poly16x8_t = transmute(a); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrev64_f16(a: float16x4_t) -> float16x4_t { + simd_shuffle!(a, a, [3, 2, 1, 0]) +} +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrev64q_f16(a: float16x8_t) -> float16x8_t { + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} #[doc = "Rounding halving add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] #[doc = "## Safety"] @@ -49149,6 +53939,54 @@ pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vrhaddq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrndn_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frintn.v4f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")] + fn _vrndn_f16(a: float16x4_t) -> float16x4_t; + } + _vrndn_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrndnq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frintn.v8f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")] + fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndnq_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -50310,6 +55148,54 @@ pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { transmute(vrshrn_n_s64::(transmute(a))) } #[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrsqrte.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f16" + )] + fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; + } + _vrsqrte_f16(a) +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v8f16" + )] + fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + } + _vrsqrteq_f16(a) +} +#[doc = "Reciprocal square-root estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -50434,6 +55320,54 @@ pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { _vrsqrteq_u32(a.as_signed()).as_unsigned() } #[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f16" + )] + fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vrsqrts_f16(a, b) +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v8f16" + )] + fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vrsqrtsq_f16(a, b) +} +#[doc = "Floating-point reciprocal square root step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -51142,6 +56076,42 @@ pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { simd_shuffle!(ret_val, ret_val, [1, 0]) } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(b, LANE as u32, a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -54836,6 +59806,280 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x )) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + vst1_v4f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + vst1q_v8f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f16.v4f16")] + fn _vst1_f16_x2(ptr: *mut f16, a: 
float16x4_t, b: float16x4_t); + } + _vst1_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f16.v8f16")] + fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); + } + _vst1q_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4f16.p0f16" + )] + fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8f16.p0f16" + )] + fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f16.v4f16")] + fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); + } + _vst1_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f16.v8f16")] + fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: 
float16x8_t); + } + _vst1q_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4f16.p0f16" + )] + fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8f16.p0f16" + )] + fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f16.v4f16")] + fn _vst1_f16_x4( + ptr: *mut f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ); + } + _vst1_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f16.v8f16")] + fn _vst1q_f16_x4( + ptr: *mut f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ); + } + _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.st1x4.v4f16.p0f16" + )] + fn _vst1_f16_x4( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ptr: *mut f16, + ); + } + _vst1_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8f16.p0f16" + )] + fn _vst1q_f16_x4( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ptr: *mut f16, + ); + } + _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -55286,38 +60530,6 @@ pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v2f32.p0")] - fn _vst1_f32_x3(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t); - } - _vst1_f32_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures to one, two, three, or four 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v4f32.p0")] - fn _vst1q_f32_x3(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t); - } - _vst1q_f32_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(st1))] @@ -55446,6 +60658,42 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = 
"Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] @@ -57996,6 +63244,40 @@ unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) { } _vst1q_v8i16(addr, val, align) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] + fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); + } + 
_vst1_v4f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] + fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); + } + _vst1q_v8f16(addr, val, align) +} #[doc = "Store multiple single-element structures from one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] #[doc = "## Safety"] @@ -58022,6 +63304,78 @@ pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { *a = simd_extract!(b, LANE as u32); } #[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f16.p0i8" + )] + fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + } + _vst2_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures 
from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8f16.p0i8" + )] + fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); + } + _vst2q_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f16")] + fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); + } + _vst2_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: 
float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8f16")] + fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); + } + _vst2q_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -58302,6 +63656,86 @@ pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { _vst2q_s32(a as _, b.0, b.1, 4) } #[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f16.p0i8" + )] + fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) { + 
static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8f16.p0i8" + )] + fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f16")] + fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); + } + _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8f16")] + fn _vst2q_lane_f16(ptr: *mut i8, 
a: float16x8_t, b: float16x8_t, n: i32, size: i32); + } + _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -59086,6 +64520,78 @@ pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { vst2q_s16(transmute(a), transmute(b)) } #[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4f16")] + fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); + } + _vst3_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8f16")] + fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: 
i32); + } + _vst3q_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f16.p0i8" + )] + fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); + } + _vst3_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8f16.p0i8" + )] + fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); + } + _vst3q_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -59366,6 +64872,100 @@ pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { _vst3q_s32(b.0, b.1, b.2, a as _) } #[doc = "Store multiple 3-element structures from three registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4f16")] + fn _vst3_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ); + } + _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8f16")] + fn _vst3q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4f16.p0i8" + )] + fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8f16.p0i8" + )] + fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -60192,6 +65792,92 @@ pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { vst3q_s16(transmute(a), transmute(b)) } #[doc = "Store multiple 4-element structures from four registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4f16")] + fn _vst4_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + size: i32, + ); + } + _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8f16")] + fn _vst4q_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + size: i32, + ); + } + _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, 
assert_instr(st4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4f16.p0i8" + )] + fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); + } + _vst4_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v8f16.p0i8" + )] + fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); + } + _vst4q_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -60521,6 +66207,116 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { _vst4q_s32(b.0, b.1, b.2, b.3, a as _) } #[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4f16")] + fn _vst4_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8f16")] + fn _vst4q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); 
+ unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f16.p0i8" + )] + fn _vst4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8f16.p0i8" + )] + fn _vst4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -61410,6 +67206,38 @@ pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { vst4q_s16(transmute(a), transmute(b)) } #[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_sub(a, b) +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_sub(a, b) +} +#[doc = "Subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -63384,6 +69212,42 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } #[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = 
simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} +#[doc = "Transpose elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -64413,6 +70277,42 @@ pub unsafe fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4 _vusmmlaq_s32(a, b.as_signed(), c) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} +#[doc = "Unzip vectors"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -64887,6 +70787,42 @@ pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { transmute((a0, b0)) } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs index 98a3a87b611a..cf24be9a747b 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -4783,6 +4783,24 @@ pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_ simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) } +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + let not = int16x4_t::splat(-1); + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) +} + /// Bitwise Select. 
#[inline] #[target_feature(enable = "neon")] @@ -5096,6 +5114,24 @@ pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8 )) } +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + let not = int16x8_t::splat(-1); + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) +} + /// Bitwise Select. (128-bit) #[inline] #[target_feature(enable = "neon")] diff --git a/library/stdarch/crates/intrinsic-test/missing_arm.txt b/library/stdarch/crates/intrinsic-test/missing_arm.txt index 5fc97acb8a6f..04c09a27d90d 100644 --- a/library/stdarch/crates/intrinsic-test/missing_arm.txt +++ b/library/stdarch/crates/intrinsic-test/missing_arm.txt @@ -108,6 +108,74 @@ vsri_n_p64 vsriq_n_p64 vtst_p64 vtstq_p64 +vaddh_f16 +vsubh_f16 +vabsh_f16 +vdivh_f16 +vmulh_f16 +vfmsh_f16 +vfmah_f16 +vminnmh_f16 +vmaxnmh_f16 +vrndh_f16 +vrndnh_f16 +vrndih_f16 +vrndah_f16 +vrndph_f16 +vrndmh_f16 +vrndxh_f16 +vsqrth_f16 +vnegh_f16 +vcvth_f16_s32 +vcvth_s32_f16 +vcvth_n_f16_s32 +vcvth_n_s32_f16 +vcvth_f16_u32 +vcvth_u32_f16 +vcvth_n_f16_u32 +vcvth_n_u32_f16 +vcvtah_s32_f16 +vcvtah_u32_f16 +vcvtmh_s32_f16 +vcvtmh_u32_f16 +vcvtpq_s16_f16 +vcvtpq_u16_f16 +vcvtp_s16_f16 +vcvtp_u16_f16 +vcvtph_s32_f16 +vcvtph_u32_f16 +vcvtnh_u32_f16 +vcvtnh_s32_f16 +vfmlsl_low_f16 +vfmlslq_low_f16 +vfmlsl_high_f16 +vfmlslq_high_f16 +vfmlsl_lane_high_f16 +vfmlsl_laneq_high_f16 +vfmlslq_lane_high_f16 +vfmlslq_laneq_high_f16 +vfmlsl_lane_low_f16 +vfmlsl_laneq_low_f16 +vfmlslq_lane_low_f16 +vfmlslq_laneq_low_f16 +vfmlal_low_f16 +vfmlalq_low_f16 
+vfmlal_high_f16 +vfmlalq_high_f16 +vfmlal_lane_low_f16 +vfmlal_laneq_low_f16 +vfmlalq_lane_low_f16 +vfmlalq_laneq_low_f16 +vfmlal_lane_high_f16 +vfmlal_laneq_high_f16 +vfmlalq_lane_high_f16 +vfmlalq_laneq_high_f16 +vreinterpret_f16_p64 +vreinterpretq_f16_p64 +vreinterpret_p64_f16 +vreinterpretq_p64_f16 +vreinterpret_p128_f16 +vreinterpretq_p128_f16 # Present in Clang header but triggers an ICE due to lack of backend support. vcmla_f32 @@ -134,6 +202,31 @@ vcmlaq_rot270_laneq_f32 vcmlaq_rot90_f32 vcmlaq_rot90_lane_f32 vcmlaq_rot90_laneq_f32 +vcmla_f16 +vcmlaq_f16 +vcmla_laneq_f16 +vcmla_lane_f16 +vcmla_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmla_rot90_f16 +vcmlaq_rot90_f16 +vcmla_rot180_f16 +vcmlaq_rot180_f16 +vcmla_rot270_f16 +vcmlaq_rot270_f16 +vcmla_rot90_lane_f16 +vcmla_rot90_laneq_f16 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 +vcmla_rot180_lane_f16 +vcmla_rot180_laneq_f16 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmla_rot270_lane_f16 +vcmla_rot270_laneq_f16 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 # Implemented in stdarch for A64 only, Clang support both A32/A64 vadd_s64 @@ -182,4 +275,46 @@ vrndpq_f32 vrndq_f32 vrndq_f32 vrndx_f32 -vrndxq_f32 \ No newline at end of file +vrndxq_f32 +vrnda_f16 +vrnda_f16 +vrndaq_f16 +vrndaq_f16 +vrnd_f16 +vrnd_f16 +vrndi_f16 +vrndi_f16 +vrndiq_f16 +vrndiq_f16 +vrndm_f16 +vrndm_f16 +vrndmq_f16 +vrndmq_f16 +vrndns_f16 +vrndp_f16 +vrndpq_f16 +vrndq_f16 +vrndx_f16 +vrndxq_f16 +vpmin_f16 +vpmax_f16 +vcaddq_rot270_f16 +vcaddq_rot90_f16 +vcadd_rot270_f16 +vcadd_rot90_f16 +vcvtm_s16_f16 +vcvtmq_s16_f16 +vcvtm_u16_f16 +vcvtmq_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvtnq_s16_f16 +vcvtnq_u16_f16 +vcvtn_s16_f16 +vcvtn_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvta_s16_f16 +vcvta_u16_f16 +vceqz_f16 +vceqzq_f16 diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs index b64b9463cb6c..57df40bb2fda 100644 --- 
a/library/stdarch/crates/intrinsic-test/src/main.rs +++ b/library/stdarch/crates/intrinsic-test/src/main.rs @@ -194,7 +194,7 @@ fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) -> #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_f16))] +#![feature(stdarch_neon_f16)] #![allow(non_upper_case_globals)] use core_arch::arch::{target_arch}::*; diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 3ce3e4fcb4d9..f3924b0f9409 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -5,7 +5,7 @@ arch_cfgs: # Generate big endian shuffles auto_big_endian: true -# Repeatedly used anchors +# Repeatedly used anchors # #[stable(feature = "neon_intrinsics", since = "1.59.0")] neon-stable: &neon-stable FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] @@ -60,6 +60,17 @@ neon-aes: &neon-aes neon-i8mm: &neon-i8mm FnCall: [target_feature, ['enable = "neon,i8mm"']] +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +# #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +enable-fhm: &enable-fhm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fhm"']] }]] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]] + #[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = 
"117223"))] neon-unstable-i8mm: &neon-unstable-i8mm FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] @@ -71,6 +82,10 @@ neon-unstable-fcma: &neon-unstable-fcma aarch64-crc-stable: &aarch64-crc-stable FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']] +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -171,6 +186,27 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - 0 + - name: "vabd{type[0]}" + doc: "Floating-point absolute difference" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fabd] + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vabd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - 0 + - name: "vabdl_high{neon_type[0].noq}" doc: Signed Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -354,6 +390,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vceq{type[0]}" + doc: "Floating-point compare equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vceq_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vceqd_{type[0]}" doc: "Compare bitwise equal" arguments: ["a: {type[0]}", "b: {type[0]}"] @@ -534,6 +592,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcgt{type[0]}" + doc: "Floating-point compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - 'simd_extract!' + - - FnCall: + - "vcgt_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vclt{neon_type[0].no}" doc: "Compare signed less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -600,6 +680,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcle{type[0]}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcle_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vcge{neon_type[0].no}" doc: "Compare signed greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -721,6 +823,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vclez{type[0]}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcltz{neon_type[0].no}" doc: "Compare signed less than zero" arguments: ["a: {neon_type[0]}"] @@ -787,6 +909,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vcltz{type[0]}" + doc: "Floating-point compare less than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcltz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcltzd_s64" doc: "Compare less than zero" arguments: ["a: {type[0]}"] @@ -843,6 +985,27 @@ intrinsics: - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" arch: aarch64,arm64ec + - name: "vcagt{type[0]}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcagt{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - '_vcagth_f16(a, b).as_unsigned() as u16' + - name: "vcage{neon_type[0].no}" doc: "Floating-point absolute compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -881,6 +1044,28 @@ intrinsics: - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" arch: aarch64,arm64ec + + - name: "vcage{type[0]}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcage{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - "_vcageh_f16(a, b).as_unsigned() as u16" + - name: "vcalt{neon_type[0].no}" doc: "Floating-point absolute compare less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -911,6 +1096,21 @@ intrinsics: compose: - FnCall: ["vcagt{type[0]}", [b, a]] + - name: "vcalt{type[0]}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: 
"{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcagt{type[0]}", [b, a]] + - name: "vcale{neon_type[0].no}" doc: "Floating-point absolute compare less than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -941,6 +1141,21 @@ intrinsics: compose: - FnCall: ["vcage{type[0]}", [b, a]] + - name: "vcale{type[0]}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcage{type[0]}", [b, a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -1027,6 +1242,119 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [i32, f16, 'h'] + - [i64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: 
{type[0]}"] + return_type: "{type[4]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [f16, s16, 'h', i32, i16] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::(a) as i16" + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [u32, f16, 'h'] + - [u64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a.as_signed(), N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [i16, f16, 'h', 'i32', 'as i32'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a {type[4]}) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - 
FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [u16, f16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a as {type[3]}) as {type[1]}" + + - name: "vcvt{type[2]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {type[0]}"] @@ -1053,6 +1381,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{type[2]}", [a, N]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -1135,6 +1464,77 @@ intrinsics: compose: - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[3]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["s16", "f16", "h_f16_s16", i16] + - ["s32", "f16", "h_f16_s32", i32] + - ["s64", "f16", "h_f16_s64", i64] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to signed fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "s16", "h", i16, 'a as i16'] + - ["f16", "s32", "h", i32, 'a as i32'] + - ["f16", "s64", "h", i64, 'a as i64'] + compose: + - Identifier: ["{type[4]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", "h", 'a as u16'] + - ["f16", 
"u32", "h", 'a as u32'] + - ["f16", "u64", "h", 'a as u64'] + compose: + - Identifier: ["{type[3]}", Symbol] + + + - name: "vcvt{type[2]}" + doc: "Unsigned fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["u16", "f16", "h_f16_u16"] + - ["u32", "f16", "h_f16_u32"] + - ["u64", "f16", "h_f16_u64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt_f64_f32" doc: "Floating-point convert to higher precision long" arguments: ["a: {neon_type[0]}"] @@ -1171,8 +1571,44 @@ intrinsics: - '[2, 3]' - FnCall: [simd_cast, [b]] + - name: "vcvt_high_f16_f32" + doc: "Floating-point convert to lower precision" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x8_t, float16x4_t, float32x4_t] + compose: + - FnCall: + - vcombine_f16 + - - a + - FnCall: [vcvt_f16_f32, [b]] + + - name: "vcvt_high_f32_f16" + doc: "Floating-point convert to higher precision" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float32x4_t, float16x8_t] + compose: + - FnCall: + - vcvt_f32_f16 + - - FnCall: [vget_high_f16, [a]] + + - name: "vcvt_f32_f64" - doc: "Floating-point convert to lower precision narrow" + doc: "Floating-point convert" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: @@ -1306,6 +1742,77 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: 
["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h', '16'] + - ["f16", "i64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [f16, u16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::(a) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h', '16'] + - ["f16", "u64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: 
["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + - name: "vcvt{type[2]}" doc: "Floating-point convert to fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -1375,6 +1882,27 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvta_{neon_type[1]}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] @@ -1394,6 +1922,81 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h_u32_f16'] + - ["f16", "u64", 'h_u64_f16'] + + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 
'h_s32_f16'] + - ["f16", "i64", 'h_s64_f16'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h_s16_f16', 's32'] + compose: + - 'vcvtah_{type[3]}_f16(a) as i16' + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h_u16_f16', 'u32'] + compose: + - 'vcvtah_{type[3]}_f16(a) as u16' + - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] @@ -1453,6 +2056,121 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to 
even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as i16' + + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: 
"llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as u16' + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed integer, rounding toward minus infinity" arguments: ["a: {neon_type[0]}"] @@ -1474,6 +2192,49 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}" doc: "Floating-point 
convert to signed integer, rounding toward minus infinity" arguments: ["a: {type[0]}"] @@ -1656,6 +2417,121 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtpu.{type[3]}.{type[0]}" arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert 
to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as i16' + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as u16' + - name: "vdup{neon_type.laneq_nox}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type}"] @@ -1793,10 +2669,49 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [N, 3]] - FnCall: [simd_extract!, [a, 'N as u32']] + - name: "vdup{type[2]}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type[0]}"] return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16", h_lane_f16] + compose: + - FnCall: [static_assert_uimm_bits!, 
[N, 2]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x8_t, "f16", h_laneq_f16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 4]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] @@ -2008,6 +2923,22 @@ intrinsics: compose: - MethodCall: [a, wrapping_neg, []] + + - name: "vnegh_{type}" + doc: Negate + arguments: ["a: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fneg] + safety: + unsafe: [neon] + types: + - f16 + compose: + - '-a' + - name: "vneg{neon_type.no}" doc: Negate arguments: ["a: {neon_type}"] @@ -2225,6 +3156,48 @@ intrinsics: - link: "llvm.rint.{neon_type}" arch: aarch64,arm64ec + + - name: "vrndx{neon_type.no}" + doc: "Floating-point round to integral exact, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.rint.{neon_type}" + links: + - link: "llvm.rint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndx{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: + unsafe: [neon] + types: 
+ - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.rint.{type[0]}" + links: + - link: "llvm.rint.{type[0]}" + arch: aarch64,arm64ec + + - name: "vrnda{neon_type.no}" doc: "Floating-point round to integral, to nearest with ties to away" arguments: ["a: {neon_type}"] @@ -2245,6 +3218,47 @@ intrinsics: - link: "llvm.round.{neon_type}" arch: aarch64,arm64ec + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.round.{neon_type}" + links: + - link: "llvm.round.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrnda{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.round.{type[0]}" + links: + - link: "llvm.round.{type[0]}" + arch: aarch64,arm64ec + - name: "vrndn{neon_type.no}" doc: "Floating-point round to integral, to nearest with ties to even" arguments: ["a: {neon_type}"] @@ -2280,6 +3294,25 @@ intrinsics: - link: "llvm.roundeven.{type}" arch: aarch64,arm64ec + - name: "vrndn{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.roundeven.{type[0]}" + links: + - link: "llvm.roundeven.{type[0]}" + arch: aarch64,arm64ec + - name: "vrndm{neon_type.no}" doc: "Floating-point round to integral, toward minus infinity" arguments: ["a: {neon_type}"] @@ -2300,6 +3333,49 @@ intrinsics: - 
link: "llvm.floor.{neon_type}" arch: aarch64,arm64ec + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.floor.{neon_type}" + links: + - link: "llvm.floor.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndm{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.floor.{type[0]}" + links: + - link: "llvm.floor.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vrndp{neon_type.no}" doc: "Floating-point round to integral, toward plus infinity" arguments: ["a: {neon_type}"] @@ -2320,6 +3396,46 @@ intrinsics: - link: "llvm.ceil.{neon_type}" arch: aarch64,arm64ec + + - name: "vrndp{neon_type.no}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.ceil.{neon_type}" + links: + - link: "llvm.ceil.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndp{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.ceil.{type[0]}" + links: + - link: "llvm.ceil.{type[0]}" + arch: aarch64,arm64ec + - name: "vrnd{neon_type.no}" doc: "Floating-point round to integral, 
toward zero" arguments: ["a: {neon_type}"] @@ -2340,6 +3456,47 @@ intrinsics: - link: "llvm.trunc.{neon_type}" arch: aarch64,arm64ec + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.trunc.{neon_type}" + links: + - link: "llvm.trunc.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrnd{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.trunc.{type[0]}" + links: + - link: "llvm.trunc.{type[0]}" + arch: aarch64,arm64ec + + - name: "vrndi{neon_type.no}" doc: "Floating-point round to integral, using current rounding mode" arguments: ["a: {neon_type}"] @@ -2360,6 +3517,48 @@ intrinsics: - link: "llvm.nearbyint.{neon_type}" arch: aarch64,arm64ec + + - name: "vrndi{neon_type.no}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinti] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.nearbyint.{neon_type}" + links: + - link: "llvm.nearbyint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndi{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + # TODO: double check me + assert_instr: [frinti] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: 
"llvm.nearbyint.{type[0]}" + links: + - link: "llvm.nearbyint.{type[0]}" + arch: aarch64,arm64ec + - name: "vqadd{type[1]}" doc: Saturating add arguments: ["a: {type[0]}", "b: {type[0]}"] @@ -4265,6 +5464,28 @@ intrinsics: - link: "llvm.aarch64.neon.fmulx.{neon_type}" arch: aarch64,arm64ec + + - name: "vmulx{neon_type.no}" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmulx.{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmulx.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmulx{type[0]}" doc: Floating-point multiply extended arguments: ["a: {type[1]}", "b: {type[1]}"] @@ -4283,6 +5504,27 @@ intrinsics: - link: "llvm.aarch64.neon.fmulx.{type[1]}" arch: aarch64,arm64ec + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "fmulx.{type[1]}" + links: + - link: "llvm.aarch64.neon.fmulx.{type[1]}" + arch: aarch64,arm64ec + + - name: "vmulx_lane_f64" doc: Floating-point multiply extended arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -4408,6 +5650,36 @@ intrinsics: - b - "{type[5]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as 
u32]'] + - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_shuffle!" + - - b + - b + - "{type[5]}" + + - name: "vmulx{type[0]}" doc: Floating-point multiply extended arguments: ["a: {type[1]}", "b: {neon_type[2]}"] @@ -4433,6 +5705,52 @@ intrinsics: - - b - "{type[5]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['h_lane_f16', f16, float16x4_t, '2', 'h_f16', "LANE as u32"] + - ['h_laneq_f16', f16, float16x8_t, '3', 'h_f16', "LANE as u32"] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_extract!" 
+ - - b + - "{type[5]}" + + + - name: "vmulx{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16"] + - [float16x8_t, "f16"] + compose: + - FnCall: + - vmulx{neon_type[0].no} + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + - name: "vfma{neon_type.no}" doc: Floating-point fused Multiply-Add to accumulator(vector) arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -4516,6 +5834,28 @@ intrinsics: - "vdupq_n_f64" - - c + - name: "vfma{neon_type[0].N}" + doc: Floating-point fused Multiply-Add to accumulator. + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmla] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + - name: "vdiv{neon_type.no}" doc: "Divide" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -4532,6 +5872,37 @@ intrinsics: compose: - FnCall: [simd_div, [a, b]] + - name: "vdiv{neon_type.no}" + doc: "Divide" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fdiv] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_div, [a, b]] + + - name: "vdiv{type[1]}_{type[0]}" + doc: Divide + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f16, 'h'] + compose: + - 'a / b' + - name: "vsub{neon_type.no}" doc: "Subtract" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ 
-4560,6 +5931,21 @@ intrinsics: compose: - MethodCall: [a, wrapping_sub, [b]] + - name: "vsub{type[0]}" + doc: "Subtract" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - 'a - b' + - name: "vaddv{neon_type[0].no}" doc: Floating-point add across vector arguments: ["a: {neon_type[0]}"] @@ -4849,6 +6235,48 @@ intrinsics: - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" arch: aarch64,arm64ec + - name: "vcadd{neon_type.rot270}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot90}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + - name: "vcmla{neon_type.no}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -4870,6 +6298,27 @@ intrinsics: - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" arch: aarch64,arm64ec + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 
+ assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + - name: "vcmla{neon_type.rot90}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -4891,6 +6340,27 @@ intrinsics: - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" arch: aarch64,arm64ec + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + - name: "vcmla{neon_type.rot270}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -4912,6 +6382,28 @@ intrinsics: - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + - name: "vcmla{neon_type[0].laneq_nox}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", 
"c: {neon_type[1]}"] @@ -4935,6 +6427,30 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + - name: "vcmla{neon_type[0].rot90_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -4958,6 +6474,30 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 
* LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - name: "vcmla{neon_type[0].rot90_lane}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -4981,6 +6521,30 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - name: "vcmla{neon_type.rot180}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -5002,6 +6566,29 @@ intrinsics: - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" arch: 
aarch64,arm64ec + + - name: "vcmla{neon_type.rot180}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type[0].rot180_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -5025,6 +6612,32 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - name: "vcmla{neon_type[0].rot180_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]' + ] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - name: "vcmla{type[3]}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", 
"c: {neon_type[1]}"] @@ -5048,6 +6661,32 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - name: "vcmla{type[3]}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16'] + - [float16x8_t, float16x4_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16' + ] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - name: "vcmla{neon_type[0].rot270_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -5071,6 +6710,30 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + - name: "vcmla{neon_type[0].rot270_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - 
[float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + - name: "vcmla{neon_type[0].lane_nox}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -5094,6 +6757,31 @@ intrinsics: - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].lane_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + - name: "vcmla{neon_type[0].rot270_lane}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -5114,6 +6802,28 @@ intrinsics: - Let: [c, 
"{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + - name: "vdot{neon_type[0].laneq_nox}" doc: Dot product arithmetic (indexed) arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] @@ -5194,6 +6904,28 @@ intrinsics: - link: "llvm.aarch64.neon.fmax.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxh_{type}" + doc: Maximum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmax] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmax.{type}" + arch: aarch64,arm64ec + + + - name: "vmaxnm{neon_type.no}" doc: Floating-point Maximum Number (vector) arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5212,6 +6944,47 @@ intrinsics: - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxnmh_{type}" + doc: 
Floating-point Maximum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnm] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnm.{type}" + arch: aarch64,arm64ec + + + - name: "vminnmh_{type}" + doc: Floating-point Minimum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnm] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnm.{type}" + arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" doc: Floating-point maximum number across vector arguments: ["a: {neon_type[0]}"] @@ -5247,6 +7020,89 @@ intrinsics: - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vminnmv{neon_type[0].no}" + doc: Floating-point minimum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vmaxv{neon_type[0].no}" + doc: Floating-point maximum across vector + arguments: ["a: 
{neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: Floating-point minimum across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fminv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vpmax{type[0]}" doc: "Floating-point maximum pairwise" arguments: ["a: {neon_type[1]}"] @@ -5283,6 +7139,27 @@ intrinsics: - link: "llvm.aarch64.neon.fmin.{neon_type}" arch: aarch64,arm64ec + + - name: "vminh_{type}" + doc: Minimum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmin] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmin.{type}" + arch: aarch64,arm64ec + + - name: "vminnm{neon_type.no}" doc: "Floating-point Minimum Number (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5394,6 +7271,111 @@ intrinsics: - link: "llvm.aarch64.neon.faddp.{neon_type}" arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [faddp] + safety: + unsafe: [neon] + types: + - float16x8_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + + - 
name: "vpmax{neon_type.no}" + doc: Floating-point maximum pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmaxnm{neon_type.no}" + doc: Floating-point maximum number pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnmp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmin{neon_type.no}" + doc: Floating-point minimum pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpminnm{neon_type.no}" + doc: Floating-point minimum number pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{type[0]}" doc: "Floating-point add pairwise" arguments: ["a: {neon_type[1]}"] @@ -6874,6 +8856,41 @@ intrinsics: compose: - FnCall: [simd_fsqrt, [a]] + - name: "vsqrt{neon_type.no}" + doc: "Calculates the square root of each lane." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fsqrt, [a]] + + - name: "vsqrt{type[1]}{type[0]}" + doc: "Floating-point square root" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fsqrt] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.sqrt.{type[0]}" + links: + - link: "llvm.sqrt.{type[0]}" + arch: aarch64,arm64ec + - name: "vrsqrts{type[0]}" doc: "Floating-point reciprocal square root step" arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] @@ -6912,6 +8929,27 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrts.{type[1]}" arch: aarch64,arm64ec + + - name: "vrsqrts{type[0]}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrsqrts{type[0]}" + links: + - link: "llvm.aarch64.neon.frsqrts.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecpe{type[0]}" doc: "Reciprocal estimate." arguments: ["a: {type[1]}"] @@ -6950,6 +8988,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecpe.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecpe{type[0]}" + doc: "Reciprocal estimate." 
+ arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpe{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpe.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecps{type[0]}" doc: "Floating-point reciprocal step" arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] @@ -6988,6 +9047,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecps.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecps{type[0]}" + doc: "Floating-point reciprocal step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecps{type[0]}" + links: + - link: "llvm.aarch64.neon.frecps.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecpx{type[0]}" doc: "Floating-point reciprocal exponent" arguments: ["a: {type[1]}"] @@ -7007,6 +9087,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecpx.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecpx{type[0]}" + doc: "Floating-point reciprocal exponent" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpxs{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpx.{type[1]}" + arch: aarch64,arm64ec + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -7081,6 +9182,27 @@ intrinsics: compose: - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + 
return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float64x1_t, float16x4_t] + - [float16x4_t, float64x1_t] + # q + - [float64x2_t, float16x8_t] + - [float16x8_t, float64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vrshld_s64" doc: "Signed rounding shift left" arguments: ["a: {type}", "b: {type}"] @@ -7973,6 +10095,23 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 4, 2, 6]'] + - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn1{neon_type[0].no}" doc: Transpose vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8021,6 +10160,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[1, 5, 3, 7]'] + - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn2{neon_type[0].no}" doc: Transpose vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8076,6 +10231,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vzip2{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: 
{neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[2, 6, 3, 7]'] + - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vzip1{neon_type[0].no}" doc: Zip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8111,6 +10282,23 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip1{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 4, 1, 5]'] + - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp1{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8159,6 +10347,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 2, 4, 6]'] + - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp2{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8211,6 +10415,26 @@ intrinsics: - b - "{type[1]}" + - name: "vuzp2{neon_type[0].no}" + 
doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[1, 3, 5, 7]'] + - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - FnCall: + - "simd_shuffle!" + - - a + - b + - "{type[1]}" + - name: "vabal_high_{neon_type[1]}" doc: "Unsigned Absolute difference and Accumulate Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] @@ -8485,6 +10709,76 @@ intrinsics: - b - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma{type[3]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + # vfms lane f16 + - name: "vfms{type[3]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - 
*neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + - name: "vfms{type[1]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "h_f16"] + compose: + - FnCall: ["vfma{type[1]}", [a, -b, c]] + + - name: "vfma_lane_f64" doc: "Floating-point fused multiply-add to accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8598,6 +10892,68 @@ intrinsics: - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] - FnCall: ["_vfmad_lane_f64", [b, c, a]] + + - name: "vfma{type[1]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "h_f16"] + compose: + - LLVMLink: + name: "_vfma_{type[1]}" + links: + - link: "llvm.fma.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vfma{type[1]}", [b, c, a]] + + + - name: "vfmah_lane{type[2]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmah_{type[0]}", [a, b, c]] + + - name: "vfmsh_lane{type[2]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmsh_{type[0]}", [a, b, c]] + - name: "vfms_f64" doc: "Floating-point fused multiply-subtract from accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8734,6 +11090,7 @@ intrinsics: compose: - FnCall: ["vfma{type[2]}::", ['a', '-b', 'c']] + - name: "vceqz{neon_type[0].no}" doc: "Floating-point compare bitwise equal to zero" arguments: ["a: {neon_type[0]}"] @@ -8752,6 +11109,23 @@ intrinsics: - Let: [b, '{type[2]}', '{type[3]}'] - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + - name: "vceqz{neon_type[0].no}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, 'f16x4', 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, 
uint16x8_t, 'f16x8', 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, '{type[2]}', '{type[3]}'] + - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + - name: "vceqz{type[2]}" doc: "Floating-point compare bitwise equal to zero" arguments: ["a: {type[0]}"] @@ -8772,6 +11146,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[0]}", [a]] - '0' + - name: "vceqz{type[2]}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", "h_f16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceqz_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - '0' + - name: "vceqzd_{type[2]}" doc: "Compare bitwise equal to zero" arguments: ["a: {type[0]}"] @@ -8881,6 +11275,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcge{type[0]}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcge_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vcge{neon_type[0].no}" doc: "Floating-point compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9009,6 +11425,27 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + + - name: "vcgez{type[0]}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcgez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vclezd_s64" doc: "Compare less than or equal to zero" arguments: ["a: {type[0]}"] @@ -9126,6 +11563,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vcgtz{type[0]}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - "simd_extract!" 
+ - - FnCall: + - "vcgtz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -9265,6 +11722,47 @@ intrinsics: - - a - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']] + + # vmulq_laneq_f16 + - name: "vmul{type[2]}{neon_type[1].no}" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '3']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + + + - name: "vmul{type[1]}_{type[0]}" + doc: Multiply + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f16, 'h'] + compose: + - 'a * b' + + - name: "vmul{type[2]}" doc: "Floating-point multiply" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -9285,6 +11783,28 @@ intrinsics: - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] - Identifier: ['a * b', Symbol] + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + 
static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, "h_lane_f16", '2'] + - ["f16", float16x8_t, "h_laneq_f16", '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + - name: "vrsrad_n_s64" doc: "Signed rounding shift right and accumulate." arguments: ["a: {type}", "b: {type}"] @@ -9417,6 +11937,28 @@ intrinsics: - FnCall: ["vdup_n_{type[0]}", [b]] - '0' + + - name: "vclt{type[2]}" + doc: "Floating-point compare less than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h_f16'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - FnCall: ["vdup_n_{type[0]}", [b]] + - '0' + - name: "vabdl_high_{neon_type[0]}" doc: "Unsigned Absolute difference Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9471,6 +12013,30 @@ intrinsics: - b - FnCall: ["vdup{neon_type[1].N}", [c]] + + - name: "vfms{neon_type[0].N}" + doc: Floating-point fused Multiply-Subtract from accumulator. + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmls] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + + - name: "vpminnm{type[0]}" doc: "Floating-point minimum number pairwise" arguments: ["a: {neon_type[1]}"] @@ -9711,6 +12277,28 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrte.{type[1]}" arch: aarch64,arm64ec + + - name: "vrsqrte{type[0]}" + doc: "Reciprocal square-root estimate." 
+ arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "vrsqrte{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{type[1]}" + arch: aarch64,arm64ec + + - name: "vpminnm{neon_type.no}" doc: "Floating-point Minimum Number Pairwise (vector)." arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -9777,6 +12365,28 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvta{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -9796,6 +12406,79 @@ intrinsics: - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - 
link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtmh_{type[3]}_f16(a) as i16' + + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtmh_{type[3]}_f16(a) as u16' + - name: "vmlal_high_n_{neon_type[1]}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] @@ -10860,6 +13543,27 @@ intrinsics: - cast - [] + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]] + - *neon-unstable-f16 + 
safety: + unsafe: [neon] + types: + - ['*const f16', float16x4_t, "neon,fp16"] + - ['*const f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::read_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] @@ -10906,6 +13610,28 @@ intrinsics: - [] - a + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]] + - FnCall: [allow, ['clippy::cast_ptr_alignment']] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4_t, "neon,fp16"] + - ['*mut f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::write_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - a + - name: "__crc32d" doc: "CRC32 single round checksum for quad words (64 bits)." arguments: ["crc: {type[0]}", "data: {type[1]}"] @@ -11650,3 +14376,196 @@ intrinsics: - FnCall: - transmute - - b + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal2] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl2] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 5d20bfc90cad..2668efdb2468 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -45,7 +45,7 @@ neon-stable-not-arm: &neon-stable-not-arm #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] neon-unstable-is-arm: &neon-unstable-is-arm - FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] + FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] # #[cfg_attr(all(test, not(target_env = "msvc"))] msvc-disabled: &msvc-disabled @@ -63,6 +63,13 @@ neon-aes: &neon-aes neon-i8mm: &neon-i8mm FnCall: [target_feature, ['enable = "neon,i8mm"']] +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = 
"fcma"']] }]] + #[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] neon-unstable-i8mm: &neon-unstable-i8mm FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] @@ -77,6 +84,10 @@ arm-crc-unstable: &arm-crc-unstable aarch64-crc-stable: &aarch64-crc-stable FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']]}]] +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + intrinsics: - name: "vand{neon_type.no}" doc: Vector bitwise and @@ -263,6 +274,30 @@ intrinsics: - link: "llvm.aarch64.neon.fabd.{neon_type}" arch: aarch64,arm64ec + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + - name: "vabdl{neon_type[0].noq}" doc: Signed Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -379,6 +414,25 @@ intrinsics: compose: - FnCall: [simd_eq, [a, b]] + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: 
[assert_instr, ['"vceq.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_eq, [a, b]] + - name: "vtst{neon_type[0].no}" doc: "Signed compare bitwise Test bits nonzero" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -425,6 +479,46 @@ intrinsics: compose: - FnCall: [simd_fabs, [a]] + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vabs{type[0]}" + doc: "Floating-point absolute value" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vabs_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - 0 + - name: "vcgt{neon_type[0].no}" doc: "Compare signed greater than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -487,6 +581,45 @@ intrinsics: compose: - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_gt, [a, b]] + + + - name: "vcgtz{neon_type[0].no}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_gt, [a, {FnCall: [transmute, [b]]}]] + - name: "vclt{neon_type[0].no}" doc: "Compare signed less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -549,6 +682,47 @@ intrinsics: compose: - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: 
{neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vclez{neon_type[0].no}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcle.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmle]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + - name: "vcge{neon_type[0].no}" doc: "Compare signed greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -721,6 +895,31 @@ intrinsics: - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 
'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacgt.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcage{neon_type[0].no}" doc: "Floating-point absolute compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -745,6 +944,30 @@ intrinsics: - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacge.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcalt{neon_type[0].no}" doc: "Floating-point absolute compare less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -763,6 +986,24 @@ intrinsics: compose: - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, 
[{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + - name: "vcale{neon_type[0].no}" doc: "Floating-point absolute compare less than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -781,6 +1022,25 @@ intrinsics: compose: - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -799,6 +1059,24 @@ intrinsics: compose: - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [scvtf]]}]] + - 
*neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -817,6 +1095,24 @@ intrinsics: compose: - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] + compose: + - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -845,6 +1141,128 @@ intrinsics: arch: arm - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: 
"llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: 
"llvm.arm.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -873,6 +1291,38 @@ intrinsics: arch: arm - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: 
[neon] + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -900,33 +1350,6 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] - - name: "vcvt{neon_type[1].N}_{neon_type[0]}" - doc: "Fixed-point convert to floating-point" - arguments: ["a: {neon_type[0]}"] - return_type: "{neon_type[1]}" - attr: - - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] - - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - static_defs: ['const N: i32'] - safety: - unsafe: [neon] - types: - - [uint32x2_t, float32x2_t] - - [uint32x4_t, float32x4_t] - compose: - - FnCall: [static_assert!, ['N >= 1 && N <= 32']] - - LLVMLink: - name: "vcvt{neon_type[1].N}_{neon_type[0]}" - arguments: - - "a: {neon_type[0]}" - - "n: i32" - links: - - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" - arch: aarch64,arm64ec - - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] - - name: "vcvt{type[2]}" doc: "Floating-point convert to fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -1131,6 +1554,68 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: 
{neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[3]}{neon_type[0]}" + doc: "Create a new vector with all lanes set to a value" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, f16, 'float16x4', '_n_'] + - [float16x8_t, f16, 'float16x8', 'q_n_'] + compose: + - "{type[2]}_t::splat(a)" + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type[1]}"] @@ -1339,6 +1824,47 @@ intrinsics: - Identifier: ["{type[1]}", Symbol] - Identifier: ["{type[2]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 
{FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 7']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + + - name: "vext{neon_type[0].no}" doc: "Extract vector from pair of vectors" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -1789,6 +2315,24 @@ intrinsics: compose: - FnCall: [simd_neg, [a]] + - name: "vneg{neon_type[0].no}" + doc: Negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, 'f16'] + - [float16x8_t, 'f16'] + compose: + - FnCall: [simd_neg, [a]] + - name: "vqneg{neon_type[0].no}" doc: Signed saturating negate arguments: ["a: {neon_type[0]}"] @@ -2089,6 +2633,30 @@ intrinsics: - link: "llvm.arm.neon.vrintn.{neon_type}" arch: arm + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = 
"fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.frinn.{neon_type}" + links: + - link: "llvm.aarch64.neon.frintn.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrintn.{neon_type}" + arch: arm + - name: "vqadd{neon_type.no}" doc: Saturating add arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -2333,6 +2901,76 @@ intrinsics: - transmute - - a + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t] + - ["*const f16", float16x8x2_t] + - ["*const f16", float16x4x3_t] + - ["*const f16", float16x8x3_t] + - ["*const f16", float16x4x4_t] + - ["*const f16", float16x8x4_t] + compose: + - LLVMLink: + name: "vld1x{neon_type[1].tuple}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}" + arch: arm + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure to one lane of one register" + arguments: ["ptr: {type[0]}", "src: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1, 'LANE = 0']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_lane', '2'] + - ["*const f16", float16x8_t, 'q_lane', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]] + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure and replicate to all lanes of one register" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vld1"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"] + - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"] + compose: + - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.)))"] + - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]] + + + - name: "vld2{neon_type[1].nox}" doc: Load multiple 2-element structures to two registers arguments: ["a: {type[0]}"] @@ -2905,6 +3543,399 @@ intrinsics: - "_vld2{neon_type[1].dup_nox}" - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link:
"llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld2{neon_type[1].nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - *neon-fp16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", 
float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld2', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "2" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "LANE as i64" + - "a as _" + + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm 
+ - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld3', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!"
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "LANE" + - "2" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "LANE as i64" + - "a as _" + - name: "vld3{neon_type[1].lane_nox}" doc: "Load multiple 3-element structures to two registers" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -3841,6 +4872,31 @@ intrinsics: - FnCall: [simd_extract!, [b, 'LANE as u32']] - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + types: + - ['*mut f16', float16x4_t, '2'] + - ['*mut f16', float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: 'vst1{neon_type[1].no}' doc: "Store multiple single-element structures from one, two, three, or four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4057,6 +5113,34 @@ intrinsics: arch: arm - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - 
*neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst1] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4174,6 +5258,33 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, float16x4_t] + - [f16, float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4235,6 +5346,37 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 
'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t] + - [f16, float16x8x2_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4296,6 +5438,35 @@ intrinsics: arch: arm - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst2] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, float16x4_t, '2'] + - [f16, float16x8x2_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4331,6 +5502,39 @@ intrinsics: arch: arm - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut 
{type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t, '2'] + - [f16, float16x8x2_t, '1', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4510,6 +5714,36 @@ intrinsics: arch: arm - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t, '2'] + - [f16, float16x8x3_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4546,6 +5780,40 @@ intrinsics: 
arch: arm - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t, '2'] + - [f16, float16x8x3_t, '3', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + + - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4575,6 +5843,34 @@ intrinsics: arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t] + - [f16, float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}',
['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4609,6 +5905,38 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t] + - [f16, float16x8x3_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4791,6 +6119,37 @@ intrinsics: arch: arm - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t, '2'] + - [f16, float16x8x4_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 
'vst4.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4828,6 +6187,40 @@ intrinsics: arch: arm - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t, '2'] + - [f16, float16x8x4_t, '3', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + + - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4858,6 +6251,35 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four 
registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4893,6 +6315,39 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t] + - [f16, float16x8x4_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vusdot{neon_type[0].no}" doc: "Dot product vector form with unsigned and signed integers" 
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] @@ -5024,6 +6479,26 @@ intrinsics: compose: - FnCall: [simd_mul, [a, b]] + + - name: "vmul{neon_type[1].no}" + doc: Multiply + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [f16, float16x4_t] + - [f16, float16x8_t] + compose: + - FnCall: [simd_mul, [a, b]] + + - name: "vmul{neon_type[0].lane_nox}" doc: Multiply arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -5054,6 +6529,32 @@ intrinsics: - - a - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + + - name: "vmul{neon_type[0].lane_nox}" + doc: Multiply + arguments: ["a: {neon_type[0]}", "v: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]] + + - name: "vmul{neon_type[0].laneq_nox}" doc: Multiply arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -5261,6 +6762,33 @@ intrinsics: arch: arm - FnCall: ["_vfma{neon_type.no}", [b, c, a]] + + - name: "vfma{neon_type.no}" + doc: 
Floating-point fused Multiply-Add to accumulator (vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vfma]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmla]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fma.{neon_type}" + links: + - link: "llvm.fma.{neon_type}" + arch: aarch64 + - link: "llvm.fma.{neon_type}" + arch: arm + - FnCall: ["_vfma{neon_type.no}", [b, c, a]] + + - name: "vfma{neon_type[0].N}" doc: Floating-point fused Multiply-Add to accumulator(vector) arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] @@ -5368,6 +6896,64 @@ intrinsics: compose: - FnCall: [simd_sub, [a, b]] + + - name: "vsub{neon_type[1].no}" + doc: "Subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['f16', float16x4_t] + - ['f16', float16x8_t] + compose: + - FnCall: [simd_sub, [a, b]] + + + - name: "vadd{neon_type.no}" + doc: Floating-point Add (vector). 
+ arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: + - simd_add + - - a + - b + + - name: "vadd{type[0]}" + doc: Add + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - 'a + b' + - name: "vadd{neon_type.no}" doc: Bitwise exclusive OR arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5894,6 +7480,32 @@ intrinsics: - link: "llvm.aarch64.neon.fmax.{neon_type}" arch: aarch64,arm64ec + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vmax.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxs.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmaxnm{neon_type.no}" doc: Floating-point Maximum Number (vector) arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5918,6 +7530,57 @@ intrinsics: - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: ["a: 
{neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" + doc: Floating-point Minimum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.arm.neon.vminnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" doc: "Minimum (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5998,6 +7661,33 @@ intrinsics: - link: "llvm.aarch64.neon.fmin.{neon_type}" arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: Minimum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - 
float16x8_t + compose: + - LLVMLink: + name: "vmin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" doc: "Floating-point Minimum Number (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -6045,6 +7735,30 @@ intrinsics: - link: "llvm.aarch64.neon.faddp.{neon_type}" arch: aarch64,arm64ec + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqdmull{neon_type[0].noq}" doc: "Signed saturating doubling multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -6896,6 +8610,32 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrts.{neon_type}" arch: aarch64,arm64ec + + - name: "vrsqrts{neon_type.no}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrts]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrts{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrts.{neon_type}" + arch: arm + - link: 
"llvm.aarch64.neon.frsqrts.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecpe{neon_type.no}" doc: "Reciprocal estimate." arguments: ["a: {neon_type}"] @@ -6920,6 +8660,32 @@ intrinsics: - link: "llvm.aarch64.neon.frecpe.{neon_type}" arch: aarch64,arm64ec + + - name: "vrecpe{neon_type.no}" + doc: "Reciprocal estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecpe.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecps{neon_type.no}" doc: "Floating-point reciprocal step" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -6944,6 +8710,32 @@ intrinsics: - link: "llvm.aarch64.neon.frecps.{neon_type}" arch: aarch64,arm64ec + + - name: "vrecps{neon_type.no}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecps]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecps{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecps.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecps.{neon_type}" + arch: aarch64,arm64ec + + - name: 
"vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -7253,6 +9045,111 @@ intrinsics: compose: - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + # non-q + - [float32x2_t, float16x4_t] + - [poly16x4_t, float16x4_t] + - [poly8x8_t, float16x4_t] + - [int8x8_t, float16x4_t] + - [int16x4_t, float16x4_t] + - [int32x2_t, float16x4_t] + - [int64x1_t, float16x4_t] + - [uint8x8_t, float16x4_t] + - [uint16x4_t, float16x4_t] + - [uint32x2_t, float16x4_t] + - [uint64x1_t, float16x4_t] + - [float16x4_t, float32x2_t] + - [float16x4_t, poly16x4_t] + - [float16x4_t, poly8x8_t] + - [float16x4_t, int8x8_t] + - [float16x4_t, int16x4_t] + - [float16x4_t, int32x2_t] + - [float16x4_t, int64x1_t] + - [float16x4_t, uint8x8_t] + - [float16x4_t, uint16x4_t] + - [float16x4_t, uint32x2_t] + - [float16x4_t, uint64x1_t] + # q + - [float32x4_t, float16x8_t] + - [poly16x8_t, float16x8_t] + - [poly8x16_t, float16x8_t] + - [int8x16_t, float16x8_t] + - [int16x8_t, float16x8_t] + - [int32x4_t, float16x8_t] + - [int64x2_t, float16x8_t] + - [uint8x16_t, float16x8_t] + - [uint16x8_t, float16x8_t] + - [uint32x4_t, float16x8_t] + - [uint64x2_t, float16x8_t] + - [float16x8_t, float32x4_t] + - [float16x8_t, poly16x8_t] + - [float16x8_t, poly8x16_t] + - [float16x8_t, int8x16_t] + - [float16x8_t, int16x8_t] + - [float16x8_t, int32x4_t] + - [float16x8_t, int64x2_t] + - [float16x8_t, uint8x16_t] + - [float16x8_t, uint16x8_t] + - [float16x8_t, uint32x4_t] + - [float16x8_t, uint64x2_t] + compose: + - FnCall: [transmute, [a]] + + + - name: 
"vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [poly64x1_t, float16x4_t] + - [float16x4_t, poly64x1_t] + # q + - [poly64x2_t, float16x8_t] + - [poly128_t, float16x8_t] + - [float16x8_t, poly128_t] + - [float16x8_t, poly64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vrev64{neon_type[0].no}" + doc: Reverse elements in 64-bit doublewords + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrev64]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rev64]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "[3, 2, 1, 0]"] + - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[1]}"]] + - name: "vrshl{neon_type.no}" doc: "Signed rounding shift left" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -7562,6 +9459,23 @@ intrinsics: compose: - FnCall: [transmute, [a]] + - name: "vcreate_{neon_type[1]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["u64", float16x4_t] + compose: + - FnCall: [transmute, [a]] + - name: "vcreate_p64" doc: "Insert vector element from 
another vector element" arguments: ["a: {type[0]}"] @@ -7619,6 +9533,29 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset{neon_type[1].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, LANE = 0]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '2'] + - ["f16", float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset_lane_{neon_type[0]}" doc: "Insert vector element from another vector element" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -8043,6 +9980,36 @@ intrinsics: - transmute - - Identifier: ['(a1, b1)', Symbol] + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [trn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, 
b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + - name: "vtrn{neon_type[0].no}" doc: "Transpose elements" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8168,6 +10135,35 @@ intrinsics: - transmute - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + - name: "vuzp{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8209,6 +10205,36 @@ intrinsics: - transmute - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - 
"{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8430,19 +10456,75 @@ intrinsics: arch: arm - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + # vst1_f16_x2 - arm - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] attr: - FnCall: [cfg, ['target_arch = "arm"']] - - FnCall: [target_feature, ['enable = "neon,v7"']] + - *neon-v7 - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] - - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - ['*mut f32', float32x2x3_t, float32x2_t] - - ['*mut f32', float32x4x3_t, float32x4_t] + - ['*mut f16', float16x4x2_t, float16x4_t] + - ['*mut f16', float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f16.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + + # vst1_f16_x2 - aarch64 + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x2_t, float16x4_t] + - ["*mut f16", float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: 
"llvm.aarch64.neon.st1x2.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4x3_t, float16x4_t] + - ['*mut f16', float16x8x3_t, float16x8_t] compose: - LLVMLink: name: "vst1{neon_type[1].no}" @@ -8452,7 +10534,7 @@ intrinsics: - "b: {neon_type[2]}" - "c: {neon_type[2]}" links: - - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0" + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f16.{neon_type[2]}" arch: arm - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1', 'b.2']] @@ -8505,6 +10587,34 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x3_t, float16x4_t] + - ["*mut f16", float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x3.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures to one, two, three, or four registers" arguments: 
["a: {type[0]}", "b: {neon_type[1]}"] @@ -8531,6 +10641,52 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x4_t, float16x4_t] + - ["*mut f16", float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x4.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + +# - name: "vst1{neon_type[1].no}" +# doc: "Store a single-element structures to one register." 
+# arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] +# attr: +# - *neon-v7 +# - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst1]]}]] +# - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] +# - *neon-fp16 +# - *neon-unstable-f16 +# safety: +# unsafe: [neon] +# types: +# - ["*mut f16", float16x4_t] +# - ["*mut f16", float16x8_t] +# compose: +# - FnCall: [core::ptr::write_unaligned, ['ptr.cast()', a]] + - name: "vfms{neon_type.no}" doc: "Floating-point fused multiply-subtract from accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8632,6 +10788,47 @@ intrinsics: compose: - FnCall: [simd_ge, [a, b]] + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_ge, [a, b]] + + + - name: "vcgez{neon_type[0].no}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_ge + - - a + - FnCall: [transmute, [b]] + - name: "vclt{neon_type.no}" doc: "Compare unsigned less than" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -8857,6 +11054,61 @@ intrinsics: - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: + - simd_cast + - - a + + - name: "vcvt_f16_{neon_type[0]}" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt.f16.f32]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float32x4_t, float16x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_f32_f16" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = 
"aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtl]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + - name: "vmla{neon_type[0].N}" doc: "Vector multiply accumulate with scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] @@ -9215,6 +11467,29 @@ intrinsics: - - a - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16"] + - [float16x8_t, "f16"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{type[2]}" doc: "Floating-point multiply" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -9310,6 +11585,47 @@ intrinsics: compose: - FnCall: [simd_lt, [a, b]] + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_lt, [a, b]] + + + - name: "vcltz{neon_type[0].no}" + doc: "Floating-point compare less 
than" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmlt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + - name: "vabdl_{neon_type[0]}" doc: "Unsigned Absolute difference Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9403,6 +11719,27 @@ intrinsics: - b - FnCall: ["vdup{neon_type[0].N}_vfp4", [c]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmls]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: ["vfma{neon_type.no}", [a, b, c]] + - name: "vqdmulh{neon_type[0].laneq_nox}" doc: "Vector saturating doubling multiply high by scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -9503,6 +11840,32 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrte.{neon_type}" arch: aarch64,arm64ec + + - name: 
"vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshlu{neon_type[0].N}" doc: "Signed saturating shift left unsigned" arguments: ["a: {neon_type[0]}"] @@ -9594,6 +11957,27 @@ intrinsics: - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: + - simd_cast + - - a + - name: "vqmovn_{neon_type[0]}" doc: "Unsigned saturating extract narrow" arguments: ["a: {neon_type[0]}"] @@ -9794,7 +12178,7 @@ intrinsics: - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] - name: "vld1{type[0]}" - visibility: private + visibility: private doc: "Load multiple single-element structures to one, two, three, or four registers" arguments: ["a: 
{type[1]}", "b: {type[2]}"] return_type: "{neon_type[3]}" @@ -9824,6 +12208,32 @@ intrinsics: arch: arm - FnCall: ["_vld1{type[0]}", [a, b]] + + - name: "vld1{type[0]}" + visibility: private + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["_v4f16", "*const i8", "i32", "float16x4_t"] + - ["q_v8f16", "*const i8", "i32", "float16x8_t"] + compose: + - LLVMLink: + name: "vld1.{type[0]}" + links: + - link: "llvm.arm.neon.vld1.{neon_type[3]}" + arch: arm + - FnCall: ["_vld1{type[0]}", [a, b]] + + - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." arguments: ["ptr: {type[0]}"] return_type: "{neon_type[1]}" @@ -9885,6 +12295,29 @@ intrinsics: - - 'ptr as *const i8' - '{type[4]}' + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers." + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "{type[3]}"']] + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + types: + - ['*const f16', float16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<f16>() as i32', '_v4f16'] + - ['*const f16', float16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<f16>() as i32', 'q_v8f16'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{type[5]}" + - - 'ptr as *const i8' + - '{type[4]}' + - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." 
arguments: ["ptr: {type[0]}"] @@ -10158,8 +12591,210 @@ intrinsics: - FnCall: [transmute, ["b.3"]] - c + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of four registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: 
"vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of four registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld4', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE" + - "2" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE as i64" + - "a as _" + - name: "vcombine{neon_type[0].noq}" - doc: "Vector combine" + doc: Join two smaller vectors into a single larger vector arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: @@ -10230,7 +12865,7 @@ intrinsics: arch: aarch64,arm64ec - link: "llvm.arm.neon.aesd" arch: arm - + - name: "vaesmcq_u8" doc: "AES mix columns." 
arguments: ["data: {neon_type[0]}"] @@ -11544,7 +14179,7 @@ intrinsics: - FnCall: [transmute, ['a.2']] - FnCall: [transmute, ['a.3']] - FnCall: [transmute, [b]] - + - name: "vst1{type[0]}" visibility: private doc: "Store multiple single-element structures from one, two, three, or four registers." @@ -11574,6 +14209,28 @@ intrinsics: - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" arch: arm + - name: "vst1{type[0]}" + visibility: private + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + types: + - ['_v4f16', '* const i8', float16x4_t, i32, '16'] + - ['q_v8f16', '* const i8', float16x8_t, i32, '16'] + compose: + - LLVMLink: + name: "_vst1{type[0]}" + links: + - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" + arch: arm + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] @@ -11616,6 +14273,29 @@ intrinsics: - '{type[3]}' - '{type[4]}' + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] + types: + - ['*mut f16', float16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::<f16>() as i32', '_v4f16'] + - ['*mut f16', float16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::<f16>() as i32', 'q_v8f16'] + compose: + - FnCall: + - "vst1{type[5]}" + - - 'ptr as *const i8' + - '{type[3]}' + - '{type[4]}' + + - name: "vshiftins{type[0]}" visibility: private doc: "Shift Right and Insert (immediate)" @@ -11780,3 +14460,75 @@ intrinsics: - - a - b - FnCall: ["{type[5]}", ["{type[6]}"]] + + - name: "vcombine{neon_type[0].no}" + doc: Join two smaller vectors into a single larger vector + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t] + compose: + - FnCall: [simd_shuffle!, [a, b, '[0, 1, 2, 3, 4, 5, 6, 7]']] + + - name: "vget_{type[2]}_{neon_type[0]}" + doc: Duplicate vector element to vector + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"] + - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + + - name: "vget{type[2]}" + doc: Duplicate vector element to scalar + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: 
[assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["1"]] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16, '_lane_f16', '2'] + - [float16x8_t, f16, 'q_lane_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_extract!, [a, "LANE as u32"]] + + - name: "vmov{neon_type[0].N}" + doc: "Duplicate element to vector" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: ["vdup{neon_type[0].N}", [a]]