diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 629946c5015d..4e10639b902e 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -3166,6 +3166,275 @@ pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     vpminnmq_f32_(a, b)
 }
 
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract(vqrshl_s8(a, b), 0)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqrshl_s16(a, b), 0)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    simd_extract(vqrshl_s32(a, b), 0)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
+    let a: int64x1_t = vdup_n_s64(a);
+    let b: int64x1_t = vdup_n_s64(b);
+    simd_extract(vqrshl_s64(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract(vqrshl_u8(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract(vqrshl_u16(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
+    let a: uint32x2_t = vdup_n_u32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    simd_extract(vqrshl_u32(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
+    let a: uint64x1_t = vdup_n_u64(a);
+    let b: int64x1_t = vdup_n_s64(b);
+    simd_extract(vqrshl_u64(a, b), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract(vqrshrn_n_s16::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract(vqrshrn_n_s32::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract(vqrshrn_n_s64::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: uint16x8_t = vdupq_n_u16(a);
+    simd_extract(vqrshrn_n_u16::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: uint32x4_t = vdupq_n_u32(a);
+    simd_extract(vqrshrn_n_u32::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: uint64x2_t = vdupq_n_u64(a);
+    simd_extract(vqrshrn_n_u64::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract(vqrshrun_n_s16::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract(vqrshrun_n_s32::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract(vqrshrun_n_s64::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
 /// Calculates the square root of each lane.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -7919,6 +8188,231 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlb_s8() {
+        let a: i8 = 1;
+        let b: i8 = 2;
+        let e: i8 = 4;
+        let r: i8 = transmute(vqrshlb_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlh_s16() {
+        let a: i16 = 1;
+        let b: i16 = 2;
+        let e: i16 = 4;
+        let r: i16 = transmute(vqrshlh_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshls_s32() {
+        let a: i32 = 1;
+        let b: i32 = 2;
+        let e: i32 = 4;
+        let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshld_s64() {
+        let a: i64 = 1;
+        let b: i64 = 2;
+        let e: i64 = 4;
+        let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlb_u8() {
+        let a: u8 = 1;
+        let b: i8 = 2;
+        let e: u8 = 4;
+        let r: u8 = transmute(vqrshlb_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlh_u16() {
+        let a: u16 = 1;
+        let b: i16 = 2;
+        let e: u16 = 4;
+        let r: u16 = transmute(vqrshlh_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshls_u32() {
+        let a: u32 = 1;
+        let b: i32 = 2;
+        let e: u32 = 4;
+        let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshld_u64() {
+        let a: u64 = 1;
+        let b: i64 = 2;
+        let e: u64 = 4;
+        let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrnh_n_s16() {
+        let a: i16 = 4;
+        let e: i8 = 1;
+        let r: i8 = transmute(vqrshrnh_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrns_n_s32() {
+        let a: i32 = 4;
+        let e: i16 = 1;
+        let r: i16 = transmute(vqrshrns_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrnd_n_s64() {
+        let a: i64 = 4;
+        let e: i32 = 1;
+        let r: i32 = transmute(vqrshrnd_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_s16() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vqrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_s32() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(8, 12, 24, 28);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let r: i16x8 = transmute(vqrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_s64() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i64x2 = i64x2::new(8, 12);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vqrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrnh_n_u16() {
+        let a: u16 = 4;
+        let e: u8 = 1;
+        let r: u8 = transmute(vqrshrnh_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrns_n_u32() {
+        let a: u32 = 4;
+        let e: u16 = 1;
+        let r: u16 = transmute(vqrshrns_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrnd_n_u64() {
+        let a: u64 = 4;
+        let e: u32 = 1;
+        let r: u32 = transmute(vqrshrnd_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_u16() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let b: u16x8 = u16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vqrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_u32() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(8, 12, 24, 28);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let r: u16x8 = transmute(vqrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_high_n_u64() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u64x2 = u64x2::new(8, 12);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vqrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrunh_n_s16() {
+        let a: i16 = 4;
+        let e: u8 = 1;
+        let r: u8 = transmute(vqrshrunh_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshruns_n_s32() {
+        let a: i32 = 4;
+        let e: u16 = 1;
+        let r: u16 = transmute(vqrshruns_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrund_n_s64() {
+        let a: i64 = 4;
+        let e: u32 = 1;
+        let r: u32 = transmute(vqrshrund_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_high_n_s16() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vqrshrun_high_n_s16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_high_n_s32() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(8, 12, 24, 28);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+        let r: u16x8 = transmute(vqrshrun_high_n_s32::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_high_n_s64() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: i64x2 = i64x2::new(8, 12);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vqrshrun_high_n_s64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vsqrt_f32() {
         let a: f32x2 = f32x2::new(4.0, 9.0);
diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
index 2075ae8c6474..195a3121c3c9 100644
--- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
@@ -5886,6 +5886,550 @@ pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     vminnmq_f32_(a, b)
 }
 
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
+        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqrshl_s8_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
+        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vqrshlq_s8_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
+        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vqrshl_s16_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")]
+        fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vqrshlq_s16_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")]
+        fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vqrshl_s32_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")]
+        fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vqrshlq_s32_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")]
+        fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vqrshl_s64_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")]
+        fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vqrshlq_s64_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")]
+        fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vqrshl_u8_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")]
+        fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vqrshlq_u8_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")]
+        fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vqrshl_u16_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")]
+        fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vqrshlq_u16_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")]
+        fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vqrshl_u32_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")]
+        fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+vqrshlq_u32_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")]
+        fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+vqrshl_u64_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")]
+        fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+vqrshlq_u64_(a, b)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")]
+        fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+vqrshrn_n_s16_(a, N)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, N)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")]
+        fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+vqrshrn_n_s64_(a, N)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+vqrshrn_n_u16_(a, N)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, N)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
+        fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")]
+        fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrun_n_s64_(a, N)
+}
+
 /// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -13474,6 +14018,222 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s8() {
+        let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28);
+        let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s8() {
+        let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s16() {
+        let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12);
+        let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s16() {
+        let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28);
+        let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s32() {
+        let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
+        let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s32() {
+        let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12);
+        let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s64() {
+        let a: i64x1 = i64x1::new(-9223372036854775808);
+        let b: i64x1 = i64x1::new(2);
+        let e: i64x1 = i64x1::new(-9223372036854775808);
+        let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s64() {
+        let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u8() {
+        let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u8() {
+        let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u16() {
+        let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12);
+        let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u16() {
+        let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u32() {
+        let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u32() {
+        let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12);
+        let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let b: i64x1 = i64x1::new(2);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u64() {
+        let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s16() {
+        let a: i16x8 = i16x8::new(-32768, 4, 8, 12, 16, 20, 24, 28);
+        let e: i8x8 = i8x8::new(-128, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vqrshrn_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s32() {
+        let a: i32x4 = i32x4::new(-2147483648, 4, 8, 12);
+        let e: i16x4 = i16x4::new(-32768, 1, 2, 3);
+        let r: i16x4 = transmute(vqrshrn_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s64() {
+        let a: i64x2 = i64x2::new(-9223372036854775808, 4);
+        let e: i32x2 = i32x2::new(-2147483648, 1);
+        let r: i32x2 = transmute(vqrshrn_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u16() {
+        let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqrshrn_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u32() {
+        let a: u32x4 = u32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqrshrn_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u64() {
+        let a: u64x2 = u64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqrshrn_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s16() {
+        let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqrshrun_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s32() {
+        let a: i32x4 = i32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqrshrun_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s64() {
+        let a: i64x2 = i64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqrshrun_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vrsqrte_f32() {
         let a: f32x2 = f32x2::new(1.0, 2.0);
diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec
index d122594fb175..33418b012209 100644
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@@ -2007,6 +2007,197 @@ aarch64 = fminnmp
 link-aarch64 = fminnmp._EXT_
 generate float32x4_t:float32x4_t:float32x4_t
 
+/// Signed saturating rounding shift left
+name = vqrshl
+a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = sqrshl
+link-aarch64 = sqrshl._EXT_
+
+arm = vqrshl
+link-arm = vqrshifts._EXT_
+generate int*_t, int64x*_t
+
+/// Signed saturating rounding shift left
+name = vqrshl
+multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
+multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
+multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0
+a = 1
+b = 2
+validate 4
+
+aarch64 = sqrshl
+generate i8, i16, i32, i64
+
+/// Unsigned saturating rounding shift left
+name = vqrshl
+out-suffix
+a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = uqrshl
+link-aarch64 = uqrshl._EXT_
+
+arm = vqrshl
+link-arm = vqrshiftu._EXT_
+generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
+generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
+
+/// Unsigned saturating rounding shift left
+name = vqrshl
+out-suffix
+multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
+multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
+multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
+a = 1
+b = 2
+validate 4
+
+aarch64 = uqrshl
+generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
+
+/// Signed saturating rounded shift right narrow
+name = vqrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = MIN, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate MIN, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = sqrshrn
+link-aarch64 = sqrshrn._EXT2_
+const-aarch64 = N
+
+arm = vqrshrn
+link-arm = vqrshiftns._EXT2_
+const-arm = -N as ttn
+generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
+
+/// Signed saturating rounded shift right narrow
+name = vqrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
+multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = sqrshrn
+generate i16:i8, i32:i16, i64:i32
+
+/// Signed saturating rounded shift right narrow
+name = vqrshrn_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 2, 3, 2, 3, 6, 7
+b = 8, 12, 24, 28, 48, 52, 56, 60
+n = 2
+validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
+
+aarch64 = sqrshrn2
+generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
+
+/// Unsigned saturating rounded shift right narrow
+name = vqrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = MIN, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate 0, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = uqrshrn
+link-aarch64 = uqrshrn._EXT2_
+const-aarch64 = N
+
+arm = vqrshrn
+link-arm = vqrshiftnu._EXT2_
+const-arm = -N as ttn
+generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
+
+/// Unsigned saturating rounded shift right narrow
+name = vqrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
+multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = uqrshrn
+generate u16:u8, u32:u16, u64:u32
+
+/// Unsigned saturating rounded shift right narrow
+name = vqrshrn_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 2, 3, 2, 3, 6, 7
+b = 8, 12, 24, 28, 48, 52, 56, 60
+n = 2
+validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
+
+aarch64 = uqrshrn2
+generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
+
+/// Signed saturating rounded shift right unsigned narrow
+name = vqrshrun
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = 0, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate 0, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = sqrshrun
+link-aarch64 = sqrshrun._EXT2_
+const-aarch64 = N
+
+arm = vqrshrun
+link-arm = vqrshiftnsu._EXT2_
+const-arm = -N as ttn
+generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
+
+/// Signed saturating rounded shift right unsigned narrow
+name = vqrshrun
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
+multi_fn = simd_extract, {vqrshrun_n-in_ntt-::<N>, a}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = sqrshrun
+generate i16:u8, i32:u16, i64:u32
+
+/// Signed saturating rounded shift right unsigned narrow
+name = vqrshrun_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 2, 3, 2, 3, 6, 7
+b = 8, 12, 24, 28, 48, 52, 56, 60
+n = 2
+validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
+
+aarch64 = sqrshrun2
+generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
+
 /// Calculates the square root of each lane.
 name = vsqrt
 fn = simd_fsqrt
diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs
index 371bf71058c7..5ba466bd735f 100644
--- a/library/stdarch/crates/stdarch-gen/src/main.rs
+++ b/library/stdarch/crates/stdarch-gen/src/main.rs
@@ -88,10 +88,13 @@ fn type_len(t: &str) -> usize {
 
 fn type_bits(t: &str) -> usize {
     match t {
-        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 8,
-        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 16,
-        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 32,
-        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 64,
+        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t"
+        | "i8" | "u8" => 8,
+        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t"
+        | "i16" | "u16" => 16,
+        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" | "i32" | "u32" => 32,
+        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t"
+        | "i64" | "u64" => 64,
         _ => panic!("unknown type: {}", t),
     }
 }
@@ -220,6 +223,34 @@ fn type_to_n_suffix(t: &str) -> &str {
     }
 }
 
+fn type_to_noq_n_suffix(t: &str) -> &str {
+    match t {
+        "int8x8_t" | "int8x16_t" => "_n_s8",
+        "int16x4_t" | "int16x8_t" => "_n_s16",
+        "int32x2_t" | "int32x4_t" => "_n_s32",
+        "int64x1_t" | "int64x2_t" => "_n_s64",
+        "uint8x8_t" | "uint8x16_t" => "_n_u8",
+        "uint16x4_t" | "uint16x8_t" => "_n_u16",
+        "uint32x2_t" | "uint32x4_t" => "_n_u32",
+        "uint64x1_t" | "uint64x2_t" => "_n_u64",
+        "float16x4_t" | "float16x8_t" => "_n_f16",
+        "float32x2_t" | "float32x4_t" => "_n_f32",
+        "float64x1_t" | "float64x2_t" => "_n_f64",
+        "poly8x8_t" | "poly8x16_t" => "_n_p8",
+        "poly16x4_t" | "poly16x8_t" => "_n_p16",
+        "poly64x1_t" | "poly64x2_t" => "_n_p64",
+        "i8" => "b_n_s8",
+        "i16" => "h_n_s16",
+        "i32" => "s_n_s32",
+        "i64" => "d_n_s64",
+        "u8" => "b_n_u8",
+        "u16" => "h_n_u16",
+        "u32" => "s_n_u32",
+        "u64" => "d_n_u64",
+        _ => panic!("unknown type: {}", t),
+    }
+}
+
 fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
     let mut str = String::new();
     let suf = type_to_suffix(out_t);
@@ -323,6 +354,7 @@ enum Suffix {
     NoQ,
     NoQDouble,
     NSuffix,
+    NoQNSuffix,
     OutSuffix,
     Lane,
 }
@@ -381,30 +413,52 @@ fn type_to_global_type(t: &str) -> &str {
 
 fn type_to_native_type(t: &str) -> &str {
     match t {
-        "int8x8_t" => "i8",
-        "int8x16_t" => "i8",
-        "int16x4_t" => "i16",
-        "int16x8_t" => "i16",
-        "int32x2_t" => "i32",
-        "int32x4_t" => "i32",
-        "int64x1_t" => "i64",
-        "int64x2_t" => "i64",
-        "uint8x8_t" => "u8",
-        "uint8x16_t" => "u8",
-        "uint16x4_t" => "u16",
-        "uint16x8_t" => "u16",
-        "uint32x2_t" => "u32",
-        "uint32x4_t" => "u32",
-        "uint64x1_t" => "u64",
-        "uint64x2_t" => "u64",
-        "float16x4_t" => "f16",
-        "float16x8_t" => "f16",
-        "float32x2_t" => "f32",
-        "float32x4_t" => "f32",
-        "float64x1_t" => "f64",
-        "float64x2_t" => "f64",
-        "poly64x1_t" => "u64",
-        "poly64x2_t" => "u64",
+        "int8x8_t" | "int8x16_t" | "i8" => "i8",
+        "int16x4_t" | "int16x8_t" | "i16" => "i16",
+        "int32x2_t" | "int32x4_t" | "i32" => "i32",
+        "int64x1_t" | "int64x2_t" | "i64" => "i64",
+        "uint8x8_t" | "uint8x16_t" | "u8" => "u8",
+        "uint16x4_t" | "uint16x8_t" | "u16" => "u16",
+        "uint32x2_t" | "uint32x4_t" | "u32" => "u32",
+        "uint64x1_t" | "uint64x2_t" | "u64" => "u64",
+        "float16x4_t" | "float16x8_t" => "f16",
+        "float32x2_t" | "float32x4_t" => "f32",
+        "float64x1_t" | "float64x2_t" => "f64",
+        "poly64x1_t" | "poly64x2_t" => "u64",
+        _ => panic!("unknown type: {}", t),
+    }
+}
+
+fn native_type_to_type(t: &str) -> &str {
+    match t {
+        "i8" => "int8x8_t",
+        "i16" => "int16x4_t",
+        "i32" => "int32x2_t",
+        "i64" => "int64x1_t",
+        "u8" => "uint8x8_t",
+        "u16" => "uint16x4_t",
+        "u32" => "uint32x2_t",
+        "u64" => "uint64x1_t",
+        "f16" => "float16x4_t",
+        "f32" => "float32x2_t",
+        "f64" => "float64x1_t",
+        _ => panic!("unknown type: {}", t),
+    }
+}
+
+fn native_type_to_long_type(t: &str) -> &str {
+    match t {
+        "i8" => "int8x16_t",
+        "i16" => "int16x8_t",
+        "i32" => "int32x4_t",
+        "i64" => "int64x2_t",
+        "u8" => "uint8x16_t",
+        "u16" => "uint16x8_t",
+        "u32" => "uint32x4_t",
+        "u64" => "uint64x2_t",
+        "f16" => "float16x8_t",
+        "f32" => "float32x4_t",
+        "f64" => "float64x2_t",
         _ => panic!("unknown type: {}", t),
     }
 }
@@ -437,6 +491,14 @@ fn type_to_ext(t: &str) -> &str {
         "poly8x16_t" => "v16i8",
         "poly16x4_t" => "v4i16",
         "poly16x8_t" => "v8i16",
+        "i8" => "v8i8",
+        "i16" => "v4i16",
+        "i32" => "v2i32",
+        "i64" => "v1i64",
+        "u8" => "v8i8",
+        "u16" => "v4i16",
+        "u32" => "v2i32",
+        "u64" => "v1i64",
         /*
         "poly64x1_t" => "i64x1",
         "poly64x2_t" => "i64x2",
@@ -757,6 +819,7 @@ fn gen_aarch64(
     current_name: &str,
     current_aarch64: &Option<String>,
    link_aarch64: &Option<String>,
+    const_aarch64: &Option<String>,
     constn: &Option<String>,
     in_t: &[&str; 3],
     out_t: &str,
@@ -788,6 +851,7 @@ fn gen_aarch64(
             type_to_noq_double_suffixes(out_t, in_t[1])
         ),
         NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
+        NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
        OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
         Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
     };
@@ -799,25 +863,21 @@ fn gen_aarch64(
             )
         }
         current_fn
-    } else if !multi_fn.is_empty() {
-        if link_aarch64.is_some() {
-            panic!(
-                "[{}] Can't specify link and (multi) fn at the same time.",
-                name
-            )
-        }
-        String::new()
+    } else if link_aarch64.is_some() {
+        format!("{}_", name)
     } else {
-        if link_aarch64.is_none() {
+        if multi_fn.is_empty() {
             panic!(
                 "[{}] Either (multi) fn or link-aarch have to be specified.",
                 name
            )
        }
-        format!("{}_", name)
+        String::new()
    };
     let current_aarch64 = current_aarch64.clone().unwrap();
-    let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
+    let mut ext_c = String::new();
+    let mut ext_c_const = String::new();
+    if let Some(link_aarch64) = link_aarch64.clone() {
         let ext = type_to_ext(in_t[0]);
         let ext2 = type_to_ext(out_t);
         let link_aarch64 = if link_aarch64.starts_with("llvm") {
@@ -827,7 +887,7 @@ fn gen_aarch64(
             link.push_str(&link_aarch64);
             link.replace("_EXT_", ext).replace("_EXT2_", ext2)
         };
-        format!(
+        ext_c = format!(
             r#"#[allow(improper_ctypes)]
     extern "C" {{
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
@@ -849,9 +909,32 @@ fn gen_aarch64(
                 _ => unimplemented!("unknown para_num"),
             },
             out_t
-        )
-    } else {
-        String::new()
+        );
+        if const_aarch64.is_some() {
+            ext_c_const = format!(
+                r#"#[allow(improper_ctypes)]
    extern "C" {{
        #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
        fn {}({}) -> {};
    }}
"#,
+                link_aarch64,
+                current_fn,
+                match para_num {
+                    1 => {
+                        format!("a: {}, n: i32", in_t[0])
+                    }
+                    2 => {
+                        format!("a: {}, b: {}, n: i32", in_t[0], in_t[1])
+                    }
+                    3 => {
+                        format!("a: {}, b: {}, c: {}, n: i32", in_t[0], in_t[1], in_t[2])
+                    }
+                    _ => unimplemented!("unknown para_num"),
+                },
+                out_t
+            );
+        }
    };
     let multi_calls = if !multi_fn.is_empty() {
         let mut calls = String::new();
@@ -891,60 +974,93 @@ fn gen_aarch64(
     } else {
         String::new()
     };
-    let call = match (multi_calls.len(), para_num, fixed.len()) {
-        (0, 1, 0) => format!(
-            r#"pub unsafe fn {}{}(a: {}) -> {} {{
-    {}{}(a)
-}}"#,
-            name, const_declare, in_t[0], out_t, ext_c, current_fn,
-        ),
-        (0, 1, _) => {
-            let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
-            format!(
+    let call = if let Some(const_aarch64) = const_aarch64 {
+        match para_num {
+            1 => format!(
                 r#"pub unsafe fn {}{}(a: {}) -> {} {{
-    let b{};
-    {}{}(a, transmute(b))
+    {}
+    {}{}(a, {})
 }}"#,
                 name,
                 const_declare,
                 in_t[0],
                 out_t,
-                values(in_t[0], &fixed),
-                ext_c,
+                multi_calls,
+                ext_c_const,
                 current_fn,
-            )
+                const_aarch64
+            ),
+            2 => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}{}(a, b, {})
+}}"#,
+                name,
+                const_declare,
+                in_t[0],
+                out_t,
+                multi_calls,
+                ext_c_const,
+                current_fn,
+                const_aarch64
+            ),
+            _ => String::new(),
         }
-        (0, 2, _) => format!(
-            r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
+    } else {
+        match (multi_calls.len(), para_num, fixed.len()) {
+            (0, 1, 0) => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}(a)
+}}"#,
+                name, const_declare, in_t[0], out_t, ext_c, current_fn,
+            ),
+            (0, 1, _) => {
+                let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
+                format!(
+                    r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    let b{};
+    {}{}(a, transmute(b))
+}}"#,
+                    name,
+                    const_declare,
+                    in_t[0],
+                    out_t,
+                    values(in_t[0], &fixed),
+                    ext_c,
+                    current_fn,
+                )
+            }
+            (0, 2, _) => format!(
+                r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
     {}{}(a, b)
 }}"#,
-            name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
-        ),
-        (0, 3, _) => format!(
-            r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
+                name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
+            ),
+            (0, 3, _) => format!(
+                r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
     {}{}(a, b, c)
 }}"#,
-            name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn,
-        ),
-        (_, 1, _) => format!(
-            r#"pub unsafe fn {}{}(a: {}) -> {} {{
+                name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn,
+            ),
+            (_, 1, _) => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
     {}{}
 }}"#,
-            name, const_declare, in_t[0], out_t, ext_c, multi_calls,
-        ),
-        (_, 2, _) => format!(
-            r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
+                name, const_declare, in_t[0], out_t, ext_c, multi_calls,
+            ),
+            (_, 2, _) => format!(
+                r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
     {}{}
 }}"#,
-            name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls,
-        ),
-        (_, 3, _) => format!(
-            r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
+                name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls,
+            ),
+            (_, 3, _) => format!(
+                r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
     {}{}
 }}"#,
-            name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls,
-        ),
-        (_, _, _) => String::new(),
+                name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls,
+            ),
+            (_, _, _) => String::new(),
+        }
     };
     let function = format!(
         r#"
@@ -1074,6 +1190,8 @@ fn gen_arm(
     link_arm: &Option<String>,
     current_aarch64: &Option<String>,
     link_aarch64: &Option<String>,
+    const_arm: &Option<String>,
+    const_aarch64: &Option<String>,
     constn: &Option<String>,
     in_t: &[&str; 3],
     out_t: &str,
@@ -1106,6 +1224,7 @@ fn gen_arm(
             type_to_noq_double_suffixes(out_t, in_t[1])
         ),
         NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
+        NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
         OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
         Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
     };
@@ -1126,69 +1245,118 @@ fn gen_arm(
             )
         }
         current_fn
-    } else if !multi_fn.is_empty() {
-        if link_aarch64.is_some() || link_arm.is_some() {
-            panic!(
-                "[{}] Can't specify link and function at the same time. multi_fn / {:?} / {:?}",
-                name, link_aarch64, link_arm
-            )
-        }
-        String::new()
+    } else if link_aarch64.is_some() || link_arm.is_some() {
+        format!("{}_", name)
     } else {
-        if link_aarch64.is_none() && link_arm.is_none() {
+        if multi_fn.is_empty() {
             panic!(
                 "[{}] Either fn or link-arm and link-aarch have to be specified.",
                 name
             )
         }
-        format!("{}_", name)
+        String::new()
     };
-    let ext_c =
-        if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
-            let ext = type_to_ext(in_t[0]);
-            let ext2 = type_to_ext(out_t);
-            let link_arm = if link_arm.starts_with("llvm") {
-                link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
-            } else {
-                let mut link = String::from("llvm.arm.neon.");
-                link.push_str(&link_arm);
-                link.replace("_EXT_", ext).replace("_EXT2_", ext2)
-            };
-            let link_aarch64 = if link_aarch64.starts_with("llvm") {
-                link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
-            } else {
-                let mut link = String::from("llvm.aarch64.neon.");
-                link.push_str(&link_aarch64);
-                link.replace("_EXT_", ext).replace("_EXT2_", ext2)
-            };
-            format!(
-                r#"#[allow(improper_ctypes)]
+    let mut ext_c = String::new();
+    let mut ext_c_const_arm = String::new();
+    let mut ext_c_const_aarch64 = String::new();
+    if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
+        let ext = type_to_ext(in_t[0]);
+        let ext2 = type_to_ext(out_t);
+        let link_arm = if link_arm.starts_with("llvm") {
+            link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        } else {
+            let mut link = String::from("llvm.arm.neon.");
+            link.push_str(&link_arm);
+            link.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        };
+        let link_aarch64 = if link_aarch64.starts_with("llvm") {
+            link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        } else {
+            let mut link = String::from("llvm.aarch64.neon.");
+            link.push_str(&link_aarch64);
+            link.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        };
+        ext_c = format!(
+            r#"#[allow(improper_ctypes)]
     extern "C" {{
         #[cfg_attr(target_arch = "arm", link_name = "{}")]
         #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
+"#,
+            link_arm,
+            link_aarch64,
+            current_fn,
+            match para_num {
+                1 => {
+                    format!("a: {}", in_t[0])
+                }
+                2 => {
+                    format!("a: {}, b: {}", in_t[0], in_t[1])
+                }
+                3 => {
+                    format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
+                }
+                _ => unimplemented!("unknown para_num"),
+            },
+            out_t
+        );
+        if const_arm.is_some() {
+            ext_c_const_arm = format!(
+                r#"
+    #[allow(improper_ctypes)]
+    extern "C" {{
+        #[cfg_attr(target_arch = "arm", link_name = "{}")]
+        fn {}({}) -> {};
+    }}
 "#,
                 link_arm,
-                link_aarch64,
                 current_fn,
                 match para_num {
                     1 => {
-                        format!("a: {}", in_t[0])
+                        format!("a: {}, n: {}", in_t[0], in_t[0])
                     }
                     2 => {
-                        format!("a: {}, b: {}", in_t[0], in_t[1])
+                        format!("a: {}, b: {}, n: {}", in_t[0], in_t[1], in_t[1])
                     }
                     3 => {
-                        format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
+                        format!(
+                            "a: {}, b: {}, c: {}, n: {}",
+                            in_t[0], in_t[1], in_t[2], in_t[2]
+                        )
                     }
                     _ => unimplemented!("unknown para_num"),
                 },
                 out_t
-            )
-        } else {
-            String::new()
-        };
+            );
+        }
+        if const_aarch64.is_some() {
+            ext_c_const_aarch64 = format!(
+                r#"
+    #[allow(improper_ctypes)]
+    extern "C" {{
+        #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
+        fn {}({}) -> {};
+    }}
+"#,
+                link_aarch64,
+                current_fn,
+                match para_num {
+                    1 => {
+                        format!("a: {}, n: i32", in_t[0])
+                    }
+                    2 => {
+                        format!("a: {}, b: {}, n: i32", in_t[0], in_t[1])
+                    }
+                    3 => {
+                        format!("a: {}, b: {}, c: {}, n: i32", in_t[0], in_t[1], in_t[2])
+                    }
+                    _ => unimplemented!("unknown para_num"),
+                },
+                out_t
+            );
+        }
+    };
     let multi_calls = if !multi_fn.is_empty() {
         let mut calls = String::new();
         for i in 0..multi_fn.len() {
@@ -1282,8 +1450,99 @@
         ),
         (_, _, _) => String::new(),
     };
-    let function = format!(
-        r#"
+    let call_const_arm = if let Some(const_arm) = const_arm {
+        let const_arm = const_arm.replace("ttn", type_to_native_type(in_t[1]));
+        let mut cnt = String::from(in_t[1]);
+        cnt.push_str("(");
+        for i in 0..type_len(in_t[1]) {
+            if i != 0 {
+                cnt.push_str(", ");
+            }
+            cnt.push_str(&const_arm);
+        }
+        cnt.push_str(")");
+        match para_num {
+            1 => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}{}(a, {})
+}}"#,
+                name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
+            ),
+            2 => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}{}(a, b, {})
+}}"#,
+                name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
+            ),
+            _ => String::new(),
+        }
+    } else {
+        String::new()
+    };
+    let call_const_aarch64 = if let Some(const_aarch64) = const_aarch64 {
+        match para_num {
+            1 => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}{}(a, {})
+}}"#,
+                name,
+                const_declare,
+                in_t[0],
+                out_t,
+                multi_calls,
+                ext_c_const_aarch64,
+                current_fn,
+                const_aarch64
+            ),
+            2 => format!(
+                r#"pub unsafe fn {}{}(a: {}) -> {} {{
+    {}{}{}(a, b, {})
+}}"#,
+                name,
+                const_declare,
+                in_t[0],
+                out_t,
+                multi_calls,
+                ext_c_const_aarch64,
+                current_fn,
+                const_aarch64
+            ),
+            _ => String::new(),
+        }
+    } else {
+        String::new()
+    };
+    let function = if const_arm.is_some() && const_aarch64.is_some() {
+        format!(
+            r#"
+{}
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))]{}
+{}
+
+{}
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
+{}
+"#,
+            current_comment,
+            expand_intrinsic(&current_arm, in_t[1]),
+            const_assert,
+            const_legacy,
+            call_const_arm,
+            current_comment,
+            expand_intrinsic(&current_aarch64, in_t[1]),
+            const_assert,
+            const_legacy,
+            call_const_aarch64,
+        )
+    } else {
+        format!(
+            r#"
 {}
 #[inline]
 #[target_feature(enable = "neon")]
@@ -1292,15 +1551,16 @@
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
 {}
 "#,
-        current_comment,
-        current_target,
-        expand_intrinsic(&current_arm, in_t[1]),
-        const_assert,
-        expand_intrinsic(&current_aarch64, in_t[1]),
-        const_assert,
-        const_legacy,
-        call,
-    );
+            current_comment,
+            current_target,
+            expand_intrinsic(&current_arm, in_t[1]),
+            const_assert,
+            expand_intrinsic(&current_aarch64, in_t[1]),
+            const_assert,
+            const_legacy,
+            call,
+        )
+    };
     let test = gen_test(
         &name,
         in_t,
@@ -1611,6 +1871,18 @@ fn get_call(
             re = Some((re_params[0].clone(), out_t.to_string()));
         } else if re_params[1] == "half" {
             re = Some((re_params[0].clone(), type_to_half(in_t[1]).to_string()));
+        } else if re_params[1] == "in_ntt" {
+            re = Some((
+                re_params[0].clone(),
+                native_type_to_type(in_t[1]).to_string(),
+            ));
+        } else if re_params[1] == "in_long_ntt" {
+            re = Some((
+                re_params[0].clone(),
+                native_type_to_long_type(in_t[1]).to_string(),
+            ));
+        } else if re_params[1] == "out_ntt" {
+            re = Some((re_params[0].clone(), native_type_to_type(out_t).to_string()));
         } else {
             re = Some((re_params[0].clone(), re_params[1].clone()));
         }
@@ -1669,6 +1941,10 @@
             fn_name.push_str(&(type_len(in_t[1]) / 2).to_string());
         } else if fn_format[1] == "nout" {
             fn_name.push_str(type_to_n_suffix(out_t));
+        } else if fn_format[1] == "in_ntt" {
+            fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1])));
+        } else if fn_format[1] == "out_ntt" {
+            fn_name.push_str(type_to_suffix(native_type_to_type(out_t)));
         } else {
             fn_name.push_str(&fn_format[1]);
         };
@@ -1725,6 +2001,8 @@ fn main() -> io::Result<()> {
     let mut current_aarch64: Option<String> = None;
     let mut link_arm: Option<String> = None;
    let mut link_aarch64: Option<String> = None;
+    let mut const_arm: Option<String> = None;
+    let mut const_aarch64: Option<String> = None;
     let mut constn: Option<String> = None;
     let mut para_num = 2;
     let mut suffix: Suffix = Normal;
@@ -1810,6 +2088,8 @@ mod test {
             current_aarch64 = None;
             link_aarch64 = None;
             link_arm = None;
+            const_aarch64 = None;
+            const_arm = None;
             current_tests = Vec::new();
             constn = None;
             para_num = 2;
@@ -1842,6 +2122,8 @@ mod test {
             suffix = NoQDouble;
         } else if line.starts_with("n-suffix") {
             suffix = NSuffix;
+        } else if line.starts_with("noq-n-suffix") {
+            suffix = NoQNSuffix;
         } else if line.starts_with("out-suffix") {
             suffix = OutSuffix;
         } else if line.starts_with("lane-suffixes") {
@@ -1861,8 +2143,12 @@ mod test {
             current_tests.push((a.clone(), b.clone(), c.clone(), n.clone(), e));
         } else if line.starts_with("link-aarch64 = ") {
             link_aarch64 = Some(String::from(&line[15..]));
+        } else if line.starts_with("const-aarch64 = ") {
+            const_aarch64 = Some(String::from(&line[16..]));
         } else if line.starts_with("link-arm = ") {
             link_arm = Some(String::from(&line[11..]));
+        } else if line.starts_with("const-arm = ") {
+            const_arm = Some(String::from(&line[12..]));
         } else if line.starts_with("target = ") {
             target = match Some(String::from(&line[9..])) {
                 Some(input) => match input.as_str() {
@@ -1921,6 +2207,8 @@ mod test {
                 &link_arm,
                 &current_aarch64,
                 &link_aarch64,
+                &const_arm,
+                &const_aarch64,
                 &constn,
                 &in_t,
                 &out_t,
@@ -1940,6 +2228,7 @@ mod test {
                 &current_name,
                 &current_aarch64,
                 &link_aarch64,
+                &const_aarch64,
                 &constn,
                 &in_t,
                 &out_t,
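With these generator changes in place, a spec entry opts into the const-operand lowering by adding the new keys next to its link lines. A minimal sketch of such an entry follows; the key names are exactly the ones parsed above ("noq-n-suffix", "const-arm = ", "const-aarch64 = ", "link-arm = ", "link-aarch64 = "), but the values and link targets are illustrative only, not copied from the real neon.spec:

    noq-n-suffix
    const-arm = N as ttn
    const-aarch64 = N
    link-arm = vqrshiftns._EXT2_
    link-aarch64 = sqrshrn._EXT2_

On the ARM path, gen_arm substitutes type_to_native_type(in_t[1]) for the literal "ttn" in the const-arm value and splats the result into an in_t[1](...) constant with type_len(in_t[1]) elements, matching the vector-typed n parameter of the llvm.arm.neon intrinsic; on the AArch64 path, the const-aarch64 value is passed through unchanged as the trailing n: i32 argument of the generated extern declaration. Likewise, the new in_ntt/in_long_ntt/out_ntt parameters usable in multi_fn expressions map a scalar input or output type to the vector types produced by native_type_to_type and native_type_to_long_type.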