From 7cc2822ce8acaa08748a121d473b964e55c44f18 Mon Sep 17 00:00:00 2001
From: usamoi
Date: Sun, 26 Jan 2025 20:56:33 +0800
Subject: [PATCH] mark AVX intrinsics as safe

Mark all AVX SIMD-computing intrinsics as safe, except for those involving
memory operations or register operations. `AVX+SHA512`, `AVX+SM3`, and
`AVX+SM4` intrinsics are missing from stdarch, so there is nothing to mark
for them.
---
 .../stdarch/crates/core_arch/src/x86/avx.rs   | 948 +++++++++---------
 .../crates/core_arch/src/x86_64/avx.rs        |   8 +-
 2 files changed, 503 insertions(+), 453 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index f78ca6d83913..5a8ed54fae1e 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -30,8 +30,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_add(a, b)
+pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { simd_add(a, b) }
 }
 
 /// Adds packed single-precision (32-bit) floating-point elements in `a` and
@@ -42,8 +42,8 @@ pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
-    simd_add(a, b)
+pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { simd_add(a, b) }
 }
 
 /// Computes the bitwise AND of a packed double-precision (64-bit)
@@ -55,10 +55,12 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
 // See https://github.com/rust-lang/stdarch/issues/71
 #[cfg_attr(test, assert_instr(vandp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
-    let a: u64x4 = transmute(a);
-    let b: u64x4 = transmute(b);
-    transmute(simd_and(a, b))
+pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let a: u64x4 = transmute(a);
+        let b: u64x4 = transmute(b);
+        transmute(simd_and(a, b))
+    }
 }
 
 /// Computes the bitwise AND of packed single-precision (32-bit) floating-point
@@ -69,10 +71,12 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
-    let a: u32x8 = transmute(a);
-    let b: u32x8 = transmute(b);
-    transmute(simd_and(a, b))
+pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let a: u32x8 = transmute(a);
+        let b: u32x8 = transmute(b);
+        transmute(simd_and(a, b))
+    }
 }
 
 /// Computes the bitwise OR packed double-precision (64-bit) floating-point
@@ -84,10 +88,12 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
 // See <https://github.com/rust-lang/stdarch/issues/71>.
 #[cfg_attr(test, assert_instr(vorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
-    let a: u64x4 = transmute(a);
-    let b: u64x4 = transmute(b);
-    transmute(simd_or(a, b))
+pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let a: u64x4 = transmute(a);
+        let b: u64x4 = transmute(b);
+        transmute(simd_or(a, b))
+    }
 }
 
 /// Computes the bitwise OR packed single-precision (32-bit) floating-point
@@ -98,10 +104,12 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
-    let a: u32x8 = transmute(a);
-    let b: u32x8 = transmute(b);
-    transmute(simd_or(a, b))
+pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let a: u32x8 = transmute(a);
+        let b: u32x8 = transmute(b);
+        transmute(simd_or(a, b))
+    }
 }
 
 /// Shuffles double-precision (64-bit) floating-point elements within 128-bit
@@ -113,18 +121,20 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
+pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(MASK, 8);
-    simd_shuffle!(
-        a,
-        b,
-        [
-            MASK as u32 & 0b1,
-            ((MASK as u32 >> 1) & 0b1) + 4,
-            ((MASK as u32 >> 2) & 0b1) + 2,
-            ((MASK as u32 >> 3) & 0b1) + 6,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                MASK as u32 & 0b1,
+                ((MASK as u32 >> 1) & 0b1) + 4,
+                ((MASK as u32 >> 2) & 0b1) + 2,
+                ((MASK as u32 >> 3) & 0b1) + 6,
+            ],
+        )
+    }
 }
 
 /// Shuffles single-precision (32-bit) floating-point elements in `a` within
@@ -136,22 +146,24 @@ pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m2
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
+pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(MASK, 8);
-    simd_shuffle!(
-        a,
-        b,
-        [
-            MASK as u32 & 0b11,
-            (MASK as u32 >> 2) & 0b11,
-            ((MASK as u32 >> 4) & 0b11) + 8,
-            ((MASK as u32 >> 6) & 0b11) + 8,
-            (MASK as u32 & 0b11) + 4,
-            ((MASK as u32 >> 2) & 0b11) + 4,
-            ((MASK as u32 >> 4) & 0b11) + 12,
-            ((MASK as u32 >> 6) & 0b11) + 12,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                MASK as u32 & 0b11,
+                (MASK as u32 >> 2) & 0b11,
+                ((MASK as u32 >> 4) & 0b11) + 8,
+                ((MASK as u32 >> 6) & 0b11) + 8,
+                (MASK as u32 & 0b11) + 4,
+                ((MASK as u32 >> 2) & 0b11) + 4,
+                ((MASK as u32 >> 4) & 0b11) + 12,
+                ((MASK as u32 >> 6) & 0b11) + 12,
+            ],
+        )
+    }
 }
 
 /// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
@@ -162,10 +174,12 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandnp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
-    let a: u64x4 = transmute(a);
-    let b: u64x4 = transmute(b);
-    transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
+pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let a: u64x4 = transmute(a);
+        let b: u64x4 = transmute(b);
+        transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
+    }
 }
 
 /// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
@@ -177,10 +191,12 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
-    let a: u32x8 = transmute(a);
-    let b: u32x8 = transmute(b);
-    transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
+pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let a: u32x8 = transmute(a);
+        let b: u32x8 = transmute(b);
+        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
+    }
 }
 
 /// Compares packed double-precision (64-bit) floating-point elements
@@ -191,8 +207,8 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaxpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
-    vmaxpd(a, b)
+pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { vmaxpd(a, b) }
 }
 
 /// Compares packed single-precision (32-bit) floating-point elements in `a`
@@ -203,8 +219,8 @@ pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaxps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
-    vmaxps(a, b)
+pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { vmaxps(a, b) }
 }
 
 /// Compares packed double-precision (64-bit) floating-point elements
@@ -215,8 +231,8 @@ pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vminpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
-    vminpd(a, b)
+pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { vminpd(a, b) }
 }
 
 /// Compares packed single-precision (32-bit) floating-point elements in `a`
@@ -227,8 +243,8 @@ pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vminps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
-    vminps(a, b)
+pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { vminps(a, b) }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements
@@ -239,8 +255,8 @@ pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_mul(a, b)
+pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { simd_mul(a, b) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
@@ -251,8 +267,8 @@ pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
-    simd_mul(a, b)
+pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { simd_mul(a, b) }
 }
 
 /// Alternatively adds and subtracts packed double-precision (64-bit)
@@ -263,12 +279,14 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
#[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Alternatively adds and subtracts packed single-precision (32-bit) @@ -279,12 +297,14 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) +pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } } /// Subtracts packed double-precision (64-bit) floating-point elements in `b` @@ -295,8 +315,8 @@ pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { - simd_sub(a, b) +pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_sub(a, b) } } /// Subtracts packed single-precision (32-bit) floating-point elements in `b` @@ -307,8 +327,8 @@ pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { - simd_sub(a, b) +pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_sub(a, b) } } /// Computes the division of each of the 8 packed 32-bit floating-point elements @@ -319,8 +339,8 @@ pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { - simd_div(a, b) +pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_div(a, b) } } /// Computes the division of each of the 4 packed 64-bit floating-point elements @@ -331,8 +351,8 @@ pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { - simd_div(a, b) +pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_div(a, b) } } /// Rounds packed double-precision (64-bit) floating point elements in `a` @@ -353,9 +373,9 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_round_pd(a: __m256d) -> __m256d { +pub fn _mm256_round_pd(a: __m256d) -> __m256d { 
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundpd256(a, ROUNDING)
+    unsafe { roundpd256(a, ROUNDING) }
 }
 
 /// Rounds packed double-precision (64-bit) floating point elements in `a`
@@ -366,8 +386,8 @@ pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
-    simd_ceil(a)
+pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
+    unsafe { simd_ceil(a) }
 }
 
 /// Rounds packed double-precision (64-bit) floating point elements in `a`
@@ -378,8 +398,8 @@ pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
-    simd_floor(a)
+pub fn _mm256_floor_pd(a: __m256d) -> __m256d {
+    unsafe { simd_floor(a) }
 }
 
 /// Rounds packed single-precision (32-bit) floating point elements in `a`
@@ -400,9 +420,9 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
+pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
     static_assert_uimm_bits!(ROUNDING, 4);
-    roundps256(a, ROUNDING)
+    unsafe { roundps256(a, ROUNDING) }
 }
 
 /// Rounds packed single-precision (32-bit) floating point elements in `a`
@@ -413,8 +433,8 @@ pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
-    simd_ceil(a)
+pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
+    unsafe { simd_ceil(a) }
 }
 
 /// Rounds packed single-precision (32-bit) floating point elements in `a`
@@ -425,8 +445,8 @@ pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
-    simd_floor(a)
+pub fn _mm256_floor_ps(a: __m256) -> __m256 {
+    unsafe { simd_floor(a) }
 }
 
 /// Returns the square root of packed single-precision (32-bit) floating point
@@ -437,8 +457,8 @@ pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsqrtps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
-    simd_fsqrt(a)
+pub fn _mm256_sqrt_ps(a: __m256) -> __m256 {
+    unsafe { simd_fsqrt(a) }
 }
 
 /// Returns the square root of packed double-precision (64-bit) floating point
@@ -449,8 +469,8 @@ pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsqrtpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
-    simd_fsqrt(a)
+pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
+    unsafe { simd_fsqrt(a) }
 }
 
 /// Blends packed double-precision (64-bit) floating-point elements from
@@ -465,18 +485,20 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
+pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM4, 4);
-    simd_shuffle!(
-        a,
-        b,
-        [
-            ((IMM4 as u32 >> 0) & 1) * 4 + 0,
-            ((IMM4 as u32 >> 1) & 1) * 4 + 1,
-            ((IMM4 as u32 >> 2) & 1) * 4 + 2,
-            ((IMM4 as u32 >> 3) & 1) * 4 + 3,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                ((IMM4 as u32 >> 0) & 1) * 4 + 0,
+                ((IMM4 as u32 >> 1) & 1) * 4 + 1,
+                ((IMM4 as u32 >> 2) & 1) * 4 + 2,
+                ((IMM4 as u32 >> 3) & 1) * 4 + 3,
+            ],
+        )
+    }
 }
 
 /// Blends packed single-precision (32-bit) floating-point elements from
@@ -488,22 +510,24 @@ pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256
 #[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
-    simd_shuffle!(
-        a,
-        b,
-        [
-            ((IMM8 as u32 >> 0) & 1) * 8 + 0,
-            ((IMM8 as u32 >> 1) & 1) * 8 + 1,
-            ((IMM8 as u32 >> 2) & 1) * 8 + 2,
-            ((IMM8 as u32 >> 3) & 1) * 8 + 3,
-            ((IMM8 as u32 >> 4) & 1) * 8 + 4,
-            ((IMM8 as u32 >> 5) & 1) * 8 + 5,
-            ((IMM8 as u32 >> 6) & 1) * 8 + 6,
-            ((IMM8 as u32 >> 7) & 1) * 8 + 7,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                ((IMM8 as u32 >> 0) & 1) * 8 + 0,
+                ((IMM8 as u32 >> 1) & 1) * 8 + 1,
+                ((IMM8 as u32 >> 2) & 1) * 8 + 2,
+                ((IMM8 as u32 >> 3) & 1) * 8 + 3,
+                ((IMM8 as u32 >> 4) & 1) * 8 + 4,
+                ((IMM8 as u32 >> 5) & 1) * 8 + 5,
+                ((IMM8 as u32 >> 6) & 1) * 8 + 6,
+                ((IMM8 as u32 >> 7) & 1) * 8 + 7,
+            ],
+        )
+    }
 }
 
 /// Blends packed double-precision (64-bit) floating-point elements from
@@ -514,9 +538,11 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
-    transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
+pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe {
+        let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
+        transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
+    }
 }
 
 /// Blends packed single-precision (32-bit) floating-point elements from
@@ -527,9 +553,11 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
-    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
+pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe {
+        let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
+        transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
+    }
 }
 
 /// Conditionally multiplies the packed single-precision (32-bit) floating-point
@@ -543,9 +571,9 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
-    vdpps(a, b, IMM8)
+    unsafe { vdpps(a, b, IMM8) }
 }
 
 /// Horizontal addition of adjacent pairs in the two packed vectors
@@ -558,8 +586,8 @@ pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
-    vhaddpd(a, b)
+pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { vhaddpd(a, b) }
 }
 
 /// Horizontal addition of adjacent pairs in the two packed vectors
@@ -573,8 +601,8 @@ pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
-    vhaddps(a, b)
+pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { vhaddps(a, b) }
 }
 
 /// Horizontal subtraction of adjacent pairs in the two packed vectors
@@ -587,8 +615,8 @@ pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
-    vhsubpd(a, b)
+pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { vhsubpd(a, b) }
 }
 
 /// Horizontal subtraction of adjacent pairs in the two packed vectors
@@ -602,8 +630,8 @@ pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
-    vhsubps(a, b)
+pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { vhsubps(a, b) }
 }
 
 /// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
@@ -614,10 +642,12 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
-    let a: u64x4 = transmute(a);
-    let b: u64x4 = transmute(b);
-    transmute(simd_xor(a, b))
+pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe {
+        let a: u64x4 = transmute(a);
+        let b: u64x4 = transmute(b);
+        transmute(simd_xor(a, b))
+    }
 }
 
 /// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
@@ -628,10 +658,12 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
-    let a: u32x8 = transmute(a);
-    let b: u32x8 = transmute(b);
-    transmute(simd_xor(a, b))
+pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe {
+        let a: u32x8 = transmute(a);
+        let b: u32x8 = transmute(b);
+        transmute(simd_xor(a, b))
+    }
 }
 
 /// Equal (ordered, non-signaling)
@@ -741,9 +773,9 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
 #[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
+pub fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmppd(a, b, const { IMM5 as i8 })
+    unsafe { vcmppd(a, b, const { IMM5 as i8 }) }
 }
 
 /// Compares packed double-precision (64-bit) floating-point
@@ -756,9 +788,9 @@ pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
+pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmppd256(a, b, IMM5 as u8)
+    unsafe { vcmppd256(a, b, IMM5 as u8) }
 }
 
 /// Compares packed single-precision (32-bit) floating-point
@@ -771,9 +803,9 @@ pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d
 #[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps(a, b, const { IMM5 as i8 })
+    unsafe { vcmpps(a, b, const { IMM5 as i8 }) }
 }
 
 /// Compares packed single-precision (32-bit) floating-point
@@ -786,9 +818,9 @@ pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
+pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps256(a, b, const { IMM5 as u8 })
+    unsafe { vcmpps256(a, b, const { IMM5 as u8 }) }
 }
 
 /// Compares the lower double-precision (64-bit) floating-point element in
@@ -803,9 +835,9 @@ pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
+pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpsd(a, b, IMM5 as i8)
+    unsafe { vcmpsd(a, b, IMM5 as i8) }
 }
 
 /// Compares the lower single-precision (32-bit) floating-point element in
@@ -820,9 +852,9 @@ pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
+pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpss(a, b, IMM5 as i8)
+    unsafe { vcmpss(a, b, IMM5 as i8) }
 }
 
 /// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
@@ -833,8 +865,8 @@ pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
-    simd_cast(a.as_i32x4())
+pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
+    unsafe { simd_cast(a.as_i32x4()) }
 }
 
 /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
@@ -845,8 +877,8 @@ pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
-    simd_cast(a.as_i32x8())
+pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
+    unsafe { simd_cast(a.as_i32x8()) }
 }
 
 /// Converts packed double-precision (64-bit) floating-point elements in `a`
@@ -857,8 +889,8 @@ pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
-    simd_cast(a)
+pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
+    unsafe { simd_cast(a) }
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in `a`
@@ -869,8 +901,8 @@ pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtps2dq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
-    transmute(vcvtps2dq(a))
+pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
+    unsafe { transmute(vcvtps2dq(a)) }
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in `a`
@@ -881,8 +913,8 @@ pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtps2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
-    simd_cast(a)
+pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
+    unsafe { simd_cast(a) }
 }
 
 /// Returns the first element of the input vector of `[4 x double]`.
@@ -892,8 +924,8 @@ pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
 #[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(movsd))] FIXME
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
-    simd_extract!(a, 0)
+pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
+    unsafe { simd_extract!(a, 0) }
 }
 
 /// Converts packed double-precision (64-bit) floating-point elements in `a`
@@ -904,8 +936,8 @@ pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
-    transmute(vcvttpd2dq(a))
+pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
+    unsafe { transmute(vcvttpd2dq(a)) }
 }
 
 /// Converts packed double-precision (64-bit) floating-point elements in `a`
@@ -916,8 +948,8 @@ pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtpd2dq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
-    transmute(vcvtpd2dq(a))
+pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
+    unsafe { transmute(vcvtpd2dq(a)) }
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in `a`
@@ -928,8 +960,8 @@ pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
-    transmute(vcvttps2dq(a))
+pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
+    unsafe { transmute(vcvttps2dq(a)) }
 }
 
 /// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
@@ -944,13 +976,15 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
 )]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
+pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
     static_assert_uimm_bits!(IMM1, 1);
-    simd_shuffle!(
-        a,
-        _mm256_undefined_ps(),
-        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm256_undefined_ps(),
+            [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
+        )
+    }
 }
 
 /// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
@@ -965,9 +999,9 @@ pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
 )]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
+pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
     static_assert_uimm_bits!(IMM1, 1);
-    simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
+    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
 }
 
 /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
@@ -981,10 +1015,12 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
 )]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
+pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
-    let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
-    transmute(dst)
+    unsafe {
+        let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
+        transmute(dst)
+    }
 }
 
 /// Extracts a 32-bit integer from `a`, selected with `INDEX`.
@@ -995,9 +1031,9 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
+pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
     static_assert_uimm_bits!(INDEX, 3);
-    simd_extract!(a.as_i32x8(), INDEX as u32)
+    unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
 }
 
 /// Returns the first element of the input vector of `[8 x i32]`.
@@ -1006,8 +1042,8 @@ pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
-    simd_extract!(a.as_i32x8(), 0)
+pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
+    unsafe { simd_extract!(a.as_i32x8(), 0) }
 }
 
 /// Zeroes the contents of all XMM or YMM registers.
@@ -1041,8 +1077,8 @@ pub unsafe fn _mm256_zeroupper() {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
-    vpermilps256(a, b.as_i32x8())
+pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
+    unsafe { vpermilps256(a, b.as_i32x8()) }
 }
 
 /// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1053,8 +1089,8 @@ pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
-    vpermilps(a, b.as_i32x4())
+pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
+    unsafe { vpermilps(a, b.as_i32x4()) }
 }
 
 /// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1066,22 +1102,24 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
 #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
+pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
-    simd_shuffle!(
-        a,
-        _mm256_undefined_ps(),
-        [
-            (IMM8 as u32 >> 0) & 0b11,
-            (IMM8 as u32 >> 2) & 0b11,
-            (IMM8 as u32 >> 4) & 0b11,
-            (IMM8 as u32 >> 6) & 0b11,
-            ((IMM8 as u32 >> 0) & 0b11) + 4,
-            ((IMM8 as u32 >> 2) & 0b11) + 4,
-            ((IMM8 as u32 >> 4) & 0b11) + 4,
-            ((IMM8 as u32 >> 6) & 0b11) + 4,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm256_undefined_ps(),
+            [
+                (IMM8 as u32 >> 0) & 0b11,
+                (IMM8 as u32 >> 2) & 0b11,
+                (IMM8 as u32 >> 4) & 0b11,
+                (IMM8 as u32 >> 6) & 0b11,
+                ((IMM8 as u32 >> 0) & 0b11) + 4,
+                ((IMM8 as u32 >> 2) & 0b11) + 4,
+                ((IMM8 as u32 >> 4) & 0b11) + 4,
+                ((IMM8 as u32 >> 6) & 0b11) + 4,
+            ],
+        )
+    }
 }
 
 /// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1093,18 +1131,20 @@ pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
+pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM8, 8);
-    simd_shuffle!(
-        a,
-        _mm_undefined_ps(),
-        [
-            (IMM8 as u32 >> 0) & 0b11,
-            (IMM8 as u32 >> 2) & 0b11,
-            (IMM8 as u32 >> 4) & 0b11,
-            (IMM8 as u32 >> 6) & 0b11,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm_undefined_ps(),
+            [
+                (IMM8 as u32 >> 0) & 0b11,
+                (IMM8 as u32 >> 2) & 0b11,
+                (IMM8 as u32 >> 4) & 0b11,
+                (IMM8 as u32 >> 6) & 0b11,
+            ],
+        )
+    }
 }
 
 /// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1115,8 +1155,8 @@ pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
-    vpermilpd256(a, b.as_i64x4())
+pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
+    unsafe { vpermilpd256(a, b.as_i64x4()) }
 }
 
 /// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1127,8 +1167,8 @@ pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
-    vpermilpd(a, b.as_i64x2())
+pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
+    unsafe { vpermilpd(a, b.as_i64x2()) }
 }
 
 /// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1140,18 +1180,20 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
 #[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
+pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM4, 4);
-    simd_shuffle!(
-        a,
-        _mm256_undefined_pd(),
-        [
-            ((IMM4 as u32 >> 0) & 1),
-            ((IMM4 as u32 >> 1) & 1),
-            ((IMM4 as u32 >> 2) & 1) + 2,
-            ((IMM4 as u32 >> 3) & 1) + 2,
-        ],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm256_undefined_pd(),
+            [
+                ((IMM4 as u32 >> 0) & 1),
+                ((IMM4 as u32 >> 1) & 1),
+                ((IMM4 as u32 >> 2) & 1) + 2,
+                ((IMM4 as u32 >> 3) & 1) + 2,
+            ],
+        )
+    }
 }
 
 /// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1163,13 +1205,15 @@ pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
+pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM2, 2);
-    simd_shuffle!(
-        a,
-        _mm_undefined_pd(),
-        [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm_undefined_pd(),
+            [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
+        )
+    }
 }
 
 /// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
@@ -1181,9 +1225,9 @@ pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
-    vperm2f128ps256(a, b, IMM8 as i8)
+    unsafe { vperm2f128ps256(a, b, IMM8 as i8) }
 }
 
 /// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
@@ -1195,9 +1239,9 @@ pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> _
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
+pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM8, 8);
-    vperm2f128pd256(a, b, IMM8 as i8)
+    unsafe { vperm2f128pd256(a, b, IMM8 as i8) }
 }
 
 /// Shuffles 128-bits (composed of integer data) selected by `imm8`
@@ -1209,9 +1253,9 @@ pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) ->
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
-    transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8))
+    unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) }
 }
 
 /// Broadcasts a single-precision (32-bit) floating-point element from memory
@@ -1290,13 +1334,15 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
 )]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
+pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
     static_assert_uimm_bits!(IMM1, 1);
-    simd_shuffle!(
-        a,
-        _mm256_castps128_ps256(b),
-        [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm256_castps128_ps256(b),
+            [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
+        )
+    }
 }
 
 /// Copies `a` to result, then inserts 128 bits (composed of 2 packed
@@ -1312,13 +1358,15 @@ pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m
 )]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
+pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
     static_assert_uimm_bits!(IMM1, 1);
-    simd_shuffle!(
-        a,
-        _mm256_castpd128_pd256(b),
-        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
-    )
+    unsafe {
+        simd_shuffle!(
+            a,
+            _mm256_castpd128_pd256(b),
+            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
+        )
+    }
 }
 
 /// Copies `a` to result, then inserts 128 bits from `b` into result
@@ -1333,14 +1381,16 @@ pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> _
 )]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
+pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
     static_assert_uimm_bits!(IMM1, 1);
-    let dst: i64x4 = simd_shuffle!(
-        a.as_i64x4(),
-        _mm256_castsi128_si256(b).as_i64x4(),
-        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
-    );
-    transmute(dst)
+    unsafe {
+        let dst: i64x4 = simd_shuffle!(
+            a.as_i64x4(),
+            _mm256_castsi128_si256(b).as_i64x4(),
+            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
+        );
+        transmute(dst)
+    }
 }
 
 /// Copies `a` to result, and inserts the 8-bit integer `i` into result
@@ -1352,9 +1402,9 @@ pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
+pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
     static_assert_uimm_bits!(INDEX, 5);
-    transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
 }
 
 /// Copies `a` to result, and inserts the 16-bit integer `i` into result
@@ -1366,9 +1416,9 @@ pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
+pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
     static_assert_uimm_bits!(INDEX, 4);
-    transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
 }
 
 /// Copies `a` to result, and inserts the 32-bit integer `i` into result
@@ -1380,9 +1430,9 @@ pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m25
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
+pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
     static_assert_uimm_bits!(INDEX, 3);
-    transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
 }
 
 /// Loads 256-bits (composed of 4 packed double-precision (64-bit)
@@ -1673,8 +1723,8 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
-    simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
+pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
+    unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
 }
 
 /// Duplicate even-indexed single-precision (32-bit) floating-point elements
@@ -1685,8 +1735,8 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
-    simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
+pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
+    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
 }
 
 /// Duplicate even-indexed double-precision (64-bit) floating-point elements
@@ -1697,8 +1747,8 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
-    simd_shuffle!(a, a, [0, 0, 2, 2])
+pub fn _mm256_movedup_pd(a: __m256d) -> __m256d {
+    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
 }
 
 /// Loads 256-bits of integer data from unaligned memory into result.
@@ -1807,8 +1857,8 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vrcpps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
-    vrcpps(a)
+pub fn _mm256_rcp_ps(a: __m256) -> __m256 {
+    unsafe { vrcpps(a) }
 }
 
 /// Computes the approximate reciprocal square root of packed single-precision
@@ -1820,8 +1870,8 @@ pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vrsqrtps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
-    vrsqrtps(a)
+pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
+    unsafe { vrsqrtps(a) }
 }
 
 /// Unpacks and interleave double-precision (64-bit) floating-point elements
@@ -1832,8 +1882,8 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_shuffle!(a, b, [1, 5, 3, 7])
+pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
 
 /// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1844,8 +1894,8 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
-    simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
+pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
 }
 
 /// Unpacks and interleave double-precision (64-bit) floating-point elements
@@ -1856,8 +1906,8 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_shuffle!(a, b, [0, 4, 2, 6])
+pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
 
 /// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1868,8 +1918,8 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
+pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
 }
 
 /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -1882,8 +1932,8 @@ pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
-    ptestz256(a.as_i64x4(), b.as_i64x4())
+pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
+    unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) }
 }
 
 /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -1896,8 +1946,8 @@ pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
#[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { - ptestc256(a.as_i64x4(), b.as_i64x4()) +pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1911,8 +1961,8 @@ pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { - ptestnzc256(a.as_i64x4(), b.as_i64x4()) +pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1928,8 +1978,8 @@ pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { - vtestzpd256(a, b) +pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestzpd256(a, b) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1945,8 +1995,8 @@ pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { - vtestcpd256(a, b) +pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestcpd256(a, b) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1963,8 +2013,8 @@ pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { - vtestnzcpd256(a, b) +pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestnzcpd256(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -1980,8 +2030,8 @@ pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { - vtestzpd(a, b) +pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestzpd(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -1997,8 +2047,8 @@ pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { - vtestcpd(a, b) +pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestcpd(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -2015,8 +2065,8 @@ pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { - vtestnzcpd(a, b) +pub fn 
_mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestnzcpd(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2032,8 +2082,8 @@ pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { - vtestzps256(a, b) +pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestzps256(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2049,8 +2099,8 @@ pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { - vtestcps256(a, b) +pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestcps256(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2067,8 +2117,8 @@ pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { - vtestnzcps256(a, b) +pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestnzcps256(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2084,8 +2134,8 @@ pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { - vtestzps(a, b) +pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestzps(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2101,8 +2151,8 @@ pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { - vtestcps(a, b) +pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestcps(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2119,8 +2169,8 @@ pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { - vtestnzcps(a, b) +pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestnzcps(a, b) } } /// Sets each bit of the returned mask based on the most significant bit of the @@ -2132,11 +2182,13 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { +pub fn _mm256_movemask_pd(a: __m256d) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. 
-    let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
-    simd_bitmask::<i64x4, u8>(mask).into()
+    unsafe {
+        let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
+        simd_bitmask::<i64x4, u8>(mask).into()
+    }
 }
 
 /// Sets each bit of the returned mask based on the most significant bit of the
@@ -2148,11 +2200,13 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovmskps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
+pub fn _mm256_movemask_ps(a: __m256) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
-    let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
-    simd_bitmask::<i32x8, u8>(mask).into()
+    unsafe {
+        let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
+        simd_bitmask::<i32x8, u8>(mask).into()
+    }
 }
 
 /// Returns vector of type __m256d with all elements set to zero.
@@ -2162,8 +2216,8 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setzero_pd() -> __m256d {
-    const { mem::zeroed() }
+pub fn _mm256_setzero_pd() -> __m256d {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Returns vector of type __m256 with all elements set to zero.
@@ -2173,8 +2227,8 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setzero_ps() -> __m256 {
-    const { mem::zeroed() }
+pub fn _mm256_setzero_ps() -> __m256 {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Returns vector of type __m256i with all elements set to zero.
@@ -2184,8 +2238,8 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxor))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setzero_si256() -> __m256i {
-    const { mem::zeroed() }
+pub fn _mm256_setzero_si256() -> __m256i {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Sets packed double-precision (64-bit) floating-point elements in returned
@@ -2197,7 +2251,7 @@ pub unsafe fn _mm256_setzero_si256() -> __m256i {
 // This intrinsic has no corresponding instruction.
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
+pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
     _mm256_setr_pd(d, c, b, a)
 }
 
@@ -2209,16 +2263,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_ps(
-    a: f32,
-    b: f32,
-    c: f32,
-    d: f32,
-    e: f32,
-    f: f32,
-    g: f32,
-    h: f32,
-) -> __m256 {
+pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
     _mm256_setr_ps(h, g, f, e, d, c, b, a)
 }
 
@@ -2229,7 +2274,7 @@ pub unsafe fn _mm256_set_ps(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_epi8(
+pub fn _mm256_set_epi8(
     e00: i8,
     e01: i8,
     e02: i8,
@@ -2279,7 +2324,7 @@ pub unsafe fn _mm256_set_epi8(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
@@ -2279,7 +2324,7 @@ pub unsafe fn _mm256_set_epi8(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_epi16(
+pub fn _mm256_set_epi16(
     e00: i16,
     e01: i16,
     e02: i16,
@@ -2313,7 +2358,7 @@ pub unsafe fn _mm256_set_epi16(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_epi32(
+pub fn _mm256_set_epi32(
     e0: i32,
     e1: i32,
     e2: i32,
@@ -2333,7 +2378,7 @@ pub unsafe fn _mm256_set_epi32(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
+pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
     _mm256_setr_epi64x(d, c, b, a)
 }
 
@@ -2345,7 +2390,7 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
+pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
     __m256d([a, b, c, d])
 }
 
@@ -2357,16 +2402,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_ps(
-    a: f32,
-    b: f32,
-    c: f32,
-    d: f32,
-    e: f32,
-    f: f32,
-    g: f32,
-    h: f32,
-) -> __m256 {
+pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
     __m256([a, b, c, d, e, f, g, h])
 }
 
@@ -2378,7 +2414,7 @@ pub unsafe fn _mm256_setr_ps(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_epi8(
+pub fn _mm256_setr_epi8(
     e00: i8,
     e01: i8,
     e02: i8,
@@ -2412,13 +2448,15 @@ pub unsafe fn _mm256_setr_epi8(
     e30: i8,
     e31: i8,
 ) -> __m256i {
-    #[rustfmt::skip]
-    transmute(i8x32::new(
-        e00, e01, e02, e03, e04, e05, e06, e07,
-        e08, e09, e10, e11, e12, e13, e14, e15,
-        e16, e17, e18, e19, e20, e21, e22, e23,
-        e24, e25, e26, e27, e28, e29, e30, e31,
-    ))
+    unsafe {
+        #[rustfmt::skip]
+        transmute(i8x32::new(
+            e00, e01, e02, e03, e04, e05, e06, e07,
+            e08, e09, e10, e11, e12, e13, e14, e15,
+            e16, e17, e18, e19, e20, e21, e22, e23,
+            e24, e25, e26, e27, e28, e29, e30, e31,
+        ))
+    }
 }
 
 /// Sets packed 16-bit integers in returned vector with the supplied values in
@@ -2429,7 +2467,7 @@ pub unsafe fn _mm256_setr_epi8(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_epi16(
+pub fn _mm256_setr_epi16(
     e00: i16,
     e01: i16,
     e02: i16,
@@ -2447,13 +2485,15 @@ pub unsafe fn _mm256_setr_epi16(
     e14: i16,
     e15: i16,
 ) -> __m256i {
-    #[rustfmt::skip]
-    transmute(i16x16::new(
-        e00, e01, e02, e03,
-        e04, e05, e06, e07,
-        e08, e09, e10, e11,
-        e12, e13, e14, e15,
-    ))
+    unsafe {
+        #[rustfmt::skip]
+        transmute(i16x16::new(
+            e00, e01, e02, e03,
+            e04, e05, e06, e07,
+            e08, e09, e10, e11,
+            e12, e13, e14, e15,
+        ))
+    }
 }
 
 /// Sets packed 32-bit integers in returned vector with the supplied values in
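A point worth keeping straight when reading the `set`/`setr` pairs: `set` takes its arguments from the highest lane down and simply forwards to its `setr` ("reversed", i.e. memory-order) twin with the arguments flipped. A sketch of the equivalence (hypothetical helper, same assumptions as the earlier sketches):

    use core::arch::x86_64::*;

    /// Both constructors build the vector whose lane i holds the value i;
    /// `a` and `b` are bit-identical.
    #[target_feature(enable = "avx")]
    fn same_vector() -> (__m256i, __m256i) {
        let a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let b = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        (a, b)
    }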
@@ -2464,7 +2504,7 @@ pub unsafe fn _mm256_setr_epi16(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_epi32(
+pub fn _mm256_setr_epi32(
     e0: i32,
     e1: i32,
     e2: i32,
@@ -2474,7 +2514,7 @@ pub unsafe fn _mm256_setr_epi32(
     e6: i32,
     e7: i32,
 ) -> __m256i {
-    transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
+    unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
 }
 
 /// Sets packed 64-bit integers in returned vector with the supplied values in
@@ -2485,8 +2525,8 @@
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
-    transmute(i64x4::new(a, b, c, d))
+pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
+    unsafe { transmute(i64x4::new(a, b, c, d)) }
 }
 
 /// Broadcasts double-precision (64-bit) floating-point value `a` to all
@@ -2497,7 +2537,7 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
+pub fn _mm256_set1_pd(a: f64) -> __m256d {
     _mm256_setr_pd(a, a, a, a)
 }
 
@@ -2509,7 +2549,7 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
+pub fn _mm256_set1_ps(a: f32) -> __m256 {
     _mm256_setr_ps(a, a, a, a, a, a, a, a)
 }
 
@@ -2521,7 +2561,7 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
+pub fn _mm256_set1_epi8(a: i8) -> __m256i {
     #[rustfmt::skip]
     _mm256_setr_epi8(
         a, a, a, a, a, a, a, a,
@@ -2541,7 +2581,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
 #[cfg_attr(test, assert_instr(vinsertf128))]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
+pub fn _mm256_set1_epi16(a: i16) -> __m256i {
     _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
 }
 
@@ -2553,7 +2593,7 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
+pub fn _mm256_set1_epi32(a: i32) -> __m256i {
     _mm256_setr_epi32(a, a, a, a, a, a, a, a)
 }
 
@@ -2567,7 +2607,7 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
 #[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
+pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
     _mm256_setr_epi64x(a, a, a, a)
 }
 
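The `set1` family broadcasts one scalar to every lane, the usual way to feed scalars into vector arithmetic. A sketch (hypothetical helper, same assumptions; `_mm256_mul_ps` is assumed to be among the computing intrinsics this patch covers):

    use core::arch::x86_64::*;

    /// Multiplies every lane of `v` by the scalar `s`.
    #[target_feature(enable = "avx")]
    fn scale(v: __m256, s: f32) -> __m256 {
        _mm256_mul_ps(v, _mm256_set1_ps(s))
    }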
@@ -2579,8 +2619,8 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
-    transmute(a)
+pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
+    unsafe { transmute(a) }
 }
 
 /// Cast vector of type __m256 to type __m256d.
@@ -2591,8 +2631,8 @@ pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
-    transmute(a)
+pub fn _mm256_castps_pd(a: __m256) -> __m256d {
+    unsafe { transmute(a) }
 }
 
 /// Casts vector of type __m256 to type __m256i.
@@ -2603,8 +2643,8 @@ pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
-    transmute(a)
+pub fn _mm256_castps_si256(a: __m256) -> __m256i {
+    unsafe { transmute(a) }
 }
 
 /// Casts vector of type __m256i to type __m256.
@@ -2615,8 +2655,8 @@ pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
-    transmute(a)
+pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
+    unsafe { transmute(a) }
 }
 
 /// Casts vector of type __m256d to type __m256i.
@@ -2627,8 +2667,8 @@ pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
-    transmute(a)
+pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
+    unsafe { transmute(a) }
 }
 
 /// Casts vector of type __m256i to type __m256d.
@@ -2639,8 +2679,8 @@ pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
-    transmute(a)
+pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
+    unsafe { transmute(a) }
 }
 
 /// Casts vector of type __m256 to type __m128.
@@ -2651,8 +2691,8 @@ pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
-    simd_shuffle!(a, a, [0, 1, 2, 3])
+pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
 
 /// Casts vector of type __m256d to type __m128d.
@@ -2663,8 +2703,8 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
-    simd_shuffle!(a, a, [0, 1])
+pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
+    unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
 
 /// Casts vector of type __m256i to type __m128i.
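Every cast in this run is a pure bit reinterpretation: no lanes are converted and, as the comments note, no instruction is emitted. Round trips are therefore free, as in this sketch (hypothetical helper, same assumptions):

    use core::arch::x86_64::*;

    /// Reinterprets float lanes as an integer vector and back;
    /// the result is bit-identical to `v` and compiles to nothing.
    #[target_feature(enable = "avx")]
    fn bits_roundtrip(v: __m256) -> __m256 {
        let bits: __m256i = _mm256_castps_si256(v);
        _mm256_castsi256_ps(bits)
    }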
@@ -2675,10 +2715,12 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
-    let a = a.as_i64x4();
-    let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
-    transmute(dst)
+pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
+    unsafe {
+        let a = a.as_i64x4();
+        let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
+        transmute(dst)
+    }
 }
 
 /// Casts vector of type __m128 to type __m256;
@@ -2690,8 +2732,8 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
-    simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4])
+pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
+    unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
 /// Casts vector of type __m128d to type __m256d;
@@ -2703,8 +2745,8 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
-    simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2])
+pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
+    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
 }
 
 /// Casts vector of type __m128i to type __m256i;
@@ -2716,11 +2758,13 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
-    let a = a.as_i64x2();
-    let undefined = i64x2::ZERO;
-    let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
-    transmute(dst)
+pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
+    unsafe {
+        let a = a.as_i64x2();
+        let undefined = i64x2::ZERO;
+        let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
+        transmute(dst)
+    }
 }
 
 /// Constructs a 256-bit floating-point vector of `[8 x float]` from a
@@ -2733,8 +2777,8 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
-    simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
+pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
+    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
}
 
 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
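When widening 128-bit vectors, the `cast*128_*256` functions shuffle in lanes of an undefined vector, so the upper 128 bits of the result are indeterminate; the `zext*` functions instead shuffle in zeros, guaranteeing a clean upper half at the possible cost of a register move. A sketch of the contrast (hypothetical helper, same assumptions):

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx")]
    fn widen(lo: __m128) -> (__m256, __m256) {
        let fast = _mm256_castps128_ps256(lo); // upper 4 lanes: indeterminate
        let clean = _mm256_zextps128_ps256(lo); // upper 4 lanes: 0.0
        (fast, clean)
    }

Prefer the cast when the upper half is about to be overwritten anyway, and `zext` whenever its contents can be observed.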
@@ -2747,10 +2791,12 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
-    let b = i64x2::ZERO;
-    let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
-    transmute(dst)
+pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
+    unsafe {
+        let b = i64x2::ZERO;
+        let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
+        transmute(dst)
+    }
 }
 
 /// Constructs a 256-bit floating-point vector of `[4 x double]` from a
@@ -2764,8 +2810,8 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
-    simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3])
+pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
+    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
 }
 
 /// Returns vector of type `__m256` with indeterminate elements.
@@ -2777,8 +2823,8 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_undefined_ps() -> __m256 {
-    const { mem::zeroed() }
+pub fn _mm256_undefined_ps() -> __m256 {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Returns vector of type `__m256d` with indeterminate elements.
@@ -2790,8 +2836,8 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_undefined_pd() -> __m256d {
-    const { mem::zeroed() }
+pub fn _mm256_undefined_pd() -> __m256d {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Returns vector of type __m256i with with indeterminate elements.
@@ -2803,8 +2849,8 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_undefined_si256() -> __m256i {
-    const { mem::zeroed() }
+pub fn _mm256_undefined_si256() -> __m256i {
+    const { unsafe { mem::zeroed() } }
 }
 
 /// Sets packed __m256 returned vector with the supplied values.
@@ -2814,8 +2860,8 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
-    simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
+pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
+    unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
 /// Sets packed __m256d returned vector with the supplied values.
@@ -2825,10 +2871,12 @@ pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
-    let hi: __m128 = transmute(hi);
-    let lo: __m128 = transmute(lo);
-    transmute(_mm256_set_m128(hi, lo))
+pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
+    unsafe {
+        let hi: __m128 = transmute(hi);
+        let lo: __m128 = transmute(lo);
+        transmute(_mm256_set_m128(hi, lo))
+    }
 }
 
 /// Sets packed __m256i returned vector with the supplied values.
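`_mm256_set_m128` concatenates two 128-bit halves, with `lo` landing in lanes 0..4 and `hi` in lanes 4..8 (note that the shuffle picks `lo` first); the `setr_m128*` variants that follow merely swap the argument order. Sketch (hypothetical helper, same assumptions):

    use core::arch::x86_64::*;

    /// Builds [lo0, lo1, lo2, lo3, hi0, hi1, hi2, hi3].
    #[target_feature(enable = "avx")]
    fn concat(hi: __m128, lo: __m128) -> __m256 {
        // Identical to _mm256_setr_m128(lo, hi).
        _mm256_set_m128(hi, lo)
    }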
@@ -2838,10 +2886,12 @@ pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
-    let hi: __m128 = transmute(hi);
-    let lo: __m128 = transmute(lo);
-    transmute(_mm256_set_m128(hi, lo))
+pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
+    unsafe {
+        let hi: __m128 = transmute(hi);
+        let lo: __m128 = transmute(lo);
+        transmute(_mm256_set_m128(hi, lo))
+    }
 }
 
 /// Sets packed __m256 returned vector with the supplied values.
@@ -2851,7 +2901,7 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
+pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
     _mm256_set_m128(hi, lo)
 }
 
@@ -2862,7 +2912,7 @@ pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
+pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
     _mm256_set_m128d(hi, lo)
 }
 
@@ -2873,7 +2923,7 @@ pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
+pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
     _mm256_set_m128i(hi, lo)
 }
 
@@ -2978,8 +3028,8 @@ pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a
 #[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(movss))] FIXME
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
-    simd_extract!(a, 0)
+pub fn _mm256_cvtss_f32(a: __m256) -> f32 {
+    unsafe { simd_extract!(a, 0) }
 }
 
 // LLVM intrinsics used in the above functions
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
index 71b8290eb877..b494385e4a61 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
@@ -24,9 +24,9 @@ use crate::{core_arch::x86::*, mem::transmute};
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
+pub fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
     static_assert_uimm_bits!(INDEX, 2);
-    transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i))
+    unsafe { transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) }
 }
 
 /// Extracts a 64-bit integer from `a`, selected with `INDEX`.
@@ -37,9 +37,9 @@ pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m25
 #[rustc_legacy_const_generics(1)]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
+pub fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
     static_assert_uimm_bits!(INDEX, 2);
-    simd_extract!(a.as_i64x4(), INDEX as u32)
+    unsafe { simd_extract!(a.as_i64x4(), INDEX as u32) }
 }
 
 #[cfg(test)]
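On x86_64 the 64-bit lane accessors take the lane index as a const generic, validated at compile time by `static_assert_uimm_bits!` (the `rustc_legacy_const_generics` attribute keeps the older positional-argument call form working). A closing sketch (hypothetical helper, same assumptions as the earlier sketches):

    use core::arch::x86_64::*;

    /// Increments lane 3 of a 4-lane i64 vector, leaving the rest intact.
    #[target_feature(enable = "avx")]
    fn bump_top_lane(a: __m256i) -> __m256i {
        let x = _mm256_extract_epi64::<3>(a);
        // A lane index outside 0..=3 would be rejected at compile time.
        _mm256_insert_epi64::<3>(a, x.wrapping_add(1))
    }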