From 4ca81c04327c20b98fa6a5143553c219e5025c79 Mon Sep 17 00:00:00 2001 From: usamoi Date: Sun, 26 Jan 2025 20:56:38 +0800 Subject: [PATCH] mark FMA intrinsics as safe Mark all FMA intrinsics as safe. --- .../stdarch/crates/core_arch/src/x86/fma.rs | 256 ++++++++++-------- 1 file changed, 144 insertions(+), 112 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs index 7e5b93c83908..d3988422b9a4 100644 --- a/library/stdarch/crates/core_arch/src/x86/fma.rs +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -33,8 +33,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(a, b, c) +pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(a, b, c) +pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(a, b, c) +pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(a, b, c) +pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, c) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) - ) +pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c)) - ) +pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [2, 1]) +pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [2, 1]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) +pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(a, b, simd_neg(c)) +pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(a, b, simd_neg(c)) +pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(a, b, simd_neg(c)) +pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(a, b, simd_neg(c)) +pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) - ) +pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c)) - ) +pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 3]) +pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 3]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 5, 2, 7]) +pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 5, 2, 7]) +pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) +pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(simd_neg(a), b, c) +pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(simd_neg(a), b, c) +pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(simd_neg(a), b, c) +pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(simd_neg(a), b, c) +pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) - ) +pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c)) - ) +pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) - ) +pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c)) - ) +pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } } #[cfg(test)]