mark FMA intrinsics as safe

Mark all FMA intrinsics as safe.
This commit is contained in:
usamoi 2025-01-26 20:56:38 +08:00 committed by Amanieu d'Antras
parent 093f6a34ef
commit 4ca81c0432

View file

@@ -33,8 +33,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(a, b, c)
pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(a, b, c)
pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(a, b, c)
pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(a, b, c)
pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [2, 1])
pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [2, 1])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(a, b, simd_neg(c))
pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(a, b, simd_neg(c))
pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(a, b, simd_neg(c))
pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(a, b, simd_neg(c))
pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 3])
pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 3])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(simd_neg(a), b, c)
pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(simd_neg(a), b, c)
pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(simd_neg(a), b, c)
pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(simd_neg(a), b, c)
pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
}
}
#[cfg(test)]