From 4ca81c04327c20b98fa6a5143553c219e5025c79 Mon Sep 17 00:00:00 2001
From: usamoi <usamoi@outlook.com>
Date: Sun, 26 Jan 2025 20:56:38 +0800
Subject: [PATCH] mark FMA intrinsics as safe

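Mark all FMA intrinsics in core_arch/src/x86/fma.rs as safe.

Each intrinsic changes from `pub unsafe fn` to `pub fn`, and its body is
wrapped in an explicit `unsafe { ... }` block so the calls to the
underlying SIMD helpers (`simd_fma`, `simd_neg`, `simd_insert!`,
`simd_shuffle!`, `fmaf32`, `fmaf64`) stay inside an unsafe scope.

No functional change: the attributes (`#[target_feature(enable = "fma")]`,
`assert_instr`, `stable`) and the expressions inside each body are
otherwise untouched.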
---
 .../stdarch/crates/core_arch/src/x86/fma.rs   | 256 ++++++++++--------
 1 file changed, 144 insertions(+), 112 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs
index 7e5b93c83908..d3988422b9a4 100644
--- a/library/stdarch/crates/core_arch/src/x86/fma.rs
+++ b/library/stdarch/crates/core_arch/src/x86/fma.rs
@@ -33,8 +33,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_fma(a, b, c)
+pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe { simd_fma(a, b, c) }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    simd_fma(a, b, c)
+pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe { simd_fma(a, b, c) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_fma(a, b, c)
+pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe { simd_fma(a, b, c) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    simd_fma(a, b, c)
+pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe { simd_fma(a, b, c) }
 }
 
 /// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_insert!(
-        a,
-        0,
-        fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
-    )
+pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
+        )
+    }
 }
 
 /// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_insert!(
-        a,
-        0,
-        fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
-    )
+pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
+        )
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [2, 1])
+pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [2, 1])
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [4, 1, 6, 3])
+pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [4, 1, 6, 3])
+    }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [4, 1, 6, 3])
+pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [4, 1, 6, 3])
+    }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
+pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_fma(a, b, simd_neg(c))
+pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    simd_fma(a, b, simd_neg(c))
+pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub213ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_fma(a, b, simd_neg(c))
+pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub213ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    simd_fma(a, b, simd_neg(c))
+pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
 /// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_insert!(
-        a,
-        0,
-        fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
-    )
+pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
+        )
+    }
 }
 
 /// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_insert!(
-        a,
-        0,
-        fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
-    )
+pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
+        )
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [0, 3])
+pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [0, 3])
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [0, 5, 2, 7])
+pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [0, 5, 2, 7])
+    }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [0, 5, 2, 7])
+pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [0, 5, 2, 7])
+    }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    let add = simd_fma(a, b, c);
-    let sub = simd_fma(a, b, simd_neg(c));
-    simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
+pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe {
+        let add = simd_fma(a, b, c);
+        let sub = simd_fma(a, b, simd_neg(c));
+        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_fma(simd_neg(a), b, c)
+pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    simd_fma(simd_neg(a), b, c)
+pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_fma(simd_neg(a), b, c)
+pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    simd_fma(simd_neg(a), b, c)
+pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
 /// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_insert!(
-        a,
-        0,
-        fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
-    )
+pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
+        )
+    }
 }
 
 /// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_insert!(
-        a,
-        0,
-        fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
-    )
+pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
+        )
+    }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_fma(simd_neg(a), b, simd_neg(c))
+pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    simd_fma(simd_neg(a), b, simd_neg(c))
+pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_fma(simd_neg(a), b, simd_neg(c))
+pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
 /// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    simd_fma(simd_neg(a), b, simd_neg(c))
+pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
 /// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
-    simd_insert!(
-        a,
-        0,
-        fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
-    )
+pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
+        )
+    }
 }
 
 /// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
-    simd_insert!(
-        a,
-        0,
-        fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
-    )
+pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+    unsafe {
+        simd_insert!(
+            a,
+            0,
+            fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
+        )
+    }
 }
 
 #[cfg(test)]