mark FMA intrinsics as safe

Mark all FMA intrinsics as safe.
This commit is contained in:
usamoi 2025-01-26 20:56:38 +08:00 committed by Amanieu d'Antras
parent 093f6a34ef
commit 4ca81c0432

View file

@@ -33,8 +33,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(a, b, c)
pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(a, b, c)
pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(a, b, c)
pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(a, b, c)
pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(a, b, c) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [2, 1])
pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [2, 1])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [4, 1, 6, 3])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(a, b, simd_neg(c))
pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(a, b, simd_neg(c))
pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(a, b, simd_neg(c))
pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(a, b, simd_neg(c))
pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(a, b, simd_neg(c)) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 3])
pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 3])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 5, 2, 7])
}
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe {
let add = simd_fma(a, b, c);
let sub = simd_fma(a, b, simd_neg(c));
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(simd_neg(a), b, c)
pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(simd_neg(a), b, c)
pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(simd_neg(a), b, c)
pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(simd_neg(a), b, c)
pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(simd_neg(a), b, c) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
)
}
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
simd_fma(simd_neg(a), b, simd_neg(c))
pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
unsafe {
simd_insert!(
a,
0,
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
)
}
}
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
unsafe {
simd_insert!(
a,
0,
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
)
}
}
#[cfg(test)]