mark FMA intrinsics as safe
Mark all FMA intrinsics as safe.
This commit is contained in:
parent
093f6a34ef
commit
4ca81c0432
1 changed file with 144 additions and 112 deletions
|
|
@@ -33,8 +33,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_fma(a, b, c)
|
||||
pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe { simd_fma(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
simd_fma(a, b, c)
|
||||
pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe { simd_fma(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_fma(a, b, c)
|
||||
pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe { simd_fma(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
simd_fma(a, b, c)
|
||||
pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe { simd_fma(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies the lower double-precision (64-bit) floating-point elements in
|
||||
|
|
@@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
|
||||
)
|
||||
pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies the lower single-precision (32-bit) floating-point elements in
|
||||
|
|
@@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
|
||||
)
|
||||
pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [2, 1])
|
||||
pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [2, 1])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [4, 1, 6, 3])
|
||||
pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [4, 1, 6, 3])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [4, 1, 6, 3])
|
||||
pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [4, 1, 6, 3])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmaddsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
|
||||
pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_fma(a, b, simd_neg(c))
|
||||
pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe { simd_fma(a, b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
simd_fma(a, b, simd_neg(c))
|
||||
pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe { simd_fma(a, b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub213ps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_fma(a, b, simd_neg(c))
|
||||
pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe { simd_fma(a, b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub213ps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
simd_fma(a, b, simd_neg(c))
|
||||
pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe { simd_fma(a, b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies the lower double-precision (64-bit) floating-point elements in
|
||||
|
|
@@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
|
||||
)
|
||||
pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies the lower single-precision (32-bit) floating-point elements in
|
||||
|
|
@@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
|
||||
)
|
||||
pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 3])
|
||||
pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 3])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 5, 2, 7])
|
||||
pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 5, 2, 7])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 5, 2, 7])
|
||||
pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 5, 2, 7])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfmsubadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
|
||||
pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe {
|
||||
let add = simd_fma(a, b, c);
|
||||
let sub = simd_fma(a, b, simd_neg(c));
|
||||
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_fma(simd_neg(a), b, c)
|
||||
pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe { simd_fma(simd_neg(a), b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
simd_fma(simd_neg(a), b, c)
|
||||
pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe { simd_fma(simd_neg(a), b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_fma(simd_neg(a), b, c)
|
||||
pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe { simd_fma(simd_neg(a), b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
simd_fma(simd_neg(a), b, c)
|
||||
pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe { simd_fma(simd_neg(a), b, c) }
|
||||
}
|
||||
|
||||
/// Multiplies the lower double-precision (64-bit) floating-point elements in
|
||||
|
|
@@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
|
||||
)
|
||||
pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies the lower single-precision (32-bit) floating-point elements in
|
||||
|
|
@@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmadd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
|
||||
)
|
||||
pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_fma(simd_neg(a), b, simd_neg(c))
|
||||
pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
|
||||
|
|
@@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
simd_fma(simd_neg(a), b, simd_neg(c))
|
||||
pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
||||
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_fma(simd_neg(a), b, simd_neg(c))
|
||||
pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
|
||||
|
|
@@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
simd_fma(simd_neg(a), b, simd_neg(c))
|
||||
pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
||||
unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
|
||||
}
|
||||
|
||||
/// Multiplies the lower double-precision (64-bit) floating-point elements in
|
||||
|
|
@@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
|
||||
)
|
||||
pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies the lower single-precision (32-bit) floating-point elements in
|
||||
|
|
@@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
|
|||
#[target_feature(enable = "fma")]
|
||||
#[cfg_attr(test, assert_instr(vfnmsub))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
|
||||
)
|
||||
pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
|
||||
unsafe {
|
||||
simd_insert!(
|
||||
a,
|
||||
0,
|
||||
fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue