sse3: _mm_hsub_ps
This commit is contained in:
parent
d81d0a4a67
commit
e4ffcb6fdd
1 changed files with 24 additions and 5 deletions
|
|
@ -8,7 +8,7 @@ use stdsimd_test::assert_instr;
|
|||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(addsub))]
|
||||
#[cfg_attr(test, assert_instr(addsubps))]
|
||||
pub unsafe fn _mm_addsub_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
addsubps(a, b)
|
||||
}
|
||||
|
|
@ -17,7 +17,7 @@ pub unsafe fn _mm_addsub_ps(a: f32x4, b: f32x4) -> f32x4 {
|
|||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(addsub))]
|
||||
#[cfg_attr(test, assert_instr(addsubpd))]
|
||||
pub unsafe fn _mm_addsub_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
addsubpd(a, b)
|
||||
}
|
||||
|
|
@ -26,7 +26,7 @@ pub unsafe fn _mm_addsub_pd(a: f64x2, b: f64x2) -> f64x2 {
|
|||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(hadd))]
|
||||
#[cfg_attr(test, assert_instr(haddpd))]
|
||||
pub unsafe fn _mm_hadd_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
haddpd(a, b)
|
||||
}
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _mm_hadd_pd(a: f64x2, b: f64x2) -> f64x2 {
|
|||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(hadd))]
|
||||
#[cfg_attr(test, assert_instr(haddps))]
|
||||
pub unsafe fn _mm_hadd_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
haddps(a, b)
|
||||
}
|
||||
|
|
@ -44,11 +44,20 @@ pub unsafe fn _mm_hadd_ps(a: f32x4, b: f32x4) -> f32x4 {
|
|||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(hsub))]
|
||||
#[cfg_attr(test, assert_instr(hsubpd))]
|
||||
pub unsafe fn _mm_hsub_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
hsubpd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtract adjacent pairs of single-precision (32-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse3"]
|
||||
#[cfg_attr(test, assert_instr(hsubps))]
|
||||
pub unsafe fn _mm_hsub_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
hsubps(a, b)
|
||||
}
|
||||
|
||||
/// Load 128-bits of integer data from unaligned memory.
|
||||
/// This intrinsic may perform better than `_mm_loadu_si128`
|
||||
/// when the data crosses a cache line boundary.
|
||||
|
|
@ -71,6 +80,8 @@ extern {
|
|||
fn haddps(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse3.hsub.pd"]
|
||||
fn hsubpd(a: f64x2, b: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse3.hsub.ps"]
|
||||
fn hsubps(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse3.ldu.dq"]
|
||||
fn lddqu(mem_addr: *const i8) -> __m128i;
|
||||
}
|
||||
|
|
@ -123,6 +134,14 @@ mod tests {
|
|||
assert_eq!(r, f64x2::new(-6.0, -120.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse3"]
|
||||
unsafe fn _mm_hsub_ps() {
|
||||
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse3::_mm_hsub_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(-6.0, 10.0, -120.0, 5.0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse3"]
|
||||
unsafe fn _mm_lddqu_si128() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue