commit
4d504a7767
2 changed files with 265 additions and 15 deletions
|
|
@ -59,23 +59,23 @@ sse
|
|||
* [ ] `_m_pmovmskb`
|
||||
* [ ] `_mm_shuffle_pi16`
|
||||
* [ ] `_m_pshufw`
|
||||
* [ ] `_mm_add_ss`
|
||||
* [ ] `_mm_add_ps`
|
||||
* [ ] `_mm_sub_ss`
|
||||
* [ ] `_mm_sub_ps`
|
||||
* [ ] `_mm_mul_ss`
|
||||
* [ ] `_mm_mul_ps`
|
||||
* [ ] `_mm_div_ss`
|
||||
* [ ] `_mm_div_ps`
|
||||
* [ ] `_mm_sqrt_ss`
|
||||
* [x] `_mm_add_ss`
|
||||
* [x] `_mm_add_ps`
|
||||
* [x] `_mm_sub_ss`
|
||||
* [x] `_mm_sub_ps`
|
||||
* [x] `_mm_mul_ss`
|
||||
* [x] `_mm_mul_ps`
|
||||
* [x] `_mm_div_ss`
|
||||
* [x] `_mm_div_ps`
|
||||
* [x] `_mm_sqrt_ss`
|
||||
* [x] `_mm_sqrt_ps`
|
||||
* [ ] `_mm_rcp_ss`
|
||||
* [x] `_mm_rcp_ss`
|
||||
* [x] `_mm_rcp_ps`
|
||||
* [ ] `_mm_rsqrt_ss`
|
||||
* [x] `_mm_rsqrt_ss`
|
||||
* [x] `_mm_rsqrt_ps`
|
||||
* [ ] `_mm_min_ss`
|
||||
* [x] `_mm_min_ss`
|
||||
* [x] `_mm_min_ps`
|
||||
* [ ] `_mm_max_ss`
|
||||
* [x] `_mm_max_ss`
|
||||
* [x] `_mm_max_ps`
|
||||
* [ ] `_mm_and_ps`
|
||||
* [ ] `_mm_andnot_ps`
|
||||
|
|
@ -458,8 +458,8 @@ sse4.1
|
|||
* [ ] `_mm_blendv_ps`
|
||||
* [x] `_mm_blendv_epi8`
|
||||
* [ ] `_mm_blend_epi16`
|
||||
* [ ] `_mm_dp_pd`
|
||||
* [ ] `_mm_dp_ps`
|
||||
* [x] `_mm_dp_pd`
|
||||
* [x] `_mm_dp_ps`
|
||||
* [ ] `_mm_extract_ps`
|
||||
* [ ] `_mm_extract_epi8`
|
||||
* [ ] `_mm_extract_epi32`
|
||||
|
|
|
|||
|
|
@ -4,6 +4,83 @@ use v128::*;
|
|||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
/// Adds the first component of `a` and `b`; the other components are copied
|
||||
/// unchanged from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(addss))]
|
||||
pub fn _mm_add_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `addss` extern declaration (`llvm.x86.sse.add.ss`).
unsafe { addss(a, b) }
|
||||
}
|
||||
|
||||
/// Adds the `f32x4` vectors `a` and `b` component by component.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(addps))]
|
||||
pub fn _mm_add_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Uses the `+` operator on `f32x4`; the test build asserts this emits `addps`.
a + b
|
||||
}
|
||||
|
||||
/// Subtracts the first component of `b` from the first component of `a`; the
|
||||
/// other components are copied unchanged from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(subss))]
|
||||
pub fn _mm_sub_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `subss` extern declaration (`llvm.x86.sse.sub.ss`).
unsafe { subss(a, b) }
|
||||
}
|
||||
|
||||
/// Subtracts the `f32x4` vector `b` from `a` component by component.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(subps))]
|
||||
pub fn _mm_sub_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Uses the `-` operator on `f32x4`; the test build asserts this emits `subps`.
a - b
|
||||
}
|
||||
|
||||
/// Multiplies the first component of `a` and `b`; the other components are
|
||||
/// copied unchanged from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(mulss))]
|
||||
pub fn _mm_mul_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `mulss` extern declaration (`llvm.x86.sse.mul.ss`).
unsafe { mulss(a, b) }
|
||||
}
|
||||
|
||||
/// Multiplies the `f32x4` vectors `a` and `b` component by component.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(mulps))]
|
||||
pub fn _mm_mul_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Uses the `*` operator on `f32x4`; the test build asserts this emits `mulps`.
a * b
|
||||
}
|
||||
|
||||
/// Divides the first component of `a` by the first component of `b`; the
|
||||
/// other components are copied unchanged from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(divss))]
|
||||
pub fn _mm_div_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `divss` extern declaration (`llvm.x86.sse.div.ss`).
unsafe { divss(a, b) }
|
||||
}
|
||||
|
||||
/// Divides the `f32x4` vector `a` by `b` component by component.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(divps))]
|
||||
pub fn _mm_div_ps(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Uses the `/` operator on `f32x4`; the test build asserts this emits `divps`.
a / b
|
||||
}
|
||||
|
||||
/// Return the square root of the first single-precision (32-bit)
|
||||
/// floating-point element in `a`; the other elements are unchanged.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(sqrtss))]
|
||||
pub fn _mm_sqrt_ss(a: f32x4) -> f32x4 {
|
||||
// Forwards to the `sqrtss` extern declaration (`llvm.x86.sse.sqrt.ss`).
unsafe { sqrtss(a) }
|
||||
}
|
||||
|
||||
/// Return the square root of packed single-precision (32-bit) floating-point
|
||||
/// elements in `a`.
|
||||
#[inline(always)]
|
||||
|
|
@ -13,6 +90,15 @@ pub fn _mm_sqrt_ps(a: f32x4) -> f32x4 {
|
|||
unsafe { sqrtps(a) }
|
||||
}
|
||||
|
||||
/// Return the approximate reciprocal of the first single-precision
|
||||
/// (32-bit) floating-point element in `a`; the other elements are unchanged.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(rcpss))]
|
||||
pub fn _mm_rcp_ss(a: f32x4) -> f32x4 {
|
||||
// Forwards to the `rcpss` extern declaration (`llvm.x86.sse.rcp.ss`).
unsafe { rcpss(a) }
|
||||
}
|
||||
|
||||
/// Return the approximate reciprocal of packed single-precision (32-bit)
|
||||
/// floating-point elements in `a`.
|
||||
#[inline(always)]
|
||||
|
|
@ -22,6 +108,15 @@ pub fn _mm_rcp_ps(a: f32x4) -> f32x4 {
|
|||
unsafe { rcpps(a) }
|
||||
}
|
||||
|
||||
/// Return the approximate reciprocal square root of the first single-precision
|
||||
/// (32-bit) floating-point element in `a`; the other elements are unchanged.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(rsqrtss))]
|
||||
pub fn _mm_rsqrt_ss(a: f32x4) -> f32x4 {
|
||||
// Forwards to the `rsqrtss` extern declaration (`llvm.x86.sse.rsqrt.ss`).
unsafe { rsqrtss(a) }
|
||||
}
|
||||
|
||||
/// Return the approximate reciprocal square root of packed single-precision
|
||||
/// (32-bit) floating-point elements in `a`.
|
||||
#[inline(always)]
|
||||
|
|
@ -31,6 +126,16 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
|
|||
unsafe { rsqrtps(a) }
|
||||
}
|
||||
|
||||
/// Compare the first single-precision (32-bit) floating-point element of `a`
|
||||
/// and `b`, and return the minimum value in the first element of the return
|
||||
/// value; the other elements are copied from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(minss))]
|
||||
pub fn _mm_min_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `minss` extern declaration (`llvm.x86.sse.min.ss`).
unsafe { minss(a, b) }
|
||||
}
|
||||
|
||||
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
|
||||
/// `b`, and return the corresponding minimum values.
|
||||
#[inline(always)]
|
||||
|
|
@ -40,6 +145,16 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
|
|||
unsafe { minps(a, b) }
|
||||
}
|
||||
|
||||
/// Compare the first single-precision (32-bit) floating-point element of `a`
|
||||
/// and `b`, and return the maximum value in the first element of the return
|
||||
/// value; the other elements are copied from `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(maxss))]
|
||||
pub fn _mm_max_ss(a: f32x4, b: f32x4) -> f32x4 {
|
||||
// Forwards to the `maxss` extern declaration (`llvm.x86.sse.max.ss`).
unsafe { maxss(a, b) }
|
||||
}
|
||||
|
||||
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
|
||||
/// `b`, and return the corresponding maximum values.
|
||||
#[inline(always)]
|
||||
|
|
@ -70,14 +185,32 @@ pub fn _mm_movemask_ps(a: f32x4) -> i32 {
|
|||
|
||||
#[allow(improper_ctypes)]
|
||||
extern {
|
||||
#[link_name = "llvm.x86.sse.add.ss"]
|
||||
fn addss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.sub.ss"]
|
||||
fn subss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.mul.ss"]
|
||||
fn mulss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.div.ss"]
|
||||
fn divss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.sqrt.ss"]
|
||||
fn sqrtss(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.sqrt.ps"]
|
||||
fn sqrtps(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.rcp.ss"]
|
||||
fn rcpss(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.rcp.ps"]
|
||||
fn rcpps(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.rsqrt.ss"]
|
||||
fn rsqrtss(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.rsqrt.ps"]
|
||||
fn rsqrtps(a: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.min.ss"]
|
||||
fn minss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.min.ps"]
|
||||
fn minps(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.max.ss"]
|
||||
fn maxss(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.max.ps"]
|
||||
fn maxps(a: f32x4, b: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse.movmsk.ps"]
|
||||
|
|
@ -89,6 +222,87 @@ mod tests {
|
|||
use v128::*;
|
||||
use x86::sse;
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_add_ps() {
|
||||
// Every component of the result must be the sum of the matching components.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_add_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(-101.0, 25.0, 0.0, -15.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_add_ss() {
|
||||
// Only the first component is summed; the remaining three must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_add_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(-101.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_sub_ps() {
|
||||
// Every component of the result must be `a - b` for the matching components.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_sub_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(99.0, -15.0, 0.0, -5.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_sub_ss() {
|
||||
// Only the first component is `a - b`; the remaining three must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_sub_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(99.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_mul_ps() {
|
||||
// Every component of the result must be the product of the matching components.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_mul_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(100.0, 100.0, 0.0, 50.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_mul_ss() {
|
||||
// Only the first component is multiplied; the remaining three must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_mul_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_div_ps() {
|
||||
// Every component of the result must be `a / b` for the matching components.
// The third components are 2.0 and 0.2 here (the sibling tests use 0.0 and
// 0.0) — presumably to avoid a 0.0 / 0.0 division; confirm.
let a = f32x4::new(-1.0, 5.0, 2.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.2, -5.0);
|
||||
let r = sse::_mm_div_ps(a, b);
|
||||
assert_eq!(r, f32x4::new(0.01, 0.25, 10.0, 2.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_div_ss() {
|
||||
// Only the first component is `a / b` (-1.0 / -100.0); the remaining three
// must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_div_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(0.01, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_sqrt_ss() {
|
||||
// Only the first component is replaced by its square root (4.0 -> 2.0); the
// remaining three must be unchanged.
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_sqrt_ss(a);
|
||||
let e = f32x4::new(2.0, 13.0, 16.0, 100.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_sqrt_ps() {
|
||||
|
|
@ -98,6 +312,15 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_rcp_ss() {
|
||||
// Only the first component is replaced by its reciprocal; the remaining
// three must be unchanged.
// NOTE(review): `_mm_rcp_ss` is documented as an *approximate* reciprocal,
// yet the expected value 0.24993896 is compared with exact equality —
// confirm this constant is stable on every CPU the tests run on.
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rcp_ss(a);
|
||||
let e = f32x4::new(0.24993896, 13.0, 16.0, 100.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_rcp_ps() {
|
||||
|
|
@ -107,6 +330,15 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_rsqrt_ss() {
|
||||
// Only the first component is replaced by its reciprocal square root; the
// remaining three must be unchanged.
// NOTE(review): `_mm_rsqrt_ss` is documented as an *approximate* result,
// yet the expected value 0.49987793 is compared with exact equality —
// confirm this constant is stable on every CPU the tests run on.
let a = f32x4::new(4.0, 13.0, 16.0, 100.0);
|
||||
let r = sse::_mm_rsqrt_ss(a);
|
||||
let e = f32x4::new(0.49987793, 13.0, 16.0, 100.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_rsqrt_ps() {
|
||||
|
|
@ -116,6 +348,15 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_min_ss() {
|
||||
// The first component is the smaller of -1.0 and -100.0; the remaining
// three must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_min_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_min_ps() {
|
||||
|
|
@ -125,6 +366,15 @@ mod tests {
|
|||
assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_max_ss() {
|
||||
// The first component is the larger of -1.0 and -100.0; the remaining
// three must equal `a`'s.
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = sse::_mm_max_ss(a, b);
|
||||
assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[target_feature = "+sse"]
|
||||
fn _mm_max_ps() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue