progress
This commit is contained in:
parent
c709196f7a
commit
12121fc2bb
8 changed files with 867 additions and 81 deletions
|
|
@ -12,3 +12,4 @@ license = "MIT"
|
|||
|
||||
[profile.release]
|
||||
debug = true
|
||||
opt-level = 3
|
||||
|
|
|
|||
|
|
@ -1,28 +1,24 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
|
||||
use stdsimd as s;
|
||||
|
||||
fn main() {
|
||||
let arg1: f64 = env::args().nth(1).unwrap().parse().unwrap();
|
||||
let arg2: f64 = env::args().nth(2).unwrap().parse().unwrap();
|
||||
let arg3: f64 = env::args().nth(3).unwrap().parse().unwrap();
|
||||
let arg4: f64 = env::args().nth(4).unwrap().parse().unwrap();
|
||||
let arg1: u8 = env::args().nth(1).unwrap().parse().unwrap();
|
||||
let arg2: u8 = env::args().nth(2).unwrap().parse().unwrap();
|
||||
let arg3: u8 = env::args().nth(3).unwrap().parse().unwrap();
|
||||
let arg4: u8 = env::args().nth(4).unwrap().parse().unwrap();
|
||||
unsafe {
|
||||
let a1 = s::_mm_load_pd(&(arg1, arg2) as *const _ as *const f64);
|
||||
let b1 = s::_mm_load_pd(&(arg3, arg4) as *const _ as *const f64);
|
||||
// println!("{:?}, {:?}", a, b);
|
||||
let r1 = s::_mm_add_sd(a1, b1);
|
||||
// println!("{:?}", r1);
|
||||
let mut r2: (f64, f64) = (0.0, 0.0);
|
||||
s::_mm_store_pd(&mut r2 as *mut _ as *mut f64, r1);
|
||||
if r2 == (4.0, 2.0) {
|
||||
::std::io::stdout().write_all(b"yes\n").unwrap();
|
||||
} else {
|
||||
::std::io::stdout().write_all(b"NO\n").unwrap();
|
||||
}
|
||||
// println!("{:?}", r2);
|
||||
s::_mm_lfence();
|
||||
s::_mm_pause();
|
||||
let a = s::u8x16::new(
|
||||
arg1, arg1, arg1, arg1, arg1, arg1, arg1, arg1,
|
||||
arg2, arg2, arg2, arg2, arg2, arg2, arg2, arg2);
|
||||
let b = s::u8x16::new(
|
||||
arg3, arg3, arg3, arg3, arg3, arg3, arg3, arg3,
|
||||
arg4, arg4, arg4, arg4, arg4, arg4, arg4, arg4);
|
||||
let r = s::_mm_sad_epu8(a.as_m128i(), b.as_m128i());
|
||||
println!("{:?}", s::u64x2::from(r));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,36 +1,14 @@
|
|||
#![allow(dead_code)]
|
||||
#![feature(platform_intrinsics, repr_simd)]
|
||||
#![feature(link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi)]
|
||||
|
||||
// pub use v128::{__m128, __m128d, __m128i};
|
||||
pub use v128::*;
|
||||
pub use v64::__m64;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
pub use x86::*;
|
||||
|
||||
mod simd;
|
||||
mod v128;
|
||||
mod v64;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
mod x86;
|
||||
|
||||
extern "platform-intrinsic" {
|
||||
fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
fn simd_le<T, U>(x: T, y: T) -> U;
|
||||
fn simd_gt<T, U>(x: T, y: T) -> U;
|
||||
fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
|
||||
fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
|
||||
fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
|
||||
fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
|
||||
|
||||
fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
|
||||
fn simd_extract<T, U>(x: T, idx: u32) -> U;
|
||||
|
||||
fn simd_cast<T, U>(x: T) -> U;
|
||||
|
||||
fn simd_add<T>(x: T, y: T) -> T;
|
||||
fn simd_sub<T>(x: T, y: T) -> T;
|
||||
fn simd_mul<T>(x: T, y: T) -> T;
|
||||
fn simd_div<T>(x: T, y: T) -> T;
|
||||
fn simd_shl<T>(x: T, y: T) -> T;
|
||||
fn simd_shr<T>(x: T, y: T) -> T;
|
||||
fn simd_and<T>(x: T, y: T) -> T;
|
||||
fn simd_or<T>(x: T, y: T) -> T;
|
||||
fn simd_xor<T>(x: T, y: T) -> T;
|
||||
}
|
||||
|
|
|
|||
28
library/stdarch/src/simd.rs
Normal file
28
library/stdarch/src/simd.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
extern "platform-intrinsic" {
|
||||
pub fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_le<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_gt<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
|
||||
pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
|
||||
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
|
||||
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
|
||||
|
||||
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
|
||||
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
|
||||
|
||||
pub fn simd_cast<T, U>(x: T) -> U;
|
||||
|
||||
pub fn simd_add<T>(x: T, y: T) -> T;
|
||||
pub fn simd_sub<T>(x: T, y: T) -> T;
|
||||
pub fn simd_mul<T>(x: T, y: T) -> T;
|
||||
pub fn simd_div<T>(x: T, y: T) -> T;
|
||||
pub fn simd_shl<T>(x: T, y: T) -> T;
|
||||
pub fn simd_shr<T>(x: T, y: T) -> T;
|
||||
pub fn simd_and<T>(x: T, y: T) -> T;
|
||||
pub fn simd_or<T>(x: T, y: T) -> T;
|
||||
pub fn simd_xor<T>(x: T, y: T) -> T;
|
||||
}
|
||||
160
library/stdarch/src/v128.rs
Normal file
160
library/stdarch/src/v128.rs
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
use std::mem::transmute;
|
||||
|
||||
use simd::*;
|
||||
|
||||
// Declares a public `#[repr(simd)]` tuple struct named `$name` whose fields
// are the listed element types, in order. The generated type derives `Clone`,
// `Copy` and `Debug` only (no `PartialEq`). In this file it is used for the
// opaque `__m128`, `__m128d` and `__m128i` vector types.
macro_rules! define_ty {
|
||||
($name:ident, $($elty:ident),+) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct $name($($elty),*);
|
||||
}
|
||||
}
|
||||
|
||||
// Declares a public `#[repr(simd)]` tuple struct named `$name` whose fields
// are the listed element types, in order. Unlike the opaque `__m128*` types,
// the generated type also derives `PartialEq` (in addition to `Clone`, `Copy`
// and `Debug`), so values can be compared with `==` / `assert_eq!`. In this
// file it is used for the lane-typed vectors (`u8x16`, `f64x2`, ...) and the
// `boolu*` mask types.
macro_rules! define_ty_internal {
|
||||
($name:ident, $($elty:ident),+) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct $name($($elty),*);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! define_impl {
|
||||
($name:ident, $boolname:ident, $elemty:ident, $nelems:expr,
|
||||
$($elname:ident),+) => {
|
||||
impl From<__m128> for $name {
|
||||
#[inline]
|
||||
fn from(v: __m128) -> $name { unsafe { transmute(v) } }
|
||||
}
|
||||
|
||||
impl From<__m128i> for $name {
|
||||
#[inline]
|
||||
fn from(v: __m128i) -> $name { unsafe { transmute(v) } }
|
||||
}
|
||||
|
||||
impl From<__m128d> for $name {
|
||||
#[inline]
|
||||
fn from(v: __m128d) -> $name { unsafe { transmute(v) } }
|
||||
}
|
||||
|
||||
impl $name {
|
||||
#[inline]
|
||||
pub fn new($($elname: $elemty),*) -> $name {
|
||||
$name($($elname),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn splat(value: $elemty) -> $name {
|
||||
$name($({
|
||||
#[allow(non_camel_case_types, dead_code)]
|
||||
struct $elname;
|
||||
value
|
||||
}),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn eq(self, other: $name) -> $boolname {
|
||||
unsafe { simd_eq(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ne(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ne(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn lt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_lt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn le(self, other: $name) -> $boolname {
|
||||
unsafe { simd_le(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn gt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_gt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ge(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ge(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn extract(self, idx: u32) -> $elemty {
|
||||
debug_assert!(idx < $nelems);
|
||||
simd_extract(self, idx)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn insert(self, idx: u32, val: $elemty) -> $name {
|
||||
debug_assert!(idx < $nelems);
|
||||
simd_insert(self, idx, val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_m128(self) -> __m128 { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_m128d(self) -> __m128d { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_m128i(self) -> __m128i { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_f32x4(self) -> f32x4 { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_f64x2(self) -> f64x2 { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_u8x16(self) -> u8x16 { unsafe { transmute(self) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
define_ty! { __m128, f32, f32, f32, f32 }
|
||||
define_ty! { __m128d, f64, f64 }
|
||||
define_ty! { __m128i, u64, u64 }
|
||||
|
||||
define_ty_internal! { boolu64x2, u64, u64 }
|
||||
define_ty_internal! { boolu32x4, u32, u32, u32, u32 }
|
||||
define_ty_internal! { boolu16x8, u16, u16, u16, u16, u16, u16, u16, u16 }
|
||||
define_ty_internal! {
|
||||
boolu8x16, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8
|
||||
}
|
||||
|
||||
define_ty_internal! { f64x2, f64, f64 }
|
||||
define_impl! { f64x2, boolu64x2, f64, 2, x0, x1 }
|
||||
|
||||
define_ty_internal! { f32x4, f32, f32, f32, f32 }
|
||||
// `f32x4` has four lanes, so the lane count handed to `define_impl!` (used by
// the `debug_assert!(idx < $nelems)` checks in `extract`/`insert`) must be 4.
define_impl! { f32x4, boolu32x4, f32, 4, x0, x1, x2, x3 }
|
||||
|
||||
define_ty_internal! { u64x2, u64, u64 }
|
||||
define_impl! { u64x2, boolu64x2, u64, 2, x0, x1 }
|
||||
|
||||
define_ty_internal! { u32x4, u32, u32, u32, u32 }
|
||||
define_impl! { u32x4, boolu32x4, u32, 4, x0, x1, x2, x3 }
|
||||
|
||||
define_ty_internal! { i32x4, i32, i32, i32, i32 }
|
||||
define_impl! { i32x4, boolu32x4, i32, 4, x0, x1, x2, x3 }
|
||||
|
||||
define_ty_internal! { u16x8, u16, u16, u16, u16, u16, u16, u16, u16 }
|
||||
define_impl! { u16x8, boolu16x8, u16, 8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||
|
||||
define_ty_internal! { i16x8, i16, i16, i16, i16, i16, i16, i16, i16 }
|
||||
define_impl! { i16x8, boolu16x8, i16, 8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||
|
||||
define_ty_internal! {
|
||||
u8x16, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8
|
||||
}
|
||||
define_impl! {
|
||||
u8x16, boolu8x16, u8, 16,
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
}
|
||||
|
||||
define_ty_internal! {
|
||||
i8x16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8
|
||||
}
|
||||
define_impl! {
|
||||
i8x16, boolu8x16, i8, 16,
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
}
|
||||
105
library/stdarch/src/v64.rs
Normal file
105
library/stdarch/src/v64.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
use std::mem::transmute;
|
||||
|
||||
use simd::*;
|
||||
|
||||
// Declares a public `#[repr(simd)]` tuple struct named `$name` whose fields
// are the listed element types, in order. The generated type derives `Clone`,
// `Copy` and `Debug` only (no `PartialEq`). In this file it is used for the
// opaque `__m64` vector type.
macro_rules! define_ty {
|
||||
($name:ident, $($elty:ident),+) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct $name($($elty),*);
|
||||
}
|
||||
}
|
||||
|
||||
// Declares a public `#[repr(simd)]` tuple struct named `$name` whose fields
// are the listed element types, in order. The generated type derives
// `PartialEq` in addition to `Clone`, `Copy` and `Debug`, so values can be
// compared with `==` / `assert_eq!`. In this file it is used for the
// lane-typed 64-bit vectors (`u64x1`, `u32x2`) and their `boolu*` mask types.
macro_rules! define_ty_internal {
|
||||
($name:ident, $($elty:ident),+) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct $name($($elty),*);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! define_impl {
|
||||
($name:ident, $boolname:ident, $elemty:ident, $nelems:expr,
|
||||
$($elname:ident),+) => {
|
||||
impl From<__m64> for $name {
|
||||
#[inline]
|
||||
fn from(v: __m64) -> $name { unsafe { transmute(v) } }
|
||||
}
|
||||
|
||||
impl $name {
|
||||
#[inline]
|
||||
pub fn new($($elname: $elemty),*) -> $name {
|
||||
$name($($elname),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn splat(value: $elemty) -> $name {
|
||||
$name($({
|
||||
#[allow(non_camel_case_types, dead_code)]
|
||||
struct $elname;
|
||||
value
|
||||
}),*)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn eq(self, other: $name) -> $boolname {
|
||||
unsafe { simd_eq(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ne(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ne(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn lt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_lt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn le(self, other: $name) -> $boolname {
|
||||
unsafe { simd_le(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn gt(self, other: $name) -> $boolname {
|
||||
unsafe { simd_gt(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ge(self, other: $name) -> $boolname {
|
||||
unsafe { simd_ge(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn extract(self, idx: u32) -> $elemty {
|
||||
debug_assert!(idx < $nelems);
|
||||
simd_extract(self, idx)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn insert(self, idx: u32, val: $elemty) -> $name {
|
||||
debug_assert!(idx < $nelems);
|
||||
simd_insert(self, idx, val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_m64(self) -> __m64 { unsafe { transmute(self) } }
|
||||
#[inline]
|
||||
pub fn as_u64(self) -> u64 { unsafe { transmute(self) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
define_ty! { __m64, u64 }
|
||||
|
||||
define_ty_internal! { boolu64x1, u64 }
|
||||
define_ty_internal! { boolu32x2, u32, u32 }
|
||||
|
||||
define_ty_internal! { u64x1, u64 }
|
||||
define_impl! { u64x1, boolu64x1, u64, 1, x0 }
|
||||
|
||||
define_ty_internal! { u32x2, u32, u32 }
|
||||
define_impl! { u32x2, boolu32x2, u32, 2, x0, x1 }
|
||||
|
|
@ -1,29 +1,5 @@
|
|||
pub use self::sse::*;
|
||||
pub use self::sse2::*;
|
||||
|
||||
mod sse;
|
||||
mod sse2;
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct __m128(f32, f32, f32, f32);
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct __m128d(f64, f64);
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct __m128i(u64, u64);
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct f64x2(f64, f64);
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8);
|
||||
|
|
|
|||
|
|
@ -1,26 +1,568 @@
|
|||
use std::mem::transmute;
|
||||
use std::os::raw::c_void;
|
||||
|
||||
use super::{__m128d, __m128i, f64x2, u8x16};
|
||||
use {simd_add, simd_extract, simd_insert};
|
||||
use simd::*;
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
|
||||
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
|
||||
///
|
||||
/// This can help improve the performance and power consumption of spin-wait
|
||||
/// loops.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_pause() {
|
||||
pause()
|
||||
}
|
||||
|
||||
/// Invalidate and flush the cache line that contains `p` from all levels of
|
||||
/// the cache hierarchy.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_clflush(p: *mut c_void) {
|
||||
clflush(p)
|
||||
}
|
||||
|
||||
/// Perform a serializing operation on all load-from-memory instructions
|
||||
/// that were issued prior to this instruction.
|
||||
///
|
||||
/// Guarantees that every load instruction that precedes, in program order, is
|
||||
/// globally visible before any load instruction which follows the fence in
|
||||
/// program order.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_lfence() {
|
||||
lfence()
|
||||
}
|
||||
|
||||
/// Perform a serializing operation on all load-from-memory and store-to-memory
|
||||
/// instructions that were issued prior to this instruction.
|
||||
///
|
||||
/// Guarantees that every memory access that precedes, in program order, the
|
||||
/// memory fence instruction is globally visible before any memory instruction
|
||||
/// which follows the fence in program order.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_mfence() {
|
||||
mfence()
|
||||
}
|
||||
|
||||
/// Add packed 8-bit integers in "a" and "b", and return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(simd_add::<u8x16>(transmute(a), transmute(b)))
|
||||
simd_add(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed 16-bit integers in "a" and "b", and return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
simd_add(u16x8::from(a), u16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed 32-bit integers in "a" and "b", and return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
simd_add(u32x4::from(a), u32x4::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add 64-bit integers "a" and "b", and return the results.
|
||||
#[inline]
|
||||
unsafe fn _mm_add_si64(_a: __m64, _b: __m64) -> __m64 {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Add packed 64-bit integers in "a" and "b", and return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
simd_add(u64x2::from(a), u64x2::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed 8-bit integers in "a" and "b" using saturation, and return the
|
||||
/// results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
paddsb(i8x16::from(a), i8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed 16-bit integers in "a" and "b" using saturation, and return the
|
||||
/// results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
paddsw(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed unsigned 8-bit integers in "a" and "b" using saturation, and
|
||||
/// return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
|
||||
paddsub(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Add packed unsigned 16-bit integers in "a" and "b" using saturation, and
|
||||
/// return the results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
paddsuw(u16x8::from(a), u16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Average packed unsigned 8-bit integers in "a" and "b", and return the
|
||||
/// results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
|
||||
pavgb(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Average packed unsigned 16-bit integers in "a" and "b", and return the
|
||||
/// results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pavgw(u16x8::from(a), u16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Multiply packed signed 16-bit integers in "a" and "b", producing
|
||||
/// intermediate signed 32-bit integers.
|
||||
///
|
||||
/// Horizontally add adjacent pairs of intermediate 32-bit integers, and pack
|
||||
/// the results in "dst".
|
||||
#[inline]
|
||||
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmaddwd(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
|
||||
/// maximum values.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmaxsw(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
|
||||
/// packed maximum values.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmaxub(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
|
||||
/// minimum values.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pminsw(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
|
||||
/// packed minimum values.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
|
||||
pminub(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Multiply the packed 16-bit integers in `a` and `b`.
|
||||
///
|
||||
/// The multiplication produces intermediate 32-bit integers, and returns the
|
||||
/// high 16 bits of the intermediate integers.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmulhw(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Multiply the packed unsigned 16-bit integers in `a` and `b`.
|
||||
///
|
||||
/// The multiplication produces intermediate 32-bit integers, and returns the
|
||||
/// high 16 bits of the intermediate integers.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmulhuw(u16x8::from(a), u16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Multiply the packed 16-bit integers in `a` and `b`.
|
||||
///
|
||||
/// The multiplication produces intermediate 32-bit integers, and returns the
|
||||
/// low 16 bits of the intermediate integers.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
simd_mul(i16x8::from(a), i16x8::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Multiply the low unsigned 32-bit integers from `a` and `b`.
|
||||
///
|
||||
/// Return the unsigned 64-bit result.
|
||||
#[inline]
|
||||
unsafe fn _mm_mul_su32(_a: __m64, _b: __m64) -> __m64 {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element
|
||||
/// in `a` and `b`.
|
||||
///
|
||||
/// Return the unsigned 64-bit results.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
pmuludq(u32x4::from(a), u32x4::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
/// Sum the absolute differences of packed unsigned 8-bit integers.
|
||||
///
|
||||
/// Compute the absolute differences of packed unsigned 8-bit integers in `a`
|
||||
/// and `b`, then horizontally sum each consecutive 8 differences to produce
|
||||
/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
|
||||
/// the low 16 bits of 64-bit elements returned.
|
||||
#[inline]
|
||||
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
|
||||
psadbw(u8x16::from(a), u8x16::from(b)).as_m128i()
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
|
||||
let alow = simd_extract::<f64x2, f64>(transmute(a), 0);
|
||||
let blow = simd_extract::<f64x2, f64>(transmute(b), 0);
|
||||
transmute(simd_insert::<f64x2, f64>(transmute(a), 0, alow + blow))
|
||||
let (a, b) = (f64x2::from(a), f64x2::from(b));
|
||||
a.insert(0, a.extract(0) + b.extract(0)).as_m128d()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
transmute(simd_add::<f64x2>(transmute(a), transmute(b)))
|
||||
simd_add(f64x2::from(a), f64x2::from(b)).as_m128d()
|
||||
}
|
||||
|
||||
/// Load a `__m128d` (two `f64` values) from memory by reading `mem_addr` as a
/// pointer to `__m128d`.
///
/// # Safety
///
/// `mem_addr` is cast to `*const __m128d` and dereferenced, so it must be
/// valid for reading a whole `__m128d`, not just a single `f64`.
/// NOTE(review): Intel documents this intrinsic as requiring a 16-byte-aligned
/// address — confirm that callers guarantee that alignment.
#[inline]
|
||||
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
|
||||
*(mem_addr as *const __m128d)
|
||||
}
|
||||
|
||||
/// Store the `__m128d` value `a` (two `f64` values) to memory by writing
/// through `mem_addr` as a pointer to `__m128d`.
///
/// # Safety
///
/// `mem_addr` is cast to `*mut __m128d` and written through, so it must be
/// valid for writing a whole `__m128d`, not just a single `f64`.
/// NOTE(review): Intel documents this intrinsic as requiring a 16-byte-aligned
/// address — confirm that callers guarantee that alignment.
#[inline]
|
||||
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
|
||||
*(mem_addr as *mut __m128d) = a;
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern {
|
||||
#[link_name = "llvm.x86.sse2.pause"]
|
||||
pub fn pause();
|
||||
#[link_name = "llvm.x86.sse2.clflush"]
|
||||
pub fn clflush(p: *mut c_void);
|
||||
#[link_name = "llvm.x86.sse2.lfence"]
|
||||
pub fn lfence();
|
||||
#[link_name = "llvm.x86.sse2.mfence"]
|
||||
pub fn mfence();
|
||||
#[link_name = "llvm.x86.sse2.padds.b"]
|
||||
pub fn paddsb(a: i8x16, b: i8x16) -> i8x16;
|
||||
#[link_name = "llvm.x86.sse2.padds.w"]
|
||||
pub fn paddsw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.paddus.b"]
|
||||
pub fn paddsub(a: u8x16, b: u8x16) -> u8x16;
|
||||
#[link_name = "llvm.x86.sse2.paddus.w"]
|
||||
pub fn paddsuw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.pavg.b"]
|
||||
pub fn pavgb(a: u8x16, b: u8x16) -> u8x16;
|
||||
#[link_name = "llvm.x86.sse2.pavg.w"]
|
||||
pub fn pavgw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmadd.wd"]
|
||||
pub fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
|
||||
#[link_name = "llvm.x86.sse2.pmaxs.w"]
|
||||
pub fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmaxu.b"]
|
||||
pub fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
|
||||
#[link_name = "llvm.x86.sse2.pmins.w"]
|
||||
pub fn pminsw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.pminu.b"]
|
||||
pub fn pminub(a: u8x16, b: u8x16) -> u8x16;
|
||||
#[link_name = "llvm.x86.sse2.pmulh.w"]
|
||||
pub fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmulhu.w"]
|
||||
pub fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmulu.dq"]
|
||||
pub fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
|
||||
#[link_name = "llvm.x86.sse2.psad.bw"]
|
||||
pub fn psadbw(a: u8x16, b: u8x16) -> u64x2;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::os::raw::c_void;
|
||||
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use x86::sse2 as sse2;
|
||||
|
||||
#[test]
|
||||
fn _mm_pause() {
|
||||
unsafe { sse2::_mm_pause() }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_clflush() {
|
||||
let x = 0;
|
||||
unsafe { sse2::_mm_clflush(&x as *const _ as *mut c_void) }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_lfence() {
|
||||
unsafe { sse2::_mm_lfence() }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_mfence() {
|
||||
unsafe { sse2::_mm_mfence() }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_add_epi8() {
|
||||
let a = u8x16::new(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = u8x16::new(
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
let r = unsafe { sse2::_mm_add_epi8(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u8x16::new(
|
||||
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
|
||||
assert_eq!(u8x16::from(r), e);
|
||||
}
|
||||
|
||||
/// Checks that the non-saturating 8-bit add wraps around on overflow.
///
/// The previous version called the signed-saturating `_mm_adds_epi8`, for
/// which these inputs are simply `-1 + 1 = 0`, so no overflow behaviour was
/// exercised at all. Saturation itself is covered by the dedicated
/// `_mm_adds_epi8_saturate_*` tests.
#[test]
fn _mm_add_epi8_overflow() {
    let a = u8x16::splat(0xFF);
    let b = u8x16::splat(1);
    // 0xFF + 1 does not fit in 8 bits; every lane must wrap to 0.
    let r = unsafe { sse2::_mm_add_epi8(a.as_m128i(), b.as_m128i()) };
    assert_eq!(u8x16::from(r), u8x16::splat(0));
}
|
||||
|
||||
#[test]
|
||||
fn _mm_add_epi16() {
|
||||
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = unsafe { sse2::_mm_add_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u16x8::new(8, 10, 12, 14, 16, 18, 20, 22);
|
||||
assert_eq!(u16x8::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_add_epi32() {
|
||||
let a = u32x4::new(0, 1, 2, 3);
|
||||
let b = u32x4::new(4, 5, 6, 7);
|
||||
let r = unsafe { sse2::_mm_add_epi32(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u32x4::new(4, 6, 8, 10);
|
||||
assert_eq!(u32x4::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn _mm_add_si64() {
|
||||
let (a, b) = (u64x1::new(1), u64x1::new(2));
|
||||
let r = unsafe { sse2::_mm_add_si64(a.as_m64(), b.as_m64()) };
|
||||
let e = u64x1::new(3);
|
||||
assert_eq!(u64x1::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_add_epi64() {
|
||||
let a = u64x2::new(0, 1);
|
||||
let b = u64x2::new(2, 3);
|
||||
let r = unsafe { sse2::_mm_add_epi64(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u64x2::new(2, 4);
|
||||
assert_eq!(u64x2::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi8() {
|
||||
let a = i8x16::new(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = i8x16::new(
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
let r = unsafe { sse2::_mm_adds_epi8(a.as_m128i(), b.as_m128i()) };
|
||||
let e = i8x16::new(
|
||||
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
|
||||
assert_eq!(i8x16::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi8_saturate_positive() {
|
||||
let a = i8x16::splat(0x7F);
|
||||
let b = i8x16::splat(1);
|
||||
let r = unsafe { sse2::_mm_adds_epi8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i8x16::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi8_saturate_negative() {
|
||||
let a = i8x16::splat(-0x80);
|
||||
let b = i8x16::splat(-1);
|
||||
let r = unsafe { sse2::_mm_adds_epi8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i8x16::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi16() {
|
||||
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = unsafe { sse2::_mm_adds_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
let e = i16x8::new(8, 10, 12, 14, 16, 18, 20, 22);
|
||||
assert_eq!(i16x8::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi16_saturate_positive() {
|
||||
let a = i16x8::splat(0x7FFF);
|
||||
let b = i16x8::splat(1);
|
||||
let r = unsafe { sse2::_mm_adds_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epi16_saturate_negative() {
|
||||
let a = i16x8::splat(-0x8000);
|
||||
let b = i16x8::splat(-1);
|
||||
let r = unsafe { sse2::_mm_adds_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epu8() {
|
||||
let a = u8x16::new(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = u8x16::new(
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
let r = unsafe { sse2::_mm_adds_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u8x16::new(
|
||||
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
|
||||
assert_eq!(u8x16::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epu8_saturate() {
|
||||
let a = u8x16::splat(0xFF);
|
||||
let b = u8x16::splat(1);
|
||||
let r = unsafe { sse2::_mm_adds_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u8x16::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epu16() {
|
||||
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = unsafe { sse2::_mm_adds_epu16(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u16x8::new(8, 10, 12, 14, 16, 18, 20, 22);
|
||||
assert_eq!(u16x8::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_adds_epu16_saturate() {
|
||||
let a = u16x8::splat(0xFFFF);
|
||||
let b = u16x8::splat(1);
|
||||
let r = unsafe { sse2::_mm_adds_epu16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u16x8::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_avg_epu8() {
|
||||
let (a, b) = (u8x16::splat(3), u8x16::splat(9));
|
||||
let r = unsafe { sse2::_mm_avg_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u8x16::from(r), u8x16::splat(6));
|
||||
}
|
||||
|
||||
/// Checks the rounding average of packed unsigned 16-bit lanes.
///
/// The previous version called `_mm_avg_epu8` by mistake; it only passed
/// because the inputs fit in the low byte of each lane, where the 8-bit and
/// 16-bit averages happen to agree.
#[test]
fn _mm_avg_epu16() {
    let (a, b) = (u16x8::splat(3), u16x8::splat(9));
    let r = unsafe { sse2::_mm_avg_epu16(a.as_m128i(), b.as_m128i()) };
    assert_eq!(u16x8::from(r), u16x8::splat(6));

    // A case that distinguishes the 16-bit average from the 8-bit one:
    // (0x0100 + 0 + 1) >> 1 == 0x0080 per 16-bit lane, whereas averaging the
    // bytes independently would give 0x0100.
    let (a, b) = (u16x8::splat(0x0100), u16x8::splat(0));
    let r = unsafe { sse2::_mm_avg_epu16(a.as_m128i(), b.as_m128i()) };
    assert_eq!(u16x8::from(r), u16x8::splat(0x0080));
}
|
||||
|
||||
#[test]
|
||||
fn _mm_madd_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = unsafe { sse2::_mm_madd_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
let e = i32x4::new(29, 81, 149, 233);
|
||||
assert_eq!(i32x4::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_max_epi16() {
|
||||
let a = i16x8::splat(1);
|
||||
let b = i16x8::splat(-1);
|
||||
let r = unsafe { sse2::_mm_max_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_max_epu8() {
|
||||
let a = u8x16::splat(1);
|
||||
let b = u8x16::splat(255);
|
||||
let r = unsafe { sse2::_mm_max_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u8x16::from(r), b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_min_epi16() {
|
||||
let a = i16x8::splat(1);
|
||||
let b = i16x8::splat(-1);
|
||||
let r = unsafe { sse2::_mm_min_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_min_epu8() {
|
||||
let a = u8x16::splat(1);
|
||||
let b = u8x16::splat(255);
|
||||
let r = unsafe { sse2::_mm_min_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u8x16::from(r), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_mulhi_epi16() {
|
||||
let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001));
|
||||
let r = unsafe { sse2::_mm_mulhi_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), i16x8::splat(-16));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_mulhi_epu16() {
|
||||
let (a, b) = (u16x8::splat(1000), u16x8::splat(1001));
|
||||
let r = unsafe { sse2::_mm_mulhi_epu16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(u16x8::from(r), u16x8::splat(15));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_mullo_epi16() {
|
||||
let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001));
|
||||
let r = unsafe { sse2::_mm_mullo_epi16(a.as_m128i(), b.as_m128i()) };
|
||||
assert_eq!(i16x8::from(r), i16x8::splat(-17960));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn _mm_mul_su32() {
|
||||
let a = u32x2::new(1_000_000_000, 3);
|
||||
let b = u32x2::new(1_000_000_000, 4);
|
||||
let r = unsafe { sse2::_mm_mul_su32(a.as_m64(), b.as_m64()) };
|
||||
let e = u64x1::new(1_000_000_000 * 1_000_000_000);
|
||||
assert_eq!(u64x1::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_mul_epu32() {
|
||||
let a = u64x2::new(1_000_000_000, 1 << 34);
|
||||
let b = u64x2::new(1_000_000_000, 1 << 35);
|
||||
let r = unsafe { sse2::_mm_mul_epu32(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u64x2::new(1_000_000_000 * 1_000_000_000, 0);
|
||||
assert_eq!(u64x2::from(r), e);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_sad_epu8() {
|
||||
let a = u8x16::new(
|
||||
255, 254, 253, 252, 1, 2, 3, 4,
|
||||
155, 154, 153, 152, 1, 2, 3, 4);
|
||||
let b = u8x16::new(
|
||||
0, 0, 0, 0, 2, 1, 2, 1,
|
||||
1, 1, 1, 1, 1, 2, 1, 2);
|
||||
let r = unsafe { sse2::_mm_sad_epu8(a.as_m128i(), b.as_m128i()) };
|
||||
let e = u64x2::new(1020, 614);
|
||||
assert_eq!(u64x2::from(r), e);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue