MMX subtraction instructions (#280)
* Implement `_m_psubb`
* Implement `_m_psubw`
* Implement `_m_psubd`
* Implement `_m_psubsb`
* Implement `_m_psubsw`
* Implement `_m_psubusb`
* Implement `_m_psubusw`
* Make the subtraction intrinsic naming consistent with the addition ones, e.g. use `_mm_sub_pi8` instead of `_m_psubb`
* Implement all subtraction aliases for the `_mm_*` variants:
  - `_m_psubb` for `_mm_sub_pi8`
  - `_m_psubw` for `_mm_sub_pi16`
  - `_m_psubd` for `_mm_sub_pi32`
  - `_m_psubsb` for `_mm_subs_pi8`
  - `_m_psubsw` for `_mm_subs_pi16`
  - `_m_psubusb` for `_mm_subs_pu8`
  - `_m_psubusw` for `_mm_subs_pu16`
This commit is contained in:
parent
e77ebf194a
commit
50cf00372d
1 changed files with 216 additions and 4 deletions
|
|
@ -80,6 +80,126 @@ pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
|
|||
paddusw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubb))]
|
||||
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psubb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubb))]
|
||||
pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubw))]
|
||||
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psubw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubw))]
|
||||
pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubd))]
|
||||
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
psubd(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubd))]
|
||||
pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi32(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubsb))]
|
||||
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psubsb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubsb))]
|
||||
pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubsw))]
|
||||
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psubsw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubsw))]
|
||||
pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
|
||||
/// integers in `a` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubusb))]
|
||||
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
psubusb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
|
||||
/// integers in `a` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubusb))]
|
||||
pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
|
||||
/// 16-bit integers in `a` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubusw))]
|
||||
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
psubusw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
|
||||
/// 16-bit integers in `a` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(psubusw))]
|
||||
pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pu16(a, b)
|
||||
}
|
||||
|
||||
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
|
||||
/// using signed saturation.
|
||||
///
|
||||
|
|
@ -202,7 +322,9 @@ pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
|
|||
/// Set packed 8-bit integers in dst with the supplied values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
|
||||
pub unsafe fn _mm_set_pi8(
|
||||
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8
|
||||
) -> __m64 {
|
||||
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
|
||||
}
|
||||
|
||||
|
|
@ -227,14 +349,16 @@ pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
|
|||
_mm_setr_pi8(a, a, a, a, a, a, a, a)
|
||||
}
|
||||
|
||||
/// Set packed 16-bit integers in dst with the supplied values in reverse order.
|
||||
/// Set packed 16-bit integers in dst with the supplied values in reverse
|
||||
/// order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
|
||||
mem::transmute(i16x4::new(e0, e1, e2, e3))
|
||||
}
|
||||
|
||||
/// Set packed 32-bit integers in dst with the supplied values in reverse order.
|
||||
/// Set packed 32-bit integers in dst with the supplied values in reverse
|
||||
/// order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
|
||||
|
|
@ -244,7 +368,9 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
|
|||
/// Set packed 8-bit integers in dst with the supplied values in reverse order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi8(e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8) -> __m64 {
|
||||
pub unsafe fn _mm_setr_pi8(
|
||||
e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8
|
||||
) -> __m64 {
|
||||
mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
|
||||
}
|
||||
|
||||
|
|
@ -264,6 +390,20 @@ extern "C" {
|
|||
fn paddusb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.paddus.w"]
|
||||
fn paddusw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.b"]
|
||||
fn psubb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.w"]
|
||||
fn psubw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.d"]
|
||||
fn psubd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubs.b"]
|
||||
fn psubsb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubs.w"]
|
||||
fn psubsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubus.b"]
|
||||
fn psubusb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubus.w"]
|
||||
fn psubusw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.packsswb"]
|
||||
fn packsswb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.packssdw"]
|
||||
|
|
@ -369,6 +509,78 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_sub_pi8() {
|
||||
let a = i8x8::new(0, 0, 1, 1, -1, -1, 0, 0);
|
||||
let b = i8x8::new(-1, 1, -2, 2, 100, -100, -127, 127);
|
||||
let e = i8x8::new(1, -1, 3, -1, -101, 99, 127, -127);
|
||||
assert_eq!(e, i8x8::from(mmx::_mm_sub_pi8(a.into(), b.into())));
|
||||
assert_eq!(e, i8x8::from(mmx::_m_psubb(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_sub_pi16() {
|
||||
let a = i16x4::new(-20000, -20000, 20000, 30000);
|
||||
let b = i16x4::new(-10000, 10000, -10000, 30000);
|
||||
let e = i16x4::new(-10000, -30000, 30000, 0);
|
||||
assert_eq!(e, i16x4::from(mmx::_mm_sub_pi16(a.into(), b.into())));
|
||||
assert_eq!(e, i16x4::from(mmx::_m_psubw(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_sub_pi32() {
|
||||
let a = i32x2::new(500_000, -500_000);
|
||||
let b = i32x2::new(500_000, 500_000);
|
||||
let e = i32x2::new(0, -1_000_000);
|
||||
assert_eq!(e, i32x2::from(mmx::_mm_sub_pi32(a.into(), b.into())));
|
||||
assert_eq!(e, i32x2::from(mmx::_m_psubd(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_subs_pi8() {
|
||||
let a = i8x8::new(-100, 100, 0, 0, 0, 0, -5, 5);
|
||||
let b = i8x8::new(100, -100, i8::min_value(), 127, -1, 1, 3, -3);
|
||||
let e = i8x8::new(
|
||||
i8::min_value(),
|
||||
i8::max_value(),
|
||||
i8::max_value(),
|
||||
-127,
|
||||
1,
|
||||
-1,
|
||||
-8,
|
||||
8,
|
||||
);
|
||||
assert_eq!(e, i8x8::from(mmx::_mm_subs_pi8(a.into(), b.into())));
|
||||
assert_eq!(e, i8x8::from(mmx::_m_psubsb(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_subs_pi16() {
|
||||
let a = i16x4::new(-20000, 20000, 0, 0);
|
||||
let b = i16x4::new(20000, -20000, -1, 1);
|
||||
let e = i16x4::new(i16::min_value(), i16::max_value(), 1, -1);
|
||||
assert_eq!(e, i16x4::from(mmx::_mm_subs_pi16(a.into(), b.into())));
|
||||
assert_eq!(e, i16x4::from(mmx::_m_psubsw(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_subs_pu8() {
|
||||
let a = u8x8::new(50, 10, 20, 30, 40, 60, 70, 80);
|
||||
let b = u8x8::new(60, 20, 30, 40, 30, 20, 10, 0);
|
||||
let e = u8x8::new(0, 0, 0, 0, 10, 40, 60, 80);
|
||||
assert_eq!(e, u8x8::from(mmx::_mm_subs_pu8(a.into(), b.into())));
|
||||
assert_eq!(e, u8x8::from(mmx::_m_psubusb(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_subs_pu16() {
|
||||
let a = u16x4::new(10000, 200, 0, 44444);
|
||||
let b = u16x4::new(20000, 300, 1, 11111);
|
||||
let e = u16x4::new(0, 0, 0, 33333);
|
||||
assert_eq!(e, u16x4::from(mmx::_mm_subs_pu16(a.into(), b.into())));
|
||||
assert_eq!(e, u16x4::from(mmx::_m_psubusw(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_packs_pi16() {
|
||||
let a = i16x4::new(-1, 2, -3, 4);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue