MMX subtraction instructions (#280)

* Implement `_m_psubb`

* Implement `_m_psubw`

* Implement `_m_psubd`

* Implement `_m_psubsb`

* Implement `_m_psubsw`

* Implement `_m_psubusb`

* Implement `_m_psubusw`

* Have the subtraction intrinsic naming consistent with the addition ones

E.g. use `_mm_sub_pi8` instead of `_m_psubb`

* Implement all subtraction aliases for the `_mm_*` variants

- `_m_psubb` for `_mm_sub_pi8`
- `_m_psubw` for `_mm_sub_pi16`
- `_m_psubd` for `_mm_sub_pi32`
- `_m_psubsb` for `_mm_subs_pi8`
- `_m_psubsw` for `_mm_subs_pi16`
- `_m_psubusb` for `_mm_subs_pu8`
- `_m_psubusw` for `_mm_subs_pu16`
This commit is contained in:
Josef Ippisch 2018-01-13 00:10:51 +01:00 committed by Alex Crichton
parent e77ebf194a
commit 50cf00372d

View file

@ -80,6 +80,126 @@ pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
paddusw(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
psubb(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi8(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
psubw(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi16(a, b)
}
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
psubd(a, b)
}
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi32(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
psubsb(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
_mm_subs_pi8(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
psubsw(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
_mm_subs_pi16(a, b)
}
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
psubusb(a, b)
}
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
_mm_subs_pu8(a, b)
}
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
psubusw(a, b)
}
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
_mm_subs_pu16(a, b)
}
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
@ -202,7 +322,9 @@ pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
/// Set packed 8-bit integers in dst with the supplied values.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
pub unsafe fn _mm_set_pi8(
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8
) -> __m64 {
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}
@ -227,14 +349,16 @@ pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
_mm_setr_pi8(a, a, a, a, a, a, a, a)
}
/// Set packed 16-bit integers in dst with the supplied values in reverse order.
/// Set packed 16-bit integers in dst with the supplied values in reverse
/// order.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
mem::transmute(i16x4::new(e0, e1, e2, e3))
}
/// Set packed 32-bit integers in dst with the supplied values in reverse order.
/// Set packed 32-bit integers in dst with the supplied values in reverse
/// order.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
@ -244,7 +368,9 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
/// Set packed 8-bit integers in dst with the supplied values in reverse order.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi8(e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8) -> __m64 {
pub unsafe fn _mm_setr_pi8(
e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8
) -> __m64 {
mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
@ -264,6 +390,20 @@ extern "C" {
fn paddusb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.paddus.w"]
fn paddusw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.b"]
fn psubb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.w"]
fn psubw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.d"]
fn psubd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubs.b"]
fn psubsb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubs.w"]
fn psubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubus.b"]
fn psubusb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubus.w"]
fn psubusw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.packsswb"]
fn packsswb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.packssdw"]
@ -369,6 +509,78 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test = "mmx"]
unsafe fn _mm_sub_pi8() {
let a = i8x8::new(0, 0, 1, 1, -1, -1, 0, 0);
let b = i8x8::new(-1, 1, -2, 2, 100, -100, -127, 127);
let e = i8x8::new(1, -1, 3, -1, -101, 99, 127, -127);
assert_eq!(e, i8x8::from(mmx::_mm_sub_pi8(a.into(), b.into())));
assert_eq!(e, i8x8::from(mmx::_m_psubb(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_sub_pi16() {
let a = i16x4::new(-20000, -20000, 20000, 30000);
let b = i16x4::new(-10000, 10000, -10000, 30000);
let e = i16x4::new(-10000, -30000, 30000, 0);
assert_eq!(e, i16x4::from(mmx::_mm_sub_pi16(a.into(), b.into())));
assert_eq!(e, i16x4::from(mmx::_m_psubw(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_sub_pi32() {
let a = i32x2::new(500_000, -500_000);
let b = i32x2::new(500_000, 500_000);
let e = i32x2::new(0, -1_000_000);
assert_eq!(e, i32x2::from(mmx::_mm_sub_pi32(a.into(), b.into())));
assert_eq!(e, i32x2::from(mmx::_m_psubd(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_subs_pi8() {
let a = i8x8::new(-100, 100, 0, 0, 0, 0, -5, 5);
let b = i8x8::new(100, -100, i8::min_value(), 127, -1, 1, 3, -3);
let e = i8x8::new(
i8::min_value(),
i8::max_value(),
i8::max_value(),
-127,
1,
-1,
-8,
8,
);
assert_eq!(e, i8x8::from(mmx::_mm_subs_pi8(a.into(), b.into())));
assert_eq!(e, i8x8::from(mmx::_m_psubsb(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_subs_pi16() {
let a = i16x4::new(-20000, 20000, 0, 0);
let b = i16x4::new(20000, -20000, -1, 1);
let e = i16x4::new(i16::min_value(), i16::max_value(), 1, -1);
assert_eq!(e, i16x4::from(mmx::_mm_subs_pi16(a.into(), b.into())));
assert_eq!(e, i16x4::from(mmx::_m_psubsw(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_subs_pu8() {
let a = u8x8::new(50, 10, 20, 30, 40, 60, 70, 80);
let b = u8x8::new(60, 20, 30, 40, 30, 20, 10, 0);
let e = u8x8::new(0, 0, 0, 0, 10, 40, 60, 80);
assert_eq!(e, u8x8::from(mmx::_mm_subs_pu8(a.into(), b.into())));
assert_eq!(e, u8x8::from(mmx::_m_psubusb(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_subs_pu16() {
let a = u16x4::new(10000, 200, 0, 44444);
let b = u16x4::new(20000, 300, 1, 11111);
let e = u16x4::new(0, 0, 0, 33333);
assert_eq!(e, u16x4::from(mmx::_mm_subs_pu16(a.into(), b.into())));
assert_eq!(e, u16x4::from(mmx::_m_psubusw(a.into(), b.into())));
}
#[simd_test = "mmx"]
unsafe fn _mm_packs_pi16() {
let a = i16x4::new(-1, 2, -3, 4);