From 50cf00372df822888acea6b8bb84f037eaa5ae73 Mon Sep 17 00:00:00 2001 From: Josef Ippisch Date: Sat, 13 Jan 2018 00:10:51 +0100 Subject: [PATCH] MMX subtraction instructions (#280) * Implement `_m_psubb` * Implement `_m_psubw` * Implement `_m_psubd` * Implement `_m_psubsb` * Implement `_m_psubsw` * Implement `_m_psubusb` * Implement `_m_psubusw` * Have the subtraction intrinsic naming consistent with the addition ones E.g. use `_mm_sub_pi8` instead of `_m_psubb` * Implement all subtraction aliases for the `_mm_*` variants - `_m_psubb` for `_mm_sub_pi8` - `_m_psubw` for `_mm_sub_pi16` - `_m_psubd` for `_mm_sub_pi32` - `_m_psubsb` for `_mm_subs_pi8` - `_m_psubsw` for `_mm_subs_pi16` - `_m_psubusb` for `_mm_subs_pu8` - `_m_psubusw` for `_mm_subs_pu16` --- library/stdarch/coresimd/src/x86/i686/mmx.rs | 220 ++++++++++++++++++- 1 file changed, 216 insertions(+), 4 deletions(-) diff --git a/library/stdarch/coresimd/src/x86/i686/mmx.rs b/library/stdarch/coresimd/src/x86/i686/mmx.rs index 07f1008a9cc5..2659cf3dc9c2 100644 --- a/library/stdarch/coresimd/src/x86/i686/mmx.rs +++ b/library/stdarch/coresimd/src/x86/i686/mmx.rs @@ -80,6 +80,126 @@ pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 { paddusw(a, b) } +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubb))] +pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 { + psubb(a, b) +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubb))] +pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 { + _mm_sub_pi8(a, b) +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubw))] +pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 { + psubw(a, b) +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubw))] +pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 { + _mm_sub_pi16(a, b) +} + +/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubd))] +pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 { + psubd(a, b) +} + +/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubd))] +pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 { + _mm_sub_pi32(a, b) +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` +/// using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubsb))] +pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 { + psubsb(a, b) +} + +/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` +/// using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubsb))] +pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 { + _mm_subs_pi8(a, b) +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubsw))] +pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 { + psubsw(a, b) +} + +/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` +/// using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubsw))] +pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 { + _mm_subs_pi16(a, b) +} + +/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit +/// integers in `a` using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubusb))] +pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 { + psubusb(a, b) +} + +/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit +/// integers in `a` using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubusb))] +pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 { + _mm_subs_pu8(a, b) +} + +/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned +/// 16-bit integers in `a` using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubusw))] +pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 { + psubusw(a, b) +} + +/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned +/// 16-bit integers in `a` using saturation. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(psubusw))] +pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { + _mm_subs_pu16(a, b) +} + /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// @@ -202,7 +322,9 @@ pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { /// Set packed 8-bit integers in dst with the supplied values. #[inline(always)] #[target_feature = "+mmx"] -pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { +pub unsafe fn _mm_set_pi8( + e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8 +) -> __m64 { _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) } @@ -227,14 +349,16 @@ pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { _mm_setr_pi8(a, a, a, a, a, a, a, a) } -/// Set packed 16-bit integers in dst with the supplied values in reverse order. +/// Set packed 16-bit integers in dst with the supplied values in reverse +/// order. #[inline(always)] #[target_feature = "+mmx"] pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { mem::transmute(i16x4::new(e0, e1, e2, e3)) } -/// Set packed 32-bit integers in dst with the supplied values in reverse order. +/// Set packed 32-bit integers in dst with the supplied values in reverse +/// order. #[inline(always)] #[target_feature = "+mmx"] pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { @@ -244,7 +368,9 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { /// Set packed 8-bit integers in dst with the supplied values in reverse order. #[inline(always)] #[target_feature = "+mmx"] -pub unsafe fn _mm_setr_pi8(e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8) -> __m64 { +pub unsafe fn _mm_setr_pi8( + e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8 +) -> __m64 { mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } @@ -264,6 +390,20 @@ extern "C" { fn paddusb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.paddus.w"] fn paddusw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psub.b"] + fn psubb(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psub.w"] + fn psubw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psub.d"] + fn psubd(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psubs.b"] + fn psubsb(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psubs.w"] + fn psubsw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psubus.b"] + fn psubusb(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.psubus.w"] + fn psubusw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.packsswb"] fn packsswb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.packssdw"] @@ -369,6 +509,78 @@ mod tests { assert_eq!(r, e); } + #[simd_test = "mmx"] + unsafe fn _mm_sub_pi8() { + let a = i8x8::new(0, 0, 1, 1, -1, -1, 0, 0); + let b = i8x8::new(-1, 1, -2, 2, 100, -100, -127, 127); + let e = i8x8::new(1, -1, 3, -1, -101, 99, 127, -127); + assert_eq!(e, i8x8::from(mmx::_mm_sub_pi8(a.into(), b.into()))); + assert_eq!(e, i8x8::from(mmx::_m_psubb(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_sub_pi16() { + let a = i16x4::new(-20000, -20000, 20000, 30000); + let b = i16x4::new(-10000, 10000, -10000, 30000); + let e = i16x4::new(-10000, -30000, 30000, 0); + assert_eq!(e, i16x4::from(mmx::_mm_sub_pi16(a.into(), b.into()))); + assert_eq!(e, i16x4::from(mmx::_m_psubw(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_sub_pi32() { + let a = i32x2::new(500_000, -500_000); + let b = i32x2::new(500_000, 500_000); + let e = i32x2::new(0, -1_000_000); + assert_eq!(e, i32x2::from(mmx::_mm_sub_pi32(a.into(), b.into()))); + assert_eq!(e, i32x2::from(mmx::_m_psubd(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_subs_pi8() { + let a = i8x8::new(-100, 100, 0, 0, 0, 0, -5, 5); + let b = i8x8::new(100, -100, i8::min_value(), 127, -1, 1, 3, -3); + let e = i8x8::new( + i8::min_value(), + i8::max_value(), + i8::max_value(), + -127, + 1, + -1, + -8, + 8, + ); + assert_eq!(e, i8x8::from(mmx::_mm_subs_pi8(a.into(), b.into()))); + assert_eq!(e, i8x8::from(mmx::_m_psubsb(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_subs_pi16() { + let a = i16x4::new(-20000, 20000, 0, 0); + let b = i16x4::new(20000, -20000, -1, 1); + let e = i16x4::new(i16::min_value(), i16::max_value(), 1, -1); + assert_eq!(e, i16x4::from(mmx::_mm_subs_pi16(a.into(), b.into()))); + assert_eq!(e, i16x4::from(mmx::_m_psubsw(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_subs_pu8() { + let a = u8x8::new(50, 10, 20, 30, 40, 60, 70, 80); + let b = u8x8::new(60, 20, 30, 40, 30, 20, 10, 0); + let e = u8x8::new(0, 0, 0, 0, 10, 40, 60, 80); + assert_eq!(e, u8x8::from(mmx::_mm_subs_pu8(a.into(), b.into()))); + assert_eq!(e, u8x8::from(mmx::_m_psubusb(a.into(), b.into()))); + } + + #[simd_test = "mmx"] + unsafe fn _mm_subs_pu16() { + let a = u16x4::new(10000, 200, 0, 44444); + let b = u16x4::new(20000, 300, 1, 11111); + let e = u16x4::new(0, 0, 0, 33333); + assert_eq!(e, u16x4::from(mmx::_mm_subs_pu16(a.into(), b.into()))); + assert_eq!(e, u16x4::from(mmx::_m_psubusw(a.into(), b.into()))); + } + #[simd_test = "mmx"] unsafe fn _mm_packs_pi16() { let a = i16x4::new(-1, 2, -3, 4);