* SSSE3: _mm_abs_epi16, _mm_abs_epi32, _mm_hadd_epi16

* SSSE3: _mm_hadds_epi16

* SSSE3: assert_instr

* SSSE3: _mm_hadd_epi32

* SSSE3: _mm_hsub_epi16

* SSSE3: _mm_hsubs_epi16

* SSSE3: _mm_hsub_epi32

* SSSE3: _mm_maddubs_epi16

* SSSE3: _mm_mulhrs_epi16

* SSSE3: _mm_sign_epi8

* SSSE3: _mm_sign_epi16

* SSSE3: _mm_sign_epi32

* SSSE3: Fix assert_instr
This commit is contained in:
gwenn 2017-09-28 21:10:40 +02:00 committed by Alex Crichton
parent 0511ecbaf0
commit d8881bcbc9

View file

@ -12,6 +12,24 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
pabsb128(a)
}
/// Compute the absolute value of each of the packed 16-bit signed integers
/// in `a` and return the results as packed 16-bit unsigned integers.
///
/// Thin wrapper around the `pabsw128` binding, which is linked to the LLVM
/// intrinsic `llvm.x86.ssse3.pabs.w.128`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
pabsw128(a)
}
/// Compute the absolute value of each of the packed 32-bit signed integers
/// in `a` and return the results as packed 32-bit unsigned integers.
///
/// Thin wrapper around the `pabsd128` binding, which is linked to the LLVM
/// intrinsic `llvm.x86.ssse3.pabs.d.128`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
pabsd128(a)
}
/// Shuffle bytes from `a` according to the content of `b`.
///
/// The last 4 bits of each byte of `b` are used as addresses
@ -43,13 +61,164 @@ pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
pshufb128(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
///
/// The four pair sums taken from `a` occupy the first four lanes of the
/// result, followed by the four pair sums taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
phaddw128(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16], with signed saturation. Positive sums
/// greater than 7FFFh are saturated to 7FFFh. Negative sums less than 8000h
/// are saturated to 8000h.
///
/// The four pair sums taken from `a` occupy the first four lanes of the
/// result, followed by the four pair sums taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
phaddsw128(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [4 x i32].
///
/// The two pair sums taken from `a` occupy the first two lanes of the
/// result, followed by the two pair sums taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
phaddd128(a, b)
}
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16].
///
/// Within each pair the second value is subtracted from the first. The four
/// differences taken from `a` occupy the first four lanes of the result,
/// followed by the four differences taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
phsubw128(a, b)
}
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16], with signed saturation. Positive
/// differences greater than 7FFFh are saturated to 7FFFh. Negative
/// differences less than 8000h are saturated to 8000h.
///
/// Within each pair the second value is subtracted from the first. The four
/// differences taken from `a` occupy the first four lanes of the result,
/// followed by the four differences taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
phsubsw128(a, b)
}
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [4 x i32].
///
/// Within each pair the second value is subtracted from the first. The two
/// differences taken from `a` occupy the first two lanes of the result,
/// followed by the two differences taken from `b`.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
phsubd128(a, b)
}
/// Multiply corresponding pairs of packed 8-bit unsigned integer
/// values contained in `a` and packed 8-bit signed integer values
/// contained in `b`, add pairs of contiguous products with signed
/// saturation, and return the 16-bit sums in the corresponding lanes of
/// the result.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
pmaddubsw128(a, b)
}
/// Multiply the packed 16-bit signed integer values in `a` and `b`,
/// truncate each 32-bit product to the 18 most significant bits by
/// right-shifting, round the truncated value by adding 1, and return
/// bits [16:1] of each rounded value as the corresponding result lane.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
pmulhrsw128(a, b)
}
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero, and are copied from `a` unchanged when it is positive.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
psignb128(a, b)
}
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero, and are copied from `a` unchanged when it is positive.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
psignw128(a, b)
}
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero, and are copied from `a` unchanged when it is positive.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
psignd128(a, b)
}
// Raw bindings used by the public `_mm_*` wrappers in this file. Each
// declaration's `link_name` names the LLVM SSSE3 intrinsic it resolves to.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD
// vector types are not considered FFI-safe by the lint — confirm.
#[allow(improper_ctypes)]
extern {
// Packed absolute value: 8-, 16- and 32-bit lanes.
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
fn pabsb128(a: i8x16) -> u8x16;
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
fn pabsw128(a: i16x8) -> u16x8;
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
fn pabsd128(a: i32x4) -> u32x4;
// Byte shuffle.
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
// Horizontal add: 16-bit, 16-bit saturating, 32-bit.
#[link_name = "llvm.x86.ssse3.phadd.w.128"]
fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.ssse3.phadd.d.128"]
fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
// Horizontal subtract: 16-bit, 16-bit saturating, 32-bit.
#[link_name = "llvm.x86.ssse3.phsub.w.128"]
fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.ssse3.phsub.d.128"]
fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
// Unsigned-by-signed byte multiply with saturating pairwise add.
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
// 16-bit multiply returning the rounded high part of the product.
#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
// Conditional negate/zero by the sign of `b`: 8-, 16- and 32-bit lanes.
#[link_name = "llvm.x86.ssse3.psign.b.128"]
fn psignb128(a: i8x16, b: i8x16) -> i8x16;
#[link_name = "llvm.x86.ssse3.psign.w.128"]
fn psignw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.ssse3.psign.d.128"]
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
#[cfg(test)]
@ -65,6 +234,18 @@ mod tests {
assert_eq!(r, u8x16::splat(5));
}
#[simd_test = "ssse3"]
unsafe fn _mm_abs_epi16() {
    // Every lane holds -5, so every lane of the result must be 5.
    let input = i16x8::splat(-5);
    let want = u16x8::splat(5);
    assert_eq!(ssse3::_mm_abs_epi16(input), want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_abs_epi32() {
    // Every lane holds -5, so every lane of the result must be 5.
    let input = i32x4::splat(-5);
    let want = u32x4::splat(5);
    assert_eq!(ssse3::_mm_abs_epi32(input), want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_shuffle_epi8() {
let a = u8x16::new(
@ -88,4 +269,103 @@ mod tests {
let r = ssse3::_mm_shuffle_epi8(a, b);
assert_eq!(r, expected);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hadd_epi16() {
    let lhs = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    let rhs = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
    // First four lanes: pair sums of `lhs`; last four: pair sums of `rhs`.
    let want = i16x8::new(3, 7, 11, 15, 132, 7, 36, 25);
    let got = ssse3::_mm_hadd_epi16(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hadds_epi16() {
    let lhs = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    // The last two pairs overflow i16 in each direction to exercise
    // saturation: 32767 + 1 and -32768 + -1.
    let rhs = i16x8::new(4, 128, 4, 3, 32767, 1, -32768, -1);
    let want = i16x8::new(3, 7, 11, 15, 132, 7, 32767, -32768);
    let got = ssse3::_mm_hadds_epi16(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hadd_epi32() {
    let lhs = i32x4::new(1, 2, 3, 4);
    let rhs = i32x4::new(4, 128, 4, 3);
    // First two lanes: pair sums of `lhs`; last two: pair sums of `rhs`.
    let want = i32x4::new(3, 7, 132, 7);
    let got = ssse3::_mm_hadd_epi32(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hsub_epi16() {
    let lhs = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    let rhs = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
    // Each lane is (first - second) of a pair: `lhs` pairs, then `rhs` pairs.
    let want = i16x8::new(-1, -1, -1, -1, -124, 1, 12, -13);
    let got = ssse3::_mm_hsub_epi16(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hsubs_epi16() {
    let lhs = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    // The last two pairs overflow i16 in each direction to exercise
    // saturation: 32767 - (-1) and -32768 - 1.
    let rhs = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
    let want = i16x8::new(-1, -1, -1, -1, -124, 1, 32767, -32768);
    let got = ssse3::_mm_hsubs_epi16(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_hsub_epi32() {
    let lhs = i32x4::new(1, 2, 3, 4);
    let rhs = i32x4::new(4, 128, 4, 3);
    // Each lane is (first - second) of a pair: `lhs` pairs, then `rhs` pairs.
    let want = i32x4::new(-1, -1, -124, 1);
    let got = ssse3::_mm_hsub_epi32(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_maddubs_epi16() {
    let unsigned_bytes = u8x16::new(
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    );
    let signed_bytes = i8x16::new(
        4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0,
    );
    // Lane i is the sum of the two products at byte positions 2i and 2i + 1,
    // e.g. lane 0 = 1 * 4 + 2 * 63 = 130.
    let want = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
    let got = ssse3::_mm_maddubs_epi16(unsigned_bytes, signed_bytes);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_mulhrs_epi16() {
    let lhs = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    let rhs = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
    // Only the products with 32767 and -32768 are large enough to yield a
    // non-zero result; every other lane is expected to be 0.
    let want = i16x8::new(0, 0, 0, 0, 5, 0, -7, 0);
    let got = ssse3::_mm_mulhrs_epi16(lhs, rhs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_sign_epi8() {
    let values = i8x16::new(
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16,
    );
    let signs = i8x16::new(
        4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0,
    );
    // Negative sign lane => negated value, zero sign lane => 0,
    // positive sign lane => value unchanged.
    let want = i8x16::new(
        1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0,
    );
    let got = ssse3::_mm_sign_epi8(values, signs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_sign_epi16() {
    let values = i16x8::new(1, 2, 3, 4, -5, -6, 7, 8);
    let signs = i16x8::new(4, 128, 0, 3, 1, -1, -2, 1);
    // Negative sign lane => negated value, zero sign lane => 0,
    // positive sign lane => value unchanged.
    let want = i16x8::new(1, 2, 0, 4, -5, 6, -7, 8);
    let got = ssse3::_mm_sign_epi16(values, signs);
    assert_eq!(got, want);
}
#[simd_test = "ssse3"]
unsafe fn _mm_sign_epi32() {
    let values = i32x4::new(-1, 2, 3, 4);
    let signs = i32x4::new(1, -1, 1, 0);
    // Negative sign lane => negated value, zero sign lane => 0,
    // positive sign lane => value unchanged.
    let want = i32x4::new(-1, -2, 3, 0);
    let got = ssse3::_mm_sign_epi32(values, signs);
    assert_eq!(got, want);
}
}