ssse3 (#68)
* SSSE3: _mm_abs_epi16, _mm_abs_epi32, _mm_hadd_epi16 * SSSE3: _mm_hadds_epi16 * SSSE3: assert_instr * SSSE3: _mm_hadd_epi32 * SSSE3: _mm_hsub_epi16 * SSSE3: _mm_hsubs_epi16 * SSSE3: _mm_hsub_epi32 * SSSE3: _mm_maddubs_epi16 * SSSE3: _mm_mulhrs_epi16 * SSSE3: _mm_sign_epi8 * SSSE3: _mm_sign_epi16 * SSSE3: _mm_sign_epi32 * SSSE3: Fix assert_instr
This commit is contained in:
parent
0511ecbaf0
commit
d8881bcbc9
1 changed files with 280 additions and 0 deletions
|
|
@ -12,6 +12,24 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
|
|||
pabsb128(a)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of each of the packed 16-bit signed integers in `a` and
|
||||
/// return the results as packed 16-bit unsigned integers.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.pabs.w.128` binding.
pabsw128(a)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of each of the packed 32-bit signed integers in `a` and
|
||||
/// return the results as packed 32-bit unsigned integers.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.pabs.d.128` binding.
pabsd128(a)
|
||||
}
|
||||
|
||||
/// Shuffle bytes from `a` according to the content of `b`.
|
||||
///
|
||||
/// The last 4 bits of each byte of `b` are used as addresses
|
||||
|
|
@ -43,13 +61,164 @@ pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
|
|||
pshufb128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16]. The pair sums from `a` fill the first four
/// lanes of the result and the pair sums from `b` fill the last four.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phadd.w.128` binding.
phaddw128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16], with signed saturation. Sums greater than
|
||||
/// 7FFFh (32767) are saturated to 7FFFh; sums less than 8000h (-32768) are
/// saturated to 8000h. The pair sums from `a` fill the first four lanes of
/// the result and the pair sums from `b` fill the last four.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phadd.sw.128` binding.
phaddsw128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [4 x i32]. The pair sums from `a` fill the first two
/// lanes of the result and the pair sums from `b` fill the last two.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phaddd))]
|
||||
pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phadd.d.128` binding.
phaddd128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16]. Each result lane is the first
/// element of a pair minus the second; the differences from `a` fill the
/// first four lanes of the result and those from `b` fill the last four.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phsub.w.128` binding.
phsubw128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16], with signed saturation. Differences
|
||||
/// greater than 7FFFh (32767) are saturated to 7FFFh; differences less than
|
||||
/// 8000h (-32768) are saturated to 8000h. Each result lane is the first
/// element of a pair minus the second; the differences from `a` fill the
/// first four lanes of the result and those from `b` fill the last four.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phsub.sw.128` binding.
phsubsw128(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [4 x i32]. Each result lane is the first
/// element of a pair minus the second; the differences from `a` fill the
/// first two lanes of the result and those from `b` fill the last two.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phsubd))]
|
||||
pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.phsub.d.128` binding.
phsubd128(a, b)
|
||||
}
|
||||
|
||||
/// Multiply corresponding pairs of packed 8-bit unsigned integer
|
||||
/// values contained in the first source operand (`a`) and packed 8-bit signed
|
||||
/// integer values contained in the second source operand (`b`), add pairs of
|
||||
/// contiguous products with signed saturation, and write the 16-bit sums to
|
||||
/// the corresponding lanes of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.pmadd.ub.sw.128` binding.
pmaddubsw128(a, b)
|
||||
}
|
||||
|
||||
/// Multiply packed 16-bit signed integer values in `a` and `b`, truncate each
|
||||
/// 32-bit product to the 18 most significant bits by right-shifting, round the
|
||||
/// truncated value by adding 1, and write bits [16:1] to the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.pmul.hr.sw.128` binding.
pmulhrsw128(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
|
||||
/// integer in `b` is negative, and return the results.
|
||||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero, and copied from `a` unchanged when it is positive.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.psign.b.128` binding.
psignb128(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
|
||||
/// integer in `b` is negative, and return the results.
|
||||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero, and copied from `a` unchanged when it is positive.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.psign.w.128` binding.
psignw128(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
|
||||
/// integer in `b` is negative, and return the results.
|
||||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero, and copied from `a` unchanged when it is positive.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(psignd))]
|
||||
pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
||||
// Thin wrapper: defer to the `llvm.x86.ssse3.psign.d.128` binding.
psignd128(a, b)
|
||||
}
|
||||
|
||||
// Bindings to the LLVM SSSE3 intrinsics called by the public wrappers in this
// file; each `link_name` is the name of the LLVM intrinsic the function binds to.
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD vector
// types trip the FFI-safety lint — confirm.
#[allow(improper_ctypes)]
|
||||
extern {
|
||||
// Backs `_mm_abs_epi8`.
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
|
||||
fn pabsb128(a: i8x16) -> u8x16;
|
||||
|
||||
// Backs `_mm_abs_epi16`.
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
|
||||
fn pabsw128(a: i16x8) -> u16x8;
|
||||
|
||||
// Backs `_mm_abs_epi32`.
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
|
||||
fn pabsd128(a: i32x4) -> u32x4;
|
||||
|
||||
// Backs `_mm_shuffle_epi8`.
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
|
||||
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
|
||||
|
||||
// Backs `_mm_hadd_epi16`.
#[link_name = "llvm.x86.ssse3.phadd.w.128"]
|
||||
fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_hadds_epi16` (saturating variant).
#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
|
||||
fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_hadd_epi32`.
#[link_name = "llvm.x86.ssse3.phadd.d.128"]
|
||||
fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
|
||||
|
||||
// Backs `_mm_hsub_epi16`.
#[link_name = "llvm.x86.ssse3.phsub.w.128"]
|
||||
fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_hsubs_epi16` (saturating variant).
#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
|
||||
fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_hsub_epi32`.
#[link_name = "llvm.x86.ssse3.phsub.d.128"]
|
||||
fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
|
||||
|
||||
// Backs `_mm_maddubs_epi16`; note the unsigned first and signed second operand.
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
|
||||
fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
|
||||
|
||||
// Backs `_mm_mulhrs_epi16`.
#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
|
||||
fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_sign_epi8`.
#[link_name = "llvm.x86.ssse3.psign.b.128"]
|
||||
fn psignb128(a: i8x16, b: i8x16) -> i8x16;
|
||||
|
||||
// Backs `_mm_sign_epi16`.
#[link_name = "llvm.x86.ssse3.psign.w.128"]
|
||||
fn psignw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
// Backs `_mm_sign_epi32`.
#[link_name = "llvm.x86.ssse3.psign.d.128"]
|
||||
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -65,6 +234,18 @@ mod tests {
|
|||
assert_eq!(r, u8x16::splat(5));
|
||||
}
|
||||
|
||||
// Every input lane is -5, so every lane of the result must be 5.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_abs_epi16() {
|
||||
let r = ssse3::_mm_abs_epi16(i16x8::splat(-5));
|
||||
assert_eq!(r, u16x8::splat(5));
|
||||
}
|
||||
|
||||
// Every input lane is -5, so every lane of the result must be 5.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_abs_epi32() {
|
||||
let r = ssse3::_mm_abs_epi32(i32x4::splat(-5));
|
||||
assert_eq!(r, u32x4::splat(5));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_shuffle_epi8() {
|
||||
let a = u8x16::new(
|
||||
|
|
@ -88,4 +269,103 @@ mod tests {
|
|||
let r = ssse3::_mm_shuffle_epi8(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Lanes 0-3 of the result are the pair sums of `a`; lanes 4-7 are the pair
// sums of `b`.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hadd_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
|
||||
let expected = i16x8::new(3, 7, 11, 15, 132, 7, 36, 25);
|
||||
let r = ssse3::_mm_hadd_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// The last two pairs of `b` (32767 + 1 and -32768 + -1) overflow i16 and must
// saturate to 32767 and -32768 respectively.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hadds_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 4, 3, 32767, 1, -32768, -1);
|
||||
let expected = i16x8::new(3, 7, 11, 15, 132, 7, 32767, -32768);
|
||||
let r = ssse3::_mm_hadds_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Lanes 0-1 of the result are the pair sums of `a`; lanes 2-3 are the pair
// sums of `b`.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hadd_epi32() {
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
let b = i32x4::new(4, 128, 4, 3);
|
||||
let expected = i32x4::new(3, 7, 132, 7);
|
||||
let r = ssse3::_mm_hadd_epi32(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Each result lane is the first element of a pair minus the second; the pairs
// of `a` come first, then the pairs of `b`.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hsub_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
|
||||
let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 12, -13);
|
||||
let r = ssse3::_mm_hsub_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// The last two pairs of `b` (32767 - -1 and -32768 - 1) overflow i16 and must
// saturate to 32767 and -32768 respectively.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hsubs_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
|
||||
let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 32767, -32768);
|
||||
let r = ssse3::_mm_hsubs_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Each result lane is the first element of a pair minus the second; the pairs
// of `a` come first, then the pairs of `b`.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_hsub_epi32() {
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
let b = i32x4::new(4, 128, 4, 3);
|
||||
let expected = i32x4::new(-1, -1, -124, 1);
|
||||
let r = ssse3::_mm_hsub_epi32(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Each result lane is the sum of two adjacent byte products, e.g. lane 0 is
// 1 * 4 + 2 * 63 = 130. No lane in this fixture reaches the saturation bounds.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_maddubs_epi16() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
|
||||
let expected = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
|
||||
let r = ssse3::_mm_maddubs_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Only lanes 4 and 6 have products large enough to give a non-zero result:
// 5 * 32767 rounds to 5 and 7 * -32768 rounds to -7.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_mulhrs_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
|
||||
let expected = i16x8::new(0, 0, 0, 0, 5, 0, -7, 0);
|
||||
let r = ssse3::_mm_mulhrs_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Lanes whose `b` is negative are negated (including the already-negative
// -15 in lane 14), the final lane (`b` == 0) is zeroed, and the rest pass
// through unchanged.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_sign_epi8() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
|
||||
let b = i8x16::new(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
|
||||
let expected = i8x16::new(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
|
||||
let r = ssse3::_mm_sign_epi8(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Lane 2 (`b` == 0) is zeroed, lanes 5 and 6 (`b` negative) are negated, and
// the remaining lanes pass through unchanged.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_sign_epi16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, -5, -6, 7, 8);
|
||||
let b = i16x8::new(4, 128, 0, 3, 1, -1, -2, 1);
|
||||
let expected = i16x8::new(1, 2, 0, 4, -5, 6, -7, 8);
|
||||
let r = ssse3::_mm_sign_epi16(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
// Lane 1 (`b` negative) is negated, lane 3 (`b` == 0) is zeroed, and lanes 0
// and 2 (`b` positive) pass through unchanged.
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_sign_epi32() {
|
||||
let a = i32x4::new(-1, 2, 3, 4);
|
||||
let b = i32x4::new(1, -1, 1, 0);
|
||||
let expected = i32x4::new(-1, -2, 3, 0);
|
||||
let r = ssse3::_mm_sign_epi32(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue