Implement _mm(256)_abs_epi* without x86-specific LLVM intrinsics
This commit is contained in:
parent
153bde14d1
commit
5b0b9e9db2
3 changed files with 91 additions and 21 deletions
|
|
@@ -371,6 +371,73 @@ simd_ty!(
|
|||
x7
|
||||
);
|
||||
|
||||
simd_m_ty!(
|
||||
m8x32[i8]:
|
||||
x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15,
|
||||
x16,
|
||||
x17,
|
||||
x18,
|
||||
x19,
|
||||
x20,
|
||||
x21,
|
||||
x22,
|
||||
x23,
|
||||
x24,
|
||||
x25,
|
||||
x26,
|
||||
x27,
|
||||
x28,
|
||||
x29,
|
||||
x30,
|
||||
x31
|
||||
);
|
||||
simd_m_ty!(
|
||||
m16x16[i16]:
|
||||
x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_m_ty!(
|
||||
m32x8[i32]:
|
||||
x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
|
||||
// 512-bit wide types:
|
||||
|
||||
simd_ty!(
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,10 @@ use stdarch_test::assert_instr;
|
|||
#[cfg_attr(test, assert_instr(vpabsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
|
||||
transmute(pabsd(a.as_i32x8()))
|
||||
let a = a.as_i32x8();
|
||||
let zero = i32x8::splat(0);
|
||||
let r = simd_select::<m32x8, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Computes the absolute values of packed 16-bit integers in `a`.
|
||||
|
|
@@ -43,7 +46,10 @@ pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpabsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
|
||||
transmute(pabsw(a.as_i16x16()))
|
||||
let a = a.as_i16x16();
|
||||
let zero = i16x16::splat(0);
|
||||
let r = simd_select::<m16x16, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Computes the absolute values of packed 8-bit integers in `a`.
|
||||
|
|
@@ -54,7 +60,10 @@ pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpabsb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
|
||||
transmute(pabsb(a.as_i8x32()))
|
||||
let a = a.as_i8x32();
|
||||
let zero = i8x32::splat(0);
|
||||
let r = simd_select::<m8x32, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Adds packed 64-bit integers in `a` and `b`.
|
||||
|
|
@@ -3639,12 +3648,6 @@ pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
|
|||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.avx2.pabs.b"]
|
||||
fn pabsb(a: i8x32) -> u8x32;
|
||||
#[link_name = "llvm.x86.avx2.pabs.w"]
|
||||
fn pabsw(a: i16x16) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pabs.d"]
|
||||
fn pabsd(a: i32x8) -> u32x8;
|
||||
#[link_name = "llvm.x86.avx2.phadd.w"]
|
||||
fn phaddw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.phadd.d"]
|
||||
|
|
|
|||
|
|
@@ -17,7 +17,10 @@ use stdarch_test::assert_instr;
|
|||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
||||
transmute(pabsb128(a.as_i8x16()))
|
||||
let a = a.as_i8x16();
|
||||
let zero = i8x16::splat(0);
|
||||
let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Computes the absolute value of each of the packed 16-bit signed integers in
|
||||
|
|
@@ -30,7 +33,10 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
||||
transmute(pabsw128(a.as_i16x8()))
|
||||
let a = a.as_i16x8();
|
||||
let zero = i16x8::splat(0);
|
||||
let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Computes the absolute value of each of the packed 32-bit signed integers in
|
||||
|
|
@@ -43,7 +49,10 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
||||
transmute(pabsd128(a.as_i32x4()))
|
||||
let a = a.as_i32x4();
|
||||
let zero = i32x4::splat(0);
|
||||
let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Shuffles bytes from `a` according to the content of `b`.
|
||||
|
|
@@ -285,15 +294,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
|
||||
fn pabsb128(a: i8x16) -> u8x16;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
|
||||
fn pabsw128(a: i16x8) -> u16x8;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
|
||||
fn pabsd128(a: i32x4) -> u32x4;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
|
||||
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue