Implement _mm(256)_abs_epi* without x86-specific LLVM intrinsics

This commit is contained in:
Eduardo Sánchez Muñoz 2024-04-14 16:27:22 +02:00 committed by Amanieu d'Antras
parent 153bde14d1
commit 5b0b9e9db2
3 changed files with 91 additions and 21 deletions

View file

@ -371,6 +371,73 @@ simd_ty!(
x7
);
// Declares `m8x32` through `simd_m_ty!`: element type `i8`, 32 lanes named
// `x0`..`x31` (32 x 8 bits = 256 bits).
// NOTE(review): presumably a mask vector type — in this commit it is only used as
// the explicit mask type argument of `simd_select` in `_mm256_abs_epi8`; confirm
// against the `simd_m_ty!` macro definition.
simd_m_ty!(
m8x32[i8]:
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15,
x16,
x17,
x18,
x19,
x20,
x21,
x22,
x23,
x24,
x25,
x26,
x27,
x28,
x29,
x30,
x31
);
// Declares `m16x16` through `simd_m_ty!`: element type `i16`, 16 lanes named
// `x0`..`x15` (16 x 16 bits = 256 bits).
// NOTE(review): presumably a mask vector type — in this commit it is only used as
// the explicit mask type argument of `simd_select` in `_mm256_abs_epi16`; confirm
// against the `simd_m_ty!` macro definition.
simd_m_ty!(
m16x16[i16]:
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
// Declares `m32x8` through `simd_m_ty!`: element type `i32`, 8 lanes named
// `x0`..`x7` (8 x 32 bits = 256 bits).
// NOTE(review): presumably a mask vector type — in this commit it is only used as
// the explicit mask type argument of `simd_select` in `_mm256_abs_epi32`; confirm
// against the `simd_m_ty!` macro definition.
simd_m_ty!(
m32x8[i32]:
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
// 512-bit wide types:
simd_ty!(

View file

@ -32,7 +32,10 @@ use stdarch_test::assert_instr;
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsd` binding, declared further
// down with link name `llvm.x86.avx2.pabs.d`); the lines after it are its replacement.
transmute(pabsd(a.as_i32x8()))
// View the input as an `i32x8` lane vector.
let a = a.as_i32x8();
let zero = i32x8::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i32::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m32x8, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m256i`.
transmute(r)
}
/// Computes the absolute values of packed 16-bit integers in `a`.
@ -43,7 +46,10 @@ pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsw` binding, declared further
// down with link name `llvm.x86.avx2.pabs.w`); the lines after it are its replacement.
transmute(pabsw(a.as_i16x16()))
// View the input as an `i16x16` lane vector.
let a = a.as_i16x16();
let zero = i16x16::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i16::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m16x16, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m256i`.
transmute(r)
}
/// Computes the absolute values of packed 8-bit integers in `a`.
@ -54,7 +60,10 @@ pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsb` binding, declared further
// down with link name `llvm.x86.avx2.pabs.b`); the lines after it are its replacement.
transmute(pabsb(a.as_i8x32()))
// View the input as an `i8x32` lane vector.
let a = a.as_i8x32();
let zero = i8x32::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i8::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m8x32, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m256i`.
transmute(r)
}
/// Adds packed 64-bit integers in `a` and `b`.
@ -3639,12 +3648,6 @@ pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx2.pabs.b"]
fn pabsb(a: i8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.pabs.w"]
fn pabsw(a: i16x16) -> u16x16;
#[link_name = "llvm.x86.avx2.pabs.d"]
fn pabsd(a: i32x8) -> u32x8;
#[link_name = "llvm.x86.avx2.phadd.w"]
fn phaddw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.phadd.d"]

View file

@ -17,7 +17,10 @@ use stdarch_test::assert_instr;
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsb128` binding, declared further
// down with link name `llvm.x86.ssse3.pabs.b.128`); the lines after it are its
// replacement.
transmute(pabsb128(a.as_i8x16()))
// View the input as an `i8x16` lane vector.
let a = a.as_i8x16();
let zero = i8x16::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i8::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m128i`.
transmute(r)
}
/// Computes the absolute value of each of the packed 16-bit signed integers in
@ -30,7 +33,10 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsw128` binding, declared further
// down with link name `llvm.x86.ssse3.pabs.w.128`); the lines after it are its
// replacement.
transmute(pabsw128(a.as_i16x8()))
// View the input as an `i16x8` lane vector.
let a = a.as_i16x8();
let zero = i16x8::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i16::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m128i`.
transmute(r)
}
/// Computes the absolute value of each of the packed 32-bit signed integers in
@ -43,7 +49,10 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
// NOTE(review): this diff view has lost its +/- markers. The next line appears to
// be the removed implementation (a call to the `pabsd128` binding, declared further
// down with link name `llvm.x86.ssse3.pabs.d.128`); the lines after it are its
// replacement.
transmute(pabsd128(a.as_i32x4()))
// View the input as an `i32x4` lane vector.
let a = a.as_i32x4();
let zero = i32x4::splat(0);
// Lane-wise select: where `a < 0` take `simd_neg(a)`, otherwise keep `a`.
// NOTE(review): the result for a lane equal to `i32::MIN` depends on how
// `simd_neg` treats overflow — confirm it matches the old intrinsic.
let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
// Convert the lane vector back to the declared return type `__m128i`.
transmute(r)
}
/// Shuffles bytes from `a` according to the content of `b`.
@ -285,15 +294,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
fn pabsb128(a: i8x16) -> u8x16;
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
fn pabsw128(a: i16x8) -> u16x8;
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
fn pabsd128(a: i32x4) -> u32x4;
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;