Reimplement _mm_blendv_epi8 and _mm256_blendv_epi8 without LLVM intrinsics

This commit is contained in:
Eduardo Sánchez Muñoz 2023-10-04 19:14:56 +02:00 committed by Amanieu d'Antras
parent fd694451fe
commit 2a63cfea9e
2 changed files with 4 additions and 6 deletions

View file

@ -464,7 +464,8 @@ pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
// Blends `a` and `b` lane by lane under control of `mask`, with all three
// operands reinterpreted via `as_i8x32()`.
//
// NOTE(review): this is a diff view whose +/- markers were stripped. Going by
// the hunk header (-464,7 +464,8), the `pblendvb` call directly below is the
// line this commit removes, and the two statements after it are its
// replacement; only the replacement remains once the commit is applied.
transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32()))
// Compare every mask lane against zero. A signed 8-bit lane is negative
// exactly when its most significant bit is set, so this reduces the blend
// control to that one bit per lane.
let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0));
// `b` is passed before `a`: presumably `simd_select` yields its first value
// operand in lanes where the comparison held and the second elsewhere, i.e.
// `b` where the mask lane was negative -- TODO confirm against the intrinsic.
transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
}
/// Broadcasts the low packed 8-bit integer from `a` to all elements of
@ -3646,8 +3647,6 @@ extern "C" {
fn pabsw(a: i16x16) -> u16x16;
#[link_name = "llvm.x86.avx2.pabs.d"]
fn pabsd(a: i32x8) -> u32x8;
#[link_name = "llvm.x86.avx2.pblendvb"]
fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx2.phadd.w"]
fn phaddw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.phadd.d"]

View file

@ -62,7 +62,8 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
// Blends `a` and `b` lane by lane under control of `mask`, with all three
// operands reinterpreted via `as_i8x16()`.
//
// NOTE(review): this is a diff view whose +/- markers were stripped. Going by
// the hunk header (-62,7 +62,8), the `pblendvb` call directly below is the
// line this commit removes, and the two statements after it are its
// replacement; only the replacement remains once the commit is applied.
transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16()))
// Compare every mask lane against zero. A signed 8-bit lane is negative
// exactly when its most significant bit is set, so this reduces the blend
// control to that one bit per lane.
let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0));
// `b` is passed before `a`: presumably `simd_select` yields its first value
// operand in lanes where the comparison held and the second elsewhere, i.e.
// `b` where the mask lane was negative -- TODO confirm against the intrinsic.
transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
}
/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
@ -1126,8 +1127,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse41.pblendvb"]
fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
#[link_name = "llvm.x86.sse41.blendvpd"]
fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d;
#[link_name = "llvm.x86.sse41.blendvps"]