From 2a63cfea9ea4bfcacc07260e9303d1c213939c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Wed, 4 Oct 2023 19:14:56 +0200 Subject: [PATCH] Reimplement _mm_blendv_epi8 and _mm256_blendv_epi8 without LLVM intrinsics --- library/stdarch/crates/core_arch/src/x86/avx2.rs | 5 ++--- library/stdarch/crates/core_arch/src/x86/sse41.rs | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 7acfe336453a..243a4cdab1d2 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -464,7 +464,8 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m #[cfg_attr(test, assert_instr(vpblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { - transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32())) + let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0)); + transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32())) } /// Broadcasts the low packed 8-bit integer from `a` to all elements of @@ -3646,8 +3647,6 @@ extern "C" { fn pabsw(a: i16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pabs.d"] fn pabsd(a: i32x8) -> u32x8; - #[link_name = "llvm.x86.avx2.pblendvb"] - fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.phadd.w"] fn phaddw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.phadd.d"] diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 444b59974969..aaa18703d9ea 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -62,7 +62,8 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI #[cfg_attr(test, assert_instr(pblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { - transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16())) + let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0)); + transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) } /// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`. @@ -1126,8 +1127,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { #[allow(improper_ctypes)] extern "C" { - #[link_name = "llvm.x86.sse41.pblendvb"] - fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16; #[link_name = "llvm.x86.sse41.blendvpd"] fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d; #[link_name = "llvm.x86.sse41.blendvps"]