From cab716647427572de0638117b847bd9df422a846 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?=
Date: Wed, 4 Oct 2023 19:51:08 +0200
Subject: [PATCH] Reimplement _mm_blendv_ps and _mm256_blendv_ps without LLVM intrinsics

---
 library/stdarch/crates/core_arch/src/x86/avx.rs   | 5 ++---
 library/stdarch/crates/core_arch/src/x86/sse41.rs | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index 88465b4312e2..28e1c6b35aab 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -524,7 +524,8 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vblendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    vblendvps(a, b, c)
+    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
+    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
 }
 
 /// Conditionally multiplies the packed single-precision (32-bit) floating-point
@@ -2915,8 +2916,6 @@ extern "C" {
     fn roundps256(a: __m256, b: i32) -> __m256;
     #[link_name = "llvm.x86.avx.sqrt.ps.256"]
     fn sqrtps256(a: __m256) -> __m256;
-    #[link_name = "llvm.x86.avx.blendv.ps.256"]
-    fn vblendvps(a: __m256, b: __m256, c: __m256) -> __m256;
     #[link_name = "llvm.x86.avx.dp.ps.256"]
     fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
     #[link_name = "llvm.x86.avx.hadd.pd.256"]
diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs
index 15898eb7b3e2..bad7ed2c681f 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs
@@ -118,7 +118,8 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(blendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
-    blendvps(a, b, mask)
+    let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0));
+    transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
 }
 
 /// Blend packed double-precision (64-bit) floating-point elements from `a`
@@ -1138,8 +1139,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
 
 #[allow(improper_ctypes)]
 extern "C" {
-    #[link_name = "llvm.x86.sse41.blendvps"]
-    fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128;
     #[link_name = "llvm.x86.sse41.blendpd"]
     fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d;
     #[link_name = "llvm.x86.sse41.blendps"]