From 5c20a68339dc2def9a92cddc1941225528a094b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Wed, 4 Oct 2023 20:26:08 +0200 Subject: [PATCH] Reimplement _mm256_addsub_ps and _mm256_addsub_pd without LLVM intrinsics --- library/stdarch/crates/core_arch/src/x86/avx.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index 28e1c6b35aab..55a90f70b597 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -268,7 +268,11 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vaddsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { - addsubpd256(a, b) + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [4, 1, 6, 3]) } /// Alternatively adds and subtracts packed single-precision (32-bit) @@ -280,7 +284,11 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vaddsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { - addsubps256(a, b) + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) } /// Subtracts packed double-precision (64-bit) floating-point elements in `b` @@ -2906,10 +2914,6 @@ pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { // LLVM intrinsics used in the above functions #[allow(improper_ctypes)] extern "C" { - #[link_name = "llvm.x86.avx.addsub.pd.256"] - fn addsubpd256(a: __m256d, b: __m256d) -> __m256d; - #[link_name = "llvm.x86.avx.addsub.ps.256"] - fn addsubps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.round.pd.256"] fn roundpd256(a: __m256d, b: i32) -> __m256d; #[link_name = "llvm.x86.avx.round.ps.256"]