From 1c09cc76c483caf93f832d9e7bbfdf07f09a8576 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 18 Jul 2018 16:38:36 +0200 Subject: [PATCH] LLVM7 generates different machine code than LLVM6 for x86/x86_64 targets for some intrinsics. These are new optimizations --- library/stdarch/coresimd/x86/avx.rs | 5 ++++- library/stdarch/coresimd/x86/sse41.rs | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/library/stdarch/coresimd/x86/avx.rs b/library/stdarch/coresimd/x86/avx.rs index 58d9482ff1fe..c1c2fff84634 100644 --- a/library/stdarch/coresimd/x86/avx.rs +++ b/library/stdarch/coresimd/x86/avx.rs @@ -524,7 +524,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] +// Note: LLVM7 prefers single-precision blend instructions when +// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194 +// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] +#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { diff --git a/library/stdarch/coresimd/x86/sse41.rs b/library/stdarch/coresimd/x86/sse41.rs index 198bb16ba03c..91722507da16 100644 --- a/library/stdarch/coresimd/x86/sse41.rs +++ b/library/stdarch/coresimd/x86/sse41.rs @@ -80,7 +80,10 @@ pub unsafe fn _mm_blendv_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] +// Note: LLVM7 prefers the single-precision floating-point domain when possible +// see https://bugs.llvm.org/show_bug.cgi?id=38195 +// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] +#[cfg_attr(test, 
assert_instr(blendps, imm8 = 0xF0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { @@ -124,7 +127,10 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] +// Note: LLVM7 prefers the single-precision floating-point domain when possible +// see https://bugs.llvm.org/show_bug.cgi?id=38195 +// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] +#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {