LLVM7 generates different machine code than LLVM6 for some intrinsics on x86/x86_64 targets. These are new optimizations.

2018-07-18 16:38:36 +02:00 · 2018-07-18 16:38:36 +02:00 · 1c09cc76c4
commit 1c09cc76c4
parent faafc284bd
2 changed files with 12 additions and 3 deletions
--- a/library/stdarch/coresimd/x86/avx.rs
+++ b/library/stdarch/coresimd/x86/avx.rs
@@ -524,7 +524,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd)
 #[inline]
 #[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
+// Note: LLVM7 prefers single-precision blend instructions when
+// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
+// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
+#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
--- a/library/stdarch/coresimd/x86/sse41.rs
+++ b/library/stdarch/coresimd/x86/sse41.rs
@@ -80,7 +80,10 @@ pub unsafe fn _mm_blendv_epi8(
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
 #[inline]
 #[target_feature(enable = "sse4.1")]
-#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
+// Note: LLVM7 prefers the single-precision floating-point domain when possible
+// see https://bugs.llvm.org/show_bug.cgi?id=38195
+// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
+#[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
@@ -124,7 +127,10 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
 #[inline]
 #[target_feature(enable = "sse4.1")]
-#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
+// Note: LLVM7 prefers the single-precision floating-point domain when possible
+// see https://bugs.llvm.org/show_bug.cgi?id=38195
+// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
+#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))]
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {