Update x86 codegen

2018-11-27 10:49:58 +01:00 · 2018-11-27 10:49:58 +01:00 · 900fa0261a
commit 900fa0261a
parent 4a582fe37e
2 changed files with 13 additions and 14 deletions
--- a/library/stdarch/coresimd/macros.rs
+++ b/library/stdarch/coresimd/macros.rs
@@ -1,5 +1,6 @@
 //! Utility macros.

+#[allow(unused)]
 macro_rules! constify_imm8 {
    ($imm8:expr, $expand:ident) => {
        #[allow(overflowing_literals)]
--- a/library/stdarch/coresimd/x86/sse.rs
+++ b/library/stdarch/coresimd/x86/sse.rs
@@ -1144,18 +1144,20 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
 /// ```
 #[inline]
 #[target_feature(enable = "sse")]
-// TODO: generates MOVHPD if the CPU supports SSE2.
-// #[cfg_attr(test, assert_instr(movhps))]
-#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
-// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
-// `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2).
 #[cfg_attr(
-    all(test, target_arch = "x86", target_feature = "sse2"),
-    assert_instr(movlhps)
+    all(
+        test,
+        any(
+            target_arch = "x86_64",
+            all(target_arch = "x86", target_feature = "sse2")
+        )
+    ),
+    assert_instr(movhpd)
 )]
+// FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
 #[cfg_attr(
    all(test, target_arch = "x86", not(target_feature = "sse2")),
-    assert_instr(unpcklps)
+    assert_instr(shufps)
 )]
 // TODO: This function is actually not limited to floats, but that's
 // what matches the C type most closely: (__m128, *const __m64) -> __m128
@@ -1202,20 +1204,16 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
 /// ```
 #[inline]
 #[target_feature(enable = "sse")]
-// TODO: generates MOVLPD if the CPU supports SSE2.
-// #[cfg_attr(test, assert_instr(movlps))]
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
-// On 32-bit targets with SSE2, it just generates two `movsd`.
 #[cfg_attr(
    all(test, target_arch = "x86", target_feature = "sse2"),
-    assert_instr(movsd)
+    assert_instr(movlpd)
 )]
-// It should really generate "movlps", but oh well...
+// FIXME: On 32-bit targets without SSE2, it just generates two `movss`...
 #[cfg_attr(
    all(test, target_arch = "x86", not(target_feature = "sse2")),
    assert_instr(movss)
 )]
-// TODO: Like _mm_loadh_pi, this also isn't limited to floats.
 pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
    let q = p as *const f32x2;
    let b: f32x2 = *q;