diff --git a/library/stdarch/coresimd/macros.rs b/library/stdarch/coresimd/macros.rs index 82da39932ce4..74a01be77098 100644 --- a/library/stdarch/coresimd/macros.rs +++ b/library/stdarch/coresimd/macros.rs @@ -1,5 +1,6 @@ //! Utility macros. +#[allow(unused)] macro_rules! constify_imm8 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] diff --git a/library/stdarch/coresimd/x86/sse.rs b/library/stdarch/coresimd/x86/sse.rs index 2405243f4b85..9d177bf70617 100644 --- a/library/stdarch/coresimd/x86/sse.rs +++ b/library/stdarch/coresimd/x86/sse.rs @@ -1144,18 +1144,20 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { /// ``` #[inline] #[target_feature(enable = "sse")] -// TODO: generates MOVHPD if the CPU supports SSE2. -// #[cfg_attr(test, assert_instr(movhps))] -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))] -// 32-bit codegen does not generate `movhps` or `movhpd`, but instead -// `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2). #[cfg_attr( - all(test, target_arch = "x86", target_feature = "sse2"), - assert_instr(movlhps) + all( + test, + any( + target_arch = "x86_64", + all(target_arch = "x86", target_feature = "sse2") + ) + ), + assert_instr(movhpd) )] +// FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps` #[cfg_attr( all(test, target_arch = "x86", not(target_feature = "sse2")), - assert_instr(unpcklps) + assert_instr(shufps) )] // TODO: This function is actually not limited to floats, but that's what // what matches the C type most closely: (__m128, *const __m64) -> __m128 @@ -1202,20 +1204,16 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { /// ``` #[inline] #[target_feature(enable = "sse")] -// TODO: generates MOVLPD if the CPU supports SSE2. -// #[cfg_attr(test, assert_instr(movlps))] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))] -// On 32-bit targets with SSE2, it just generates two `movsd`. #[cfg_attr( all(test, target_arch = "x86", target_feature = "sse2"), - assert_instr(movsd) + assert_instr(movlpd) )] -// It should really generate "movlps", but oh well... +// FIXME: On 32-bit targets without SSE2, it just generates two `movss`... #[cfg_attr( all(test, target_arch = "x86", not(target_feature = "sse2")), assert_instr(movss) )] -// TODO: Like _mm_loadh_pi, this also isn't limited to floats. pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q;