diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index 28d798edc825..162ac313b85e 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -314,6 +314,7 @@ pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 { /// Shift `a` left by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { let (zero, imm8) = (__m128i::splat(0), imm8 as u32); const fn sub(a: u32, b: u32) -> u32 { a - b } @@ -344,41 +345,22 @@ pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { } } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pslldq))] -fn _test_mm_slli_si128(a: __m128i) -> __m128i { - unsafe { _mm_slli_si128(a, 1) } -} - /// Shift `a` left by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128(a, imm8) } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pslldq))] -fn _test_mm_bslli_si128(a: __m128i) -> __m128i { - unsafe { _mm_bslli_si128(a, 1) } -} - /// Shift `a` right by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128(a, imm8) } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(psrldq))] -fn _test_mm_bsrli_si128(a: __m128i) -> __m128i { - unsafe { _mm_bsrli_si128(a, 1) } -} - /// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] @@ -469,6 +451,7 @@ pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 { /// Shift `a` right by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i { let (zero, imm8) = (__m128i::splat(0), imm8 as u32); const fn add(a: u32, b: u32) -> u32 { a + b } @@ -499,13 +482,6 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i { } } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(psrldq))] -fn _test_mm_srli_si128(a: __m128i) -> __m128i { - unsafe { _mm_srli_si128(a, 1) } -} - /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros. #[inline(always)] @@ -1002,31 +978,19 @@ pub unsafe fn _mm_packus_epi16(a: i16x8, b: i16x8) -> u8x16 { /// Return the `imm8` element of `a`. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))] pub unsafe fn _mm_extract_epi16(a: i16x8, imm8: i32) -> i32 { a.extract(imm8 as u32 & 0b111) as i32 } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pextrw))] -fn _test_mm_extract_epi16(a: i16x8) -> i32 { - unsafe { _mm_extract_epi16(a, 9) } -} - /// Return a new vector where the `imm8` element of `a` is replaced with `i`. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))] pub unsafe fn _mm_insert_epi16(a: i16x8, i: i32, imm8: i32) -> i16x8 { a.replace(imm8 as u32 & 0b111, i as i16) } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pinsrw))] -fn _test_mm_insert_epi16(a: i16x8, i: i32) -> i16x8 { - unsafe { _mm_insert_epi16(a, i, 9) } -} - /// Return a mask of the most significant bit of each element in `a`. #[inline(always)] #[target_feature = "+sse2"] @@ -1038,6 +1002,7 @@ pub unsafe fn _mm_movemask_epi8(a: i8x16) -> i32 { /// Shuffle 32-bit integers in `a` using the control in `imm8`. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pshufd, imm8 = 9))] pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need @@ -1091,13 +1056,6 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { } } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pshufd))] -fn _test_mm_shuffle_epi32(a: i32x4) -> i32x4 { - unsafe { _mm_shuffle_epi32(a, 9) } -} - /// Shuffle 16-bit integers in the high 64 bits of `a` using the control in /// `imm8`. /// @@ -1105,6 +1063,7 @@ fn _test_mm_shuffle_epi32(a: i32x4) -> i32x4 { /// bits being copied from from `a`. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))] pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; @@ -1155,13 +1114,6 @@ pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 { } } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pshufhw))] -fn _test_mm_shufflehi_epi16(a: i16x8) -> i16x8 { - unsafe { _mm_shufflehi_epi16(a, 9) } -} - /// Shuffle 16-bit integers in the low 64 bits of `a` using the control in /// `imm8`. /// @@ -1169,6 +1121,7 @@ fn _test_mm_shufflehi_epi16(a: i16x8) -> i16x8 { /// bits being copied from from `a`. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))] pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; @@ -1216,13 +1169,6 @@ pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 { } } -#[cfg(test)] -#[target_feature = "+sse2"] -#[cfg_attr(test, assert_instr(pshuflw))] -fn _test_mm_shufflelo_epi16(a: i16x8) -> i16x8 { - unsafe { _mm_shufflelo_epi16(a, 9) } -} - /// Unpack and interleave 8-bit integers from the high half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 11519c8f1169..2cd36f74c6a7 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -24,6 +24,7 @@ pub unsafe fn _mm_blendv_epi8( /// the broadcast mask bit is zero then the return component will be zero. #[inline(always)] #[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(dppd, imm8 = 0))] pub unsafe fn _mm_dp_pd(a: f64x2, b: f64x2, imm8: u8) -> f64x2 { macro_rules! call { ($imm8:expr) => { dppd(a, b, $imm8) } @@ -31,13 +32,6 @@ pub unsafe fn _mm_dp_pd(a: f64x2, b: f64x2, imm8: u8) -> f64x2 { constify_imm8!(imm8, call) } -#[cfg(test)] -#[target_feature = "+sse4.1"] -#[cfg_attr(test, assert_instr(dppd))] -fn _test_mm_dp_pd(a: f64x2, b: f64x2) -> f64x2 { - unsafe { _mm_dp_pd(a, b, 0) } -} - /// Returns the dot product of two f32x4 vectors. /// /// `imm8[3:0]` is the broadcast mask, and `imm8[7:4]` is the condition mask. @@ -47,6 +41,7 @@ fn _test_mm_dp_pd(a: f64x2, b: f64x2) -> f64x2 { /// the broadcast mask bit is zero then the return component will be zero. #[inline(always)] #[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(dpps, imm8 = 0))] pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 { macro_rules! call { ($imm8:expr) => { dpps(a, b, $imm8) } @@ -54,13 +49,6 @@ pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 { constify_imm8!(imm8, call) } -#[cfg(test)] -#[target_feature = "+sse4.1"] -#[cfg_attr(test, assert_instr(dpps))] -fn _test_mm_dp_ps(a: f32x4, b: f32x4) -> f32x4 { - unsafe { _mm_dp_ps(a, b, 0) } -} - #[allow(improper_ctypes)] extern { #[link_name = "llvm.x86.sse41.pblendvb"] diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs index 8a18f31264be..72d7519e0146 100644 --- a/library/stdarch/src/x86/sse42.rs +++ b/library/stdarch/src/x86/sse42.rs @@ -22,6 +22,7 @@ pub const _SIDD_MOST_SIGNIFICANT: i8 = 0b01000000; #[inline(always)] #[target_feature = "+sse4.2"] +#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] pub unsafe fn _mm_cmpestri( a: __m128i, la: i32, @@ -35,13 +36,6 @@ pub unsafe fn _mm_cmpestri( constify_imm8!(imm8, call) } -#[cfg(test)] -#[target_feature = "+sse4.2"] -#[cfg_attr(test, assert_instr(pcmpestri))] -fn _test_mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { - unsafe { _mm_cmpestri(a, la, b, lb, 0) } -} - #[allow(improper_ctypes)] extern { #[link_name = "llvm.x86.sse42.pcmpestri128"]