From b7872260611b165743da7058a8e7e38f78c35696 Mon Sep 17 00:00:00 2001 From: Mohan Rajendran Date: Mon, 25 Sep 2017 22:36:28 -0500 Subject: [PATCH 1/7] Squashing --- library/stdarch/TODO.md | 8 +- library/stdarch/assert-instr/src/lib.rs | 6 +- library/stdarch/src/x86/sse.rs | 124 +++++++++++++++++++++++- 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/library/stdarch/TODO.md b/library/stdarch/TODO.md index 28ae8d19a584..95d072a6d4d4 100644 --- a/library/stdarch/TODO.md +++ b/library/stdarch/TODO.md @@ -154,11 +154,11 @@ sse * [ ] `_mm_storeu_ps` * [ ] `_mm_storer_ps` * [ ] `_mm_move_ss` -* [ ] `_mm_shuffle_ps` +* [x] `_mm_shuffle_ps` * [x] `_mm_unpackhi_ps` -* [ ] `_mm_unpacklo_ps` -* [ ] `_mm_movehl_ps` -* [ ] `_mm_movelh_ps` +* [x] `_mm_unpacklo_ps` +* [x] `_mm_movehl_ps` +* [x] `_mm_movelh_ps` * [x] `_mm_movemask_ps` * [ ] `_mm_undefined_ps` diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs index df1336b2f7c1..cb3ad557e481 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/assert-instr/src/lib.rs @@ -269,9 +269,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { } } - let probably_only_one_instruction = function.instrs.len() < 20; - - if found && probably_only_one_instruction { + if found { return } @@ -288,7 +286,5 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { if !found { panic!("failed to find instruction `{}` in the disassembly", expected); - } else if !probably_only_one_instruction { - panic!("too many instructions in the disassembly"); } } diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index 49bba60458fb..b2e82675474a 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -164,14 +164,97 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { maxps(a, b) } } -/// Unpack and interleave single-precision (32-bit) floating-point elements -/// from the high half of `a` and `b`; +// Shuffle packed single-precision (32-bit) floating-point elements in `a` and `b` +// using `mask`. +// The lower half of result takes values from `a` and the higher half from `b`. +// Mask is split to 2 control bits each to index the element from inputs. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(shufps))] +pub fn _mm_shuffle_ps(a: f32x4, b: f32x4, mask: i32) -> f32x4 { + let mask = (mask & 0xFF) as u8; + + macro_rules! shuffle_done { + ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { + unsafe { + simd_shuffle4(a, b, [$x01, $x23, $x45, $x67]) + } + } + } + macro_rules! shuffle_x67 { + ($x01:expr, $x23:expr, $x45:expr) => { + match (mask >> 6) & 0b11 { + 0b00 => shuffle_done!($x01, $x23, $x45, 4), + 0b01 => shuffle_done!($x01, $x23, $x45, 5), + 0b10 => shuffle_done!($x01, $x23, $x45, 6), + _ => shuffle_done!($x01, $x23, $x45, 7), + } + } + } + macro_rules! shuffle_x45 { + ($x01:expr, $x23:expr) => { + match (mask >> 4) & 0b11 { + 0b00 => shuffle_x67!($x01, $x23, 4), + 0b01 => shuffle_x67!($x01, $x23, 5), + 0b10 => shuffle_x67!($x01, $x23, 6), + _ => shuffle_x67!($x01, $x23, 7), + } + } + } + macro_rules! shuffle_x23 { + ($x01:expr) => { + match (mask >> 2) & 0b11 { + 0b00 => shuffle_x45!($x01, 0), + 0b01 => shuffle_x45!($x01, 1), + 0b10 => shuffle_x45!($x01, 2), + _ => shuffle_x45!($x01, 3), + } + } + } + match mask & 0b11 { + 0b00 => shuffle_x23!(0), + 0b01 => shuffle_x23!(1), + 0b10 => shuffle_x23!(2), + _ => shuffle_x23!(3), + } +} + +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the higher half of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(unpckhps))] pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) } } +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the lower half of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(unpcklps))] +pub fn _mm_unpacklo_ps(a: f32x4, b: f32x4) -> f32x4 { + unsafe { simd_shuffle4(a, b, [0, 4, 1, 5]) } +} + +/// Combine higher half of `a` and `b`. The highwe half of `b` occupies the lower +/// half of result. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(movhlps))] +pub fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 { + unsafe { simd_shuffle4(a, b, [6, 7, 2, 3]) } +} + +/// Combine lower half of `a` and `b`. The lower half of `b` occupies the higher +/// half of result. +#[inline(always)] +#[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(unpcklpd))] +pub fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 { + unsafe { simd_shuffle4(a, b, [0, 1, 4, 5]) } +} + /// Return a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. @@ -384,6 +467,16 @@ mod tests { assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0)); } + #[test] + #[target_feature = "+sse"] + fn _mm_shuffle_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let mask = 0b00_01_01_11; + let r = sse::_mm_shuffle_ps(a, b, mask); + assert_eq!(r, f32x4::new(4.0, 2.0, 6.0, 5.0)); + } + #[test] #[target_feature = "+sse"] fn _mm_unpackhi_ps() { @@ -393,6 +486,33 @@ mod tests { assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0)); } + #[test] + #[target_feature = "+sse"] + fn _mm_unpacklo_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let r = sse::_mm_unpacklo_ps(a, b); + assert_eq!(r, f32x4::new(1.0, 5.0, 2.0, 6.0)); + } + + #[test] + #[target_feature = "+sse"] + fn _mm_movehl_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let r = sse::_mm_movehl_ps(a, b); + assert_eq!(r, f32x4::new(7.0, 8.0, 3.0, 4.0)); + } + + #[test] + #[target_feature = "+sse"] + fn _mm_movelh_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let r = sse::_mm_movelh_ps(a, b); + assert_eq!(r, f32x4::new(1.0, 2.0, 5.0, 6.0)); + } + #[test] #[target_feature = "+sse"] fn _mm_movemask_ps() { From 1fa49dfe5d4d350e431e0978b4f7f4c26aaa4998 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 11:12:16 -0700 Subject: [PATCH 2/7] Add AVX2 instruction assertiosn Also a few other assorted modules --- library/stdarch/src/x86/avx2.rs | 98 ++++++++++++++++++++++++++++++++ library/stdarch/src/x86/sse.rs | 5 +- library/stdarch/src/x86/sse41.rs | 4 ++ library/stdarch/src/x86/ssse3.rs | 7 ++- 4 files changed, 110 insertions(+), 4 deletions(-) diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs index ac81ccb9dea1..2d8533d320f6 100644 --- a/library/stdarch/src/x86/avx2.rs +++ b/library/stdarch/src/x86/avx2.rs @@ -2,9 +2,13 @@ use v256::*; use v128::*; use x86::__m256i; +#[cfg(test)] +use assert_instr::assert_instr; + /// Computes the absolute values of packed 32-bit integers in `a`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpabsd))] pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 { unsafe { pabsd(a) } } @@ -12,6 +16,7 @@ pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 { /// Computes the absolute values of packed 16-bit integers in `a`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpabsw))] pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 { unsafe { pabsw(a) } } @@ -19,6 +24,7 @@ pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 { /// Computes the absolute values of packed 8-bit integers in `a`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpabsb))] pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 { unsafe { pabsb(a) } } @@ -26,6 +32,7 @@ pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 { /// Add packed 64-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddq))] pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 { a + b } @@ -33,6 +40,7 @@ pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 { /// Add packed 32-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddd))] pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 { a + b } @@ -40,6 +48,7 @@ pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 { /// Add packed 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddw))] pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 { a + b } @@ -47,6 +56,7 @@ pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Add packed 8-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddb))] pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 { a + b } @@ -54,6 +64,7 @@ pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 { /// Add packed 8-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddsb))] pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 { unsafe { paddsb(a, b) } } @@ -61,6 +72,7 @@ pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 { /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddsw))] pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { paddsw(a, b) } } @@ -68,6 +80,7 @@ pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddusb))] pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 { unsafe { paddusb(a, b) } } @@ -75,6 +88,7 @@ pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 { /// Add packed unsigned 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpaddusw))] pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 { unsafe { paddusw(a, b) } } @@ -85,6 +99,7 @@ pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 { /// in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vandps))] pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { a & b } @@ -93,6 +108,7 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { /// in `a` and then AND with `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vandnps))] pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { (!a) & b } @@ -100,6 +116,7 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { /// Average packed unsigned 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpavgw))] pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 { unsafe { pavgw(a, b) } } @@ -107,6 +124,7 @@ pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 { /// Average packed unsigned 8-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpavgb))] pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 { unsafe { pavgb(a, b) } } @@ -118,6 +136,7 @@ pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 { /// Blend packed 8-bit integers from `a` and `b` using `mask`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpblendvb))] pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 { unsafe { pblendvb(a,b,mask) } } @@ -143,6 +162,7 @@ pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 { /// Compare packed 64-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpeqq))] pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 { a.eq(b) } @@ -150,6 +170,7 @@ pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 { /// Compare packed 32-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpeqd))] pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 { a.eq(b) } @@ -157,6 +178,7 @@ pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 { /// Compare packed 16-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpeqw))] pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 { a.eq(b) } @@ -164,6 +186,7 @@ pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Compare packed 8-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpeqb))] pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 { a.eq(b) } @@ -171,6 +194,7 @@ pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 { /// Compare packed 64-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpgtq))] pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 { a.gt(b) } @@ -178,6 +202,7 @@ pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 { /// Compare packed 32-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpgtd))] pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 { a.gt(b) } @@ -185,6 +210,7 @@ pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 { /// Compare packed 16-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpgtw))] pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 { a.gt(b) } @@ -192,6 +218,7 @@ pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Compare packed 8-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpcmpgtb))] pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 { a.gt(b) } @@ -213,6 +240,7 @@ pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 { /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphaddw))] pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { phaddw(a, b) } } @@ -220,6 +248,7 @@ pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphaddd))] pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 { unsafe { phaddd(a, b) } } @@ -228,6 +257,7 @@ pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 { /// using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphaddsw))] pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { phaddsw(a, b) } } @@ -235,6 +265,7 @@ pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Horizontally substract adjacent pairs of 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphsubw))] pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { phsubw(a, b) } } @@ -242,6 +273,7 @@ pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Horizontally substract adjacent pairs of 32-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphsubd))] pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 { unsafe { phsubd(a, b) } } @@ -250,6 +282,7 @@ pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 { /// using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vphsubsw))] pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { phsubsw(a, b) } } @@ -294,6 +327,7 @@ pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 { /// of intermediate 32-bit integers. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaddwd))] pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 { unsafe { pmaddwd(a, b) } } @@ -304,6 +338,7 @@ pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 { /// signed 16-bit integers #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaddubsw))] pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 { unsafe { pmaddubsw(a, b) } } @@ -321,6 +356,7 @@ pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 { /// maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxsw))] pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { pmaxsw(a, b) } } @@ -329,6 +365,7 @@ pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 { /// maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxsd))] pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 { unsafe { pmaxsd(a, b) } } @@ -337,6 +374,7 @@ pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 { /// maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxsb))] pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 { unsafe { pmaxsb(a, b) } } @@ -345,6 +383,7 @@ pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 { /// the packed maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxuw))] pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 { unsafe { pmaxuw(a, b) } } @@ -353,6 +392,7 @@ pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 { /// the packed maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxud))] pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 { unsafe { pmaxud(a, b) } } @@ -361,6 +401,7 @@ pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 { /// the packed maximum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmaxub))] pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 { unsafe { pmaxub(a, b) } } @@ -369,6 +410,7 @@ pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 { /// minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminsw))] pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { pminsw(a, b) } } @@ -377,6 +419,7 @@ pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 { /// minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminsd))] pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 { unsafe { pminsd(a, b) } } @@ -385,6 +428,7 @@ pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 { /// minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminsb))] pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 { unsafe { pminsb(a, b) } } @@ -393,6 +437,7 @@ pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 { /// the packed minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminuw))] pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 { unsafe { pminuw(a, b) } } @@ -401,6 +446,7 @@ pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 { /// the packed minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminud))] pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 { unsafe { pminud(a, b) } } @@ -409,6 +455,7 @@ pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 { /// the packed minimum values. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpminub))] pub fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 { unsafe { pminub(a, b) } } @@ -444,6 +491,7 @@ pub fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 { /// Return the 64-bit results. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmuldq))] pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 { unsafe { pmuldq(a, b) } } @@ -454,6 +502,7 @@ pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 { /// Return the unsigned 64-bit results. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmuludq))] pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 { unsafe { pmuludq(a, b) } } @@ -463,6 +512,7 @@ pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 { /// intermediate integers. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmulhw))] pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { pmulhw(a, b) } } @@ -472,6 +522,7 @@ pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 { /// intermediate integers. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmulhuw))] pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 { unsafe { pmulhuw(a, b) } } @@ -481,6 +532,7 @@ pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 { /// intermediate integers #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmullw))] pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 { a * b } @@ -491,6 +543,7 @@ pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 { /// intermediate integers #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmulld))] pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 { a * b } @@ -501,6 +554,7 @@ pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 { /// return bits [16:1] #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpmulhrsw))] pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 { unsafe { pmulhrsw(a, b) } } @@ -509,6 +563,7 @@ pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 { /// and `b` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vorps))] pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { a | b } @@ -517,6 +572,7 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { /// using signed saturation #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpacksswb))] pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 { unsafe { packsswb(a, b) } } @@ -525,6 +581,7 @@ pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 { /// using signed saturation #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpackssdw))] pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 { unsafe { packssdw(a, b) } } @@ -533,6 +590,7 @@ pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 { /// using unsigned saturation #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpackuswb))] pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 { unsafe { packuswb(a, b) } } @@ -541,6 +599,7 @@ pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 { /// using unsigned saturation #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpackusdw))] pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 { unsafe { packusdw(a, b) } } @@ -557,6 +616,7 @@ pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 { /// integers in the low 16 bits of the 64-bit return value #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsadbw))] pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 { unsafe { psadbw(a, b) } } @@ -571,6 +631,7 @@ pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 { /// Results are zeroed out when the corresponding element in `b` is zero. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsignw))] pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { psignw(a, b) } } @@ -580,6 +641,7 @@ pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Results are zeroed out when the corresponding element in `b` is zero. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsignd))] pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 { unsafe { psignd(a, b) } } @@ -589,6 +651,7 @@ pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 { /// Results are zeroed out when the corresponding element in `b` is zero. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsignb))] pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 { unsafe { psignb(a, b) } } @@ -597,6 +660,7 @@ pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 { /// shifting in zeros, and return the result #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllw))] pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 { unsafe { psllw(a, count) } } @@ -605,6 +669,7 @@ pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 { /// shifting in zeros, and return the result #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpslld))] pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 { unsafe { pslld(a, count) } } @@ -613,6 +678,7 @@ pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 { /// shifting in zeros, and return the result #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllq))] pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 { unsafe { psllq(a, count) } } @@ -621,6 +687,7 @@ pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 { /// shifting in zeros, return the results; #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllw))] // TODO: should this be pslli pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 { unsafe { pslliw(a, imm8) } } @@ -629,6 +696,7 @@ pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 { /// shifting in zeros, return the results; #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpslld))] // TODO: should this be pslli pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 { unsafe { psllid(a, imm8) } } @@ -637,6 +705,7 @@ pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 { /// shifting in zeros, return the results; #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllq))] // TODO: should this be pslli pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 { unsafe { pslliq(a, imm8) } } @@ -648,6 +717,7 @@ pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 { /// shifting in zeros, and return the result. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllvd))] pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 { unsafe { psllvd(a, count) } } @@ -657,6 +727,7 @@ pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 { /// shifting in zeros, and return the result. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllvd))] pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 { unsafe { psllvd256(a, count) } } @@ -666,6 +737,7 @@ pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 { /// shifting in zeros, and return the result. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllvq))] pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 { unsafe { psllvq(a, count) } } @@ -675,6 +747,7 @@ pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 { /// shifting in zeros, and return the result. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsllvq))] pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 { unsafe { psllvq256(a, count) } } @@ -683,6 +756,7 @@ pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 { /// shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsraw))] pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 { unsafe { psraw(a, count) } } @@ -691,6 +765,7 @@ pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 { /// shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrad))] pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 { unsafe { psrad(a, count) } } @@ -699,6 +774,7 @@ pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 { /// shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsraw))] // TODO: notvpsraiw? pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 { unsafe { psraiw(a, imm8) } } @@ -707,6 +783,7 @@ pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 { /// shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrad))] // TODO: not vpsraid? pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 { unsafe { psraid(a, imm8) } } @@ -715,6 +792,7 @@ pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 { /// corresponding element in `count` while shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsravd))] pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 { unsafe { psravd(a, count) } } @@ -723,6 +801,7 @@ pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 { /// corresponding element in `count` while shifting in sign bits. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsravd))] pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 { unsafe { psravd256(a, count) } } @@ -732,6 +811,7 @@ pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 { /// zeros. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlw))] pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 { unsafe { psrlw(a, count) } } @@ -740,6 +820,7 @@ pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 { /// zeros. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrld))] pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 { unsafe { psrld(a, count) } } @@ -748,6 +829,7 @@ pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 { /// zeros. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlq))] pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 { unsafe { psrlq(a, count) } } @@ -756,6 +838,7 @@ pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 { /// zeros #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlw))] // TODO not vpsrliw? pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 { unsafe { psrliw(a, imm8) } } @@ -764,6 +847,7 @@ pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 { /// zeros #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrld))] // TODO: not vpsrlid? pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 { unsafe { psrlid(a, imm8) } } @@ -772,6 +856,7 @@ pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 { /// zeros #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlq))] // TODO: not vpsrliq? pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 { unsafe { psrliq(a, imm8) } } @@ -780,6 +865,7 @@ pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 { /// the corresponding element in `count` while shifting in zeros, #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlvd))] pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 { unsafe { psrlvd(a, count) } } @@ -788,6 +874,7 @@ pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 { /// the corresponding element in `count` while shifting in zeros, #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlvd))] pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 { unsafe { psrlvd256(a, count) } } @@ -796,6 +883,7 @@ pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 { /// the corresponding element in `count` while shifting in zeros, #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlvq))] pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 { unsafe { psrlvq(a, count) } } @@ -804,6 +892,7 @@ pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 { /// the corresponding element in `count` while shifting in zeros, #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsrlvq))] pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 { unsafe { psrlvq256(a, count) } } @@ -813,6 +902,7 @@ pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 { /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubw))] pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 { a - b } @@ -820,6 +910,7 @@ pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 { /// Subtract packed 32-bit integers in `b` from packed 16-bit integers in `a` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubd))] pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 { a - b } @@ -827,6 +918,7 @@ pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 { /// Subtract packed 64-bit integers in `b` from packed 16-bit integers in `a` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubq))] pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 { a - b } @@ -834,6 +926,7 @@ pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 { /// Subtract packed 8-bit integers in `b` from packed 16-bit integers in `a` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubb))] pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 { a - b } @@ -842,6 +935,7 @@ pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 { /// `a` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubsw))] pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 { unsafe { psubsw(a, b) } } @@ -850,6 +944,7 @@ pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 { /// `a` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubsb))] pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 { unsafe { psubsb(a, b) } } @@ -858,6 +953,7 @@ pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 { /// integers in `a` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubusw))] pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 { unsafe { psubusw(a, b) } } @@ -866,6 +962,7 @@ pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 { /// integers in `a` using saturation. #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vpsubusb))] pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 { unsafe { psubusb(a, b) } } @@ -883,6 +980,7 @@ pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 { /// in `a` and `b` #[inline(always)] #[target_feature = "+avx2"] +#[cfg_attr(test, assert_instr(vxorps))] pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { a ^ b } diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index 49bba60458fb..0500a371e9b1 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -127,7 +127,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 { } /// Compare the first single-precision (32-bit) floating-point element of `a` -/// and `b`, and return the minimum value in the first element of the return +/// and `b`, and return the minimum value in the first element of the return /// value, the other elements are copied from `a`. #[inline(always)] #[target_feature = "+sse"] @@ -146,7 +146,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 { } /// Compare the first single-precision (32-bit) floating-point element of `a` -/// and `b`, and return the maximum value in the first element of the return +/// and `b`, and return the maximum value in the first element of the return /// value, the other elements are copied from `a`. #[inline(always)] #[target_feature = "+sse"] @@ -168,6 +168,7 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { /// from the high half of `a` and `b`; #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(unpckhps))] pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) } } diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 955026e2b4d0..e9f34e8f1059 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -1,8 +1,12 @@ use v128::*; use x86::__m128i; +#[cfg(test)] +use assert_instr::assert_instr; + #[inline(always)] #[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pblendvb))] pub fn _mm_blendv_epi8( a: __m128i, b: __m128i, diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs index 1d1497f5d0d1..a3ab4ed11cfc 100644 --- a/library/stdarch/src/x86/ssse3.rs +++ b/library/stdarch/src/x86/ssse3.rs @@ -1,15 +1,17 @@ use v128::*; +#[cfg(test)] +use assert_instr::assert_instr; + /// Compute the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. #[inline(always)] #[target_feature = "+ssse3"] +#[cfg_attr(test, assert_instr(pabsb))] pub fn _mm_abs_epi8(a: i8x16) -> u8x16 { unsafe { pabsb128(a) } } - - /// Shuffle bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses @@ -36,6 +38,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 { /// ``` #[inline(always)] #[target_feature = "+ssse3"] +#[cfg_attr(test, assert_instr(pshufb))] pub fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 { unsafe { pshufb128(a, b) } } From b8bcdd93c609550d8ef0685d27f69816c7734e91 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 12:37:39 -0700 Subject: [PATCH 3/7] Always test intrinsics unconditionally This commit alters the test suite to unconditionally compile and run all tests, regardless of the ambient target features enabled. This then uses a new convenience macro, `#[simd_test]`, to guard all tests with the appropriate `cfg_feature_enabled!` and also enable the `#[target_feature]` appropriately. --- library/stdarch/Cargo.toml | 2 +- library/stdarch/src/arm/v6.rs | 2 +- library/stdarch/src/arm/v7.rs | 2 +- library/stdarch/src/arm/v8.rs | 2 +- library/stdarch/src/lib.rs | 2 +- library/stdarch/src/macros.rs | 31 +- library/stdarch/src/x86/abm.rs | 18 +- library/stdarch/src/x86/avx.rs | 14 +- library/stdarch/src/x86/avx2.rs | 303 ++++++--------- library/stdarch/src/x86/bmi.rs | 43 +-- library/stdarch/src/x86/bmi2.rs | 34 +- library/stdarch/src/x86/mod.rs | 6 +- library/stdarch/src/x86/sse.rs | 65 ++-- library/stdarch/src/x86/sse2.rs | 357 +++++++++--------- library/stdarch/src/x86/sse41.rs | 13 +- library/stdarch/src/x86/sse42.rs | 7 +- library/stdarch/src/x86/ssse3.rs | 10 +- library/stdarch/src/x86/tbm.rs | 75 ++-- .../{assert-instr => stdsimd-test}/Cargo.toml | 3 +- .../assert-instr-macro/Cargo.toml | 0 .../assert-instr-macro/build.rs | 0 .../assert-instr-macro/src/lib.rs | 2 +- .../stdsimd-test/simd-test-macro/Cargo.toml | 11 + .../stdsimd-test/simd-test-macro/src/lib.rs | 76 ++++ .../{assert-instr => stdsimd-test}/src/lib.rs | 4 +- 25 files changed, 519 insertions(+), 563 deletions(-) rename library/stdarch/{assert-instr => stdsimd-test}/Cargo.toml (76%) rename library/stdarch/{assert-instr => stdsimd-test}/assert-instr-macro/Cargo.toml (100%) rename library/stdarch/{assert-instr => stdsimd-test}/assert-instr-macro/build.rs (100%) rename library/stdarch/{assert-instr => stdsimd-test}/assert-instr-macro/src/lib.rs (97%) create mode 100644 library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml create mode 100644 library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs rename library/stdarch/{assert-instr => stdsimd-test}/src/lib.rs (98%) diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index 59806630b68a..76f09868bcad 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -19,4 +19,4 @@ debug = true opt-level = 3 [dev-dependencies] -assert-instr = { path = "assert-instr" } +stdsimd-test = { path = "stdsimd-test" } diff --git a/library/stdarch/src/arm/v6.rs b/library/stdarch/src/arm/v6.rs index 233481c3be97..ef185a3e3ede 100644 --- a/library/stdarch/src/arm/v6.rs +++ b/library/stdarch/src/arm/v6.rs @@ -4,7 +4,7 @@ //! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html). #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Reverse the order of the bytes. #[inline(always)] diff --git a/library/stdarch/src/arm/v7.rs b/library/stdarch/src/arm/v7.rs index 4112212aa363..b98b35c15e30 100644 --- a/library/stdarch/src/arm/v7.rs +++ b/library/stdarch/src/arm/v7.rs @@ -6,7 +6,7 @@ pub use super::v6::*; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Count Leading Zeros. #[inline(always)] diff --git a/library/stdarch/src/arm/v8.rs b/library/stdarch/src/arm/v8.rs index 6438f2d316ff..3a11939245d2 100644 --- a/library/stdarch/src/arm/v8.rs +++ b/library/stdarch/src/arm/v8.rs @@ -5,7 +5,7 @@ pub use super::v7::*; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Reverse the order of the bytes. #[inline(always)] diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 46456f9dbff0..7c96e9b2a203 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -89,7 +89,7 @@ #![cfg_attr(test, feature(proc_macro))] #[cfg(test)] -extern crate assert_instr; +extern crate stdsimd_test; /// Platform independent SIMD vector types and operations. pub mod simd { diff --git a/library/stdarch/src/macros.rs b/library/stdarch/src/macros.rs index 7ba8c1e01b37..1fa28246ecaf 100644 --- a/library/stdarch/src/macros.rs +++ b/library/stdarch/src/macros.rs @@ -23,12 +23,12 @@ macro_rules! define_impl { $($elname:ident),+ ) => { impl $name { - #[inline] + #[inline(always)] pub fn new($($elname: $elemty),*) -> $name { $name($($elname),*) } - #[inline] + #[inline(always)] pub fn splat(value: $elemty) -> $name { $name($({ #[allow(non_camel_case_types, dead_code)] @@ -37,25 +37,25 @@ macro_rules! define_impl { }),*) } - #[inline] + #[inline(always)] pub fn extract(self, idx: u32) -> $elemty { assert!(idx < $nelems); unsafe { simd_extract(self, idx) } } - #[inline] + #[inline(always)] pub fn replace(self, idx: u32, val: $elemty) -> $name { assert!(idx < $nelems); unsafe { simd_insert(self, idx, val) } } - #[inline] + #[inline(always)] pub fn store(self, slice: &mut [$elemty], offset: usize) { assert!(slice[offset..].len() >= $nelems); unsafe { self.store_unchecked(slice, offset) } } - #[inline] + #[inline(always)] pub unsafe fn store_unchecked( self, slice: &mut [$elemty], @@ -70,13 +70,13 @@ macro_rules! define_impl { size_of::<$name>()); } - #[inline] + #[inline(always)] pub fn load(slice: &[$elemty], offset: usize) -> $name { assert!(slice[offset..].len() >= $nelems); unsafe { $name::load_unchecked(slice, offset) } } - #[inline] + #[inline(always)] pub unsafe fn load_unchecked( slice: &[$elemty], offset: usize, @@ -92,32 +92,32 @@ macro_rules! define_impl { x } - #[inline] + #[inline(always)] pub fn eq(self, other: $name) -> $boolname { unsafe { simd_eq(self, other) } } - #[inline] + #[inline(always)] pub fn ne(self, other: $name) -> $boolname { unsafe { simd_ne(self, other) } } - #[inline] + #[inline(always)] pub fn lt(self, other: $name) -> $boolname { unsafe { simd_lt(self, other) } } - #[inline] + #[inline(always)] pub fn le(self, other: $name) -> $boolname { unsafe { simd_le(self, other) } } - #[inline] + #[inline(always)] pub fn gt(self, other: $name) -> $boolname { unsafe { simd_gt(self, other) } } - #[inline] + #[inline(always)] pub fn ge(self, other: $name) -> $boolname { unsafe { simd_ge(self, other) } } @@ -129,6 +129,7 @@ macro_rules! define_from { ($to:ident, $($from:ident),+) => { $( impl From<$from> for $to { + #[inline(always)] fn from(f: $from) -> $to { unsafe { ::std::mem::transmute(f) } } @@ -259,7 +260,7 @@ macro_rules! define_casts { ($(($fromty:ident, $toty:ident, $cast:ident)),+) => { $( impl $fromty { - #[inline] + #[inline(always)] pub fn $cast(self) -> ::simd::$toty { unsafe { simd_cast(self) } } diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs index 19f50de2190f..de47f7fcb6d0 100644 --- a/library/stdarch/src/x86/abm.rs +++ b/library/stdarch/src/x86/abm.rs @@ -11,7 +11,7 @@ //! provides a quick overview of the instructions available. #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Counts the leading most significant zero bits. /// @@ -41,30 +41,28 @@ pub fn _popcnt32(x: u32) -> u32 { x.count_ones() } #[cfg_attr(test, assert_instr(popcnt))] pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 } -#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use x86::abm; - #[test] - #[target_feature = "+lzcnt"] + #[simd_test = "lzcnt"] fn _lzcnt_u32() { assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32); } - #[test] - #[target_feature = "+lzcnt"] + #[simd_test = "lzcnt"] fn _lzcnt_u64() { assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64); } - #[test] - #[target_feature = "+popcnt"] + #[simd_test = "popcnt"] fn _popcnt32() { assert_eq!(abm::_popcnt32(0b0101_1010u32), 4); } - #[test] - #[target_feature = "+popcnt"] + #[simd_test = "popcnt"] fn _popcnt64() { assert_eq!(abm::_popcnt64(0b0101_1010u64), 4); } diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs index 7b23d1e6cde5..7983f3e745c1 100644 --- a/library/stdarch/src/x86/avx.rs +++ b/library/stdarch/src/x86/avx.rs @@ -30,14 +30,14 @@ extern "C" { fn addsubpd256(a: f64x4, b:f64x4) -> f64x4; } - -#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use v256::*; use x86::avx; - #[test] - #[target_feature = "+avx"] + #[simd_test = "avx"] fn _mm256_add_pd() { let a = f64x4::new(1.0, 2.0, 3.0, 4.0); let b = f64x4::new(5.0, 6.0, 7.0, 8.0); @@ -46,8 +46,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx"] + #[simd_test = "avx"] fn _mm256_add_ps() { let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); let b = f32x8::new(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); @@ -56,8 +55,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx"] + #[simd_test = "avx"] fn _mm256_addsub_pd() { let a = f64x4::new(1.0, 2.0, 3.0, 4.0); let b = f64x4::new(5.0, 6.0, 7.0, 8.0); diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs index ac81ccb9dea1..f11192d87b58 100644 --- a/library/stdarch/src/x86/avx2.rs +++ b/library/stdarch/src/x86/avx2.rs @@ -1044,16 +1044,17 @@ extern "C" { } -#[cfg(all(test, target_feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use v256::*; use v128::*; use x86::avx2; use x86::__m256i; use std; - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_abs_epi32() { let a = i32x8::new( 0, 1, -1, std::i32::MAX, @@ -1065,8 +1066,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_abs_epi16() { let a = i16x16::new( 0, 1, -1, 2, @@ -1082,8 +1082,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_abs_epi8() { let a = i8x32::new( 0, 1, -1, 2, @@ -1103,8 +1102,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_add_epi64() { let a = i64x4::new(-10, 0, 100, 1_000_000_000); let b = i64x4::new(-1, 0, 1, 2); @@ -1113,8 +1111,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_add_epi32() { let a = i32x8::new(-1, 0, 1, 2, 3, 4, 5, 6); let b = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8); @@ -1123,8 +1120,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_add_epi16() { let a = i16x16::new( 0, 1, 2, 3, 4, 5, 6, 7, @@ -1139,8 +1135,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_add_epi8() { let a = i8x32::new( 0, 1, 2, 3, 4, 5, 6, 7, @@ -1161,8 +1156,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi8() { let a = i8x32::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -1177,8 +1171,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi8_saturate_positive() { let a = i8x32::splat(0x7F); let b = i8x32::splat(1); @@ -1186,8 +1179,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi8_saturate_negative() { let a = i8x32::splat(-0x80); let b = i8x32::splat(-1); @@ -1195,8 +1187,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi16() { let a = i16x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1209,8 +1200,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi16_saturate_positive() { let a = i16x16::splat(0x7FFF); let b = i16x16::splat(1); @@ -1218,8 +1208,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epi16_saturate_negative() { let a = i16x16::splat(-0x8000); let b = i16x16::splat(-1); @@ -1227,8 +1216,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epu8() { let a = u8x32::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -1243,8 +1231,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epu8_saturate() { let a = u8x32::splat(0xFF); let b = u8x32::splat(1); @@ -1253,8 +1240,7 @@ mod tests { } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epu16() { let a = u16x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1267,8 +1253,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_adds_epu16_saturate() { let a = u16x16::splat(0xFFFF); let b = u16x16::splat(1); @@ -1276,40 +1261,35 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_and_si256() { assert_eq!( avx2::_mm256_and_si256( __m256i::splat(5), __m256i::splat(3)),__m256i::splat(1)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_andnot_si256() { assert_eq!( avx2::_mm256_andnot_si256(__m256i::splat(5), __m256i::splat(3)), __m256i::splat(2)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_avg_epu8() { let (a, b) = (u8x32::splat(3), u8x32::splat(9)); let r = avx2::_mm256_avg_epu8(a, b); assert_eq!(r, u8x32::splat(6)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_avg_epu16() { let (a, b) = (u16x16::splat(3), u16x16::splat(9)); let r = avx2::_mm256_avg_epu16(a, b); assert_eq!(r, u16x16::splat(6)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_blendv_epi8() { let (a,b) = (i8x32::splat(4),i8x32::splat(2)); let mask = i8x32::splat(0).replace(2,-1); @@ -1318,7 +1298,7 @@ mod tests { assert_eq!(r,e); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpeq_epi8() { let a = i8x32::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -1330,7 +1310,7 @@ mod tests { assert_eq!(r, i8x32::splat(0).replace(2,0xFFu8 as i8)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpeq_epi16() { let a = i16x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1340,7 +1320,7 @@ mod tests { assert_eq!(r, i16x16::splat(0).replace(2, 0xFFFFu16 as i16)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpeq_epi32() { let a = i32x8::new(0, 1, 2, 3,4,5,6,7); let b = i32x8::new(7,6,2,4,3, 2, 1, 0); @@ -1348,7 +1328,7 @@ mod tests { assert_eq!(r, i32x8::splat(0).replace(2, 0xFFFFFFFFu32 as i32)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpeq_epi64() { let a = i64x4::new(0, 1, 2, 3); let b = i64x4::new(3, 2, 2, 0); @@ -1357,7 +1337,7 @@ mod tests { 2, 0xFFFFFFFFFFFFFFFFu64 as i64)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpgt_epi8() { let a = i8x32::splat(0).replace(0, 5); let b = i8x32::splat(0); @@ -1365,7 +1345,7 @@ mod tests { assert_eq!(r, i8x32::splat(0).replace(0, 0xFFu8 as i8)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpgt_epi16() { let a = i16x16::splat(0).replace(0, 5); let b = i16x16::splat(0); @@ -1373,7 +1353,7 @@ mod tests { assert_eq!(r, i16x16::splat(0).replace(0, 0xFFFFu16 as i16)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpgt_epi32() { let a = i32x8::splat(0).replace(0, 5); let b = i32x8::splat(0); @@ -1381,7 +1361,7 @@ mod tests { assert_eq!(r, i32x8::splat(0).replace(0, 0xFFFFFFFFu32 as i32)); } - #[test] + #[simd_test = "avx2"] fn _mm256_cmpgt_epi64() { let a = i64x4::splat(0).replace(0, 5); let b = i64x4::splat(0); @@ -1390,8 +1370,7 @@ mod tests { 0, 0xFFFFFFFFFFFFFFFFu64 as i64)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_hadd_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1400,8 +1379,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_hadd_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1410,8 +1388,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_hadds_epi16() { let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,1); let b = i16x16::splat(4); @@ -1421,8 +1398,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_hsub_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1431,8 +1407,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_hsub_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1441,8 +1416,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_hsubs_epi16() { let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,-1); let b = i16x16::splat(4); @@ -1451,8 +1425,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_madd_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1461,8 +1434,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_maddubs_epi16() { let a = u8x32::splat(2); let b = u8x32::splat(4); @@ -1471,8 +1443,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1480,8 +1451,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1489,8 +1459,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epi8() { let a = i8x32::splat(2); let b = i8x32::splat(4); @@ -1498,8 +1467,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epu16() { let a = u16x16::splat(2); let b = u16x16::splat(4); @@ -1507,8 +1475,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epu32() { let a = u32x8::splat(2); let b = u32x8::splat(4); @@ -1516,8 +1483,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_max_epu8() { let a = u8x32::splat(2); let b = u8x32::splat(4); @@ -1525,8 +1491,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1534,8 +1499,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1543,8 +1507,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epi8() { let a = i8x32::splat(2); let b = i8x32::splat(4); @@ -1552,8 +1515,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epu16() { let a = u16x16::splat(2); let b = u16x16::splat(4); @@ -1561,8 +1523,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epu32() { let a = u32x8::splat(2); let b = u32x8::splat(4); @@ -1570,8 +1531,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_min_epu8() { let a = u8x32::splat(2); let b = u8x32::splat(4); @@ -1603,8 +1563,7 @@ mod tests { } **/ - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mul_epi32() { let a = i32x8::new(0, 0, 0, 0, 2, 2, 2, 2); let b = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8); @@ -1613,8 +1572,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mul_epu32() { let a = u32x8::new(0, 0, 0, 0, 2, 2, 2, 2); let b = u32x8::new(1, 2, 3, 4, 5, 6, 7, 8); @@ -1623,8 +1581,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mulhi_epi16() { let a = i16x16::splat(6535); let b = i16x16::splat(6535); @@ -1633,8 +1590,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mulhi_epu16() { let a = u16x16::splat(6535); let b = u16x16::splat(6535); @@ -1643,8 +1599,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mullo_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1653,8 +1608,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mullo_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1663,8 +1617,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_mulhrs_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1673,8 +1626,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_or_si256() { let a = __m256i::splat(-1); let b = __m256i::splat(0); @@ -1682,8 +1634,7 @@ mod tests { assert_eq!(r, a); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_packs_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1697,8 +1648,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_packs_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1712,8 +1662,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_packus_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(4); @@ -1727,8 +1676,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_packus_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(4); @@ -1742,8 +1690,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sad_epu8() { let a = u8x32::splat(2); let b = u8x32::splat(4); @@ -1752,8 +1699,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sign_epi16() { let a = i16x16::splat(2); let b = i16x16::splat(-1); @@ -1762,8 +1708,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sign_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(-1); @@ -1772,8 +1717,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sign_epi8() { let a = i8x32::splat(2); let b = i8x32::splat(-1); @@ -1782,8 +1726,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sll_epi16() { assert_eq!( avx2::_mm256_sll_epi16(i16x16::splat(0xFF), i16x8::splat(0).replace(0,4)), @@ -1791,8 +1734,7 @@ mod tests { } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sll_epi32() { assert_eq!( avx2::_mm256_sll_epi32(i32x8::splat(0xFFFF), i32x4::splat(0).replace(0,4)), @@ -1800,8 +1742,7 @@ mod tests { } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sll_epi64() { assert_eq!( avx2::_mm256_sll_epi64(i64x4::splat(0xFFFFFFFF), i64x2::splat(0).replace(0,4)), @@ -1809,32 +1750,28 @@ mod tests { } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_slli_epi16() { assert_eq!( avx2::_mm256_slli_epi16(i16x16::splat(0xFF), 4), i16x16::splat(0xFF0)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_slli_epi32() { assert_eq!( avx2::_mm256_slli_epi32(i32x8::splat(0xFFFF), 4), i32x8::splat(0xFFFF0)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_slli_epi64() { assert_eq!( avx2::_mm256_slli_epi64(i64x4::splat(0xFFFFFFFF), 4), i64x4::splat(0xFFFFFFFF0)); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm_sllv_epi32() { let a = i32x4::splat(2); let b = i32x4::splat(1); @@ -1843,8 +1780,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sllv_epi32() { let a = i32x8::splat(2); let b = i32x8::splat(1); @@ -1852,8 +1788,8 @@ mod tests { let e = i32x8::splat(4); assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + + #[simd_test = "avx2"] fn _mm_sllv_epi64() { let a = i64x2::splat(2); let b = i64x2::splat(1); @@ -1861,8 +1797,8 @@ mod tests { let e = i64x2::splat(4); assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + + #[simd_test = "avx2"] fn _mm256_sllv_epi64() { let a = i64x4::splat(2); let b = i64x4::splat(1); @@ -1871,8 +1807,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_sra_epi16() { assert_eq!( avx2::_mm256_sra_epi16( @@ -1880,8 +1815,7 @@ mod tests { i16x16::splat(-1)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_sra_epi32() { assert_eq!( avx2::_mm256_sra_epi32( @@ -1889,24 +1823,21 @@ mod tests { i32x8::splat(-1)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srai_epi16() { assert_eq!( avx2::_mm256_srai_epi16( i16x16::splat(-1), 1), i16x16::splat(-1)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srai_epi32() { assert_eq!( avx2::_mm256_srai_epi32( i32x8::splat(-1), 1), i32x8::splat(-1)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm_srav_epi32() { let a = i32x4::splat(4); let count = i32x4::splat(1); @@ -1915,8 +1846,7 @@ mod tests { assert_eq!(r, e ); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srav_epi32() { let a = i32x8::splat(4); let count = i32x8::splat(1); @@ -1925,8 +1855,7 @@ mod tests { assert_eq!(r, e ); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srl_epi16() { assert_eq!( avx2::_mm256_srl_epi16( @@ -1934,8 +1863,7 @@ mod tests { i16x16::splat(0xF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srl_epi32() { assert_eq!( avx2::_mm256_srl_epi32( @@ -1943,8 +1871,7 @@ mod tests { i32x8::splat(0xFFF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srl_epi64() { assert_eq!( avx2::_mm256_srl_epi64( @@ -1952,32 +1879,28 @@ mod tests { i64x4::splat(0xFFFFFFF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srli_epi16() { assert_eq!( avx2::_mm256_srli_epi16(i16x16::splat(0xFF), 4), i16x16::splat(0xF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srli_epi32() { assert_eq!( avx2::_mm256_srli_epi32(i32x8::splat(0xFFFF), 4), i32x8::splat(0xFFF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm256_srli_epi64() { assert_eq!( avx2::_mm256_srli_epi64(i64x4::splat(0xFFFFFFFF), 4), i64x4::splat(0xFFFFFFF)); } - #[test] - #[target_feature ="+avx2"] + #[simd_test = "avx2"] fn _mm_srlv_epi32() { let a = i32x4::splat(2); let count = i32x4::splat(1); @@ -1986,8 +1909,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_srlv_epi32() { let a = i32x8::splat(2); let count = i32x8::splat(1); @@ -1996,8 +1918,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm_srlv_epi64() { let a = i64x2::splat(2); let count = i64x2::splat(1); @@ -2007,8 +1928,7 @@ mod tests { } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_srlv_epi64() { let a = i64x4::splat(2); let count = i64x4::splat(1); @@ -2017,8 +1937,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sub_epi16() { let a = i16x16::splat(4); let b = i16x16::splat(2); @@ -2026,8 +1945,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sub_epi32() { let a = i32x8::splat(4); let b = i32x8::splat(2); @@ -2035,8 +1953,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sub_epi64() { let a = i64x4::splat(4); let b = i64x4::splat(2); @@ -2044,8 +1961,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_sub_epi8() { let a = i8x32::splat(4); let b = i8x32::splat(2); @@ -2053,8 +1969,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_subs_epi16() { let a = i16x16::splat(4); let b = i16x16::splat(2); @@ -2062,8 +1977,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_subs_epi8() { let a = i8x32::splat(4); let b = i8x32::splat(2); @@ -2071,8 +1985,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_subs_epu16() { let a = u16x16::splat(4); let b = u16x16::splat(2); @@ -2080,8 +1993,7 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_subs_epu8() { let a = u8x32::splat(4); let b = u8x32::splat(2); @@ -2089,13 +2001,10 @@ mod tests { assert_eq!(r, b); } - #[test] - #[target_feature = "+avx2"] + #[simd_test = "avx2"] fn _mm256_xor_si256() { assert_eq!( avx2::_mm256_xor_si256(__m256i::splat(5), __m256i::splat(3)), __m256i::splat(6)); } - - } diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs index b22a0608bf09..44842c82cdef 100644 --- a/library/stdarch/src/x86/bmi.rs +++ b/library/stdarch/src/x86/bmi.rs @@ -8,7 +8,7 @@ //! provides a quick overview of the available instructions. #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; #[allow(dead_code)] extern "C" { @@ -185,22 +185,21 @@ pub fn _mm_tzcnt_u64(x: u64) -> u64 { #[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { + use stdsimd_test::simd_test; + use x86::bmi; - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _bextr_u32() { assert_eq!(bmi::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _bextr_u64() { assert_eq!(bmi::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _andn_u32() { assert_eq!(bmi::_andn_u32(0, 0), 0); assert_eq!(bmi::_andn_u32(0, 1), 1); @@ -214,8 +213,7 @@ mod tests { assert_eq!(bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32), 0b0001_1101u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _andn_u64() { assert_eq!(bmi::_andn_u64(0, 0), 0); assert_eq!(bmi::_andn_u64(0, 1), 1); @@ -229,62 +227,53 @@ mod tests { assert_eq!(bmi::_andn_u64(0b0100_0000u64, 0b0101_1101u64), 0b0001_1101u64); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsi_u32() { assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsi_u64() { assert_eq!(bmi::_blsi_u64(0b1101_0000u64), 0b0001_0000u64); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsmsk_u32() { assert_eq!(bmi::_blsmsk_u32(0b0011_0000u32), 0b0001_1111u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsmsk_u64() { assert_eq!(bmi::_blsmsk_u64(0b0011_0000u64), 0b0001_1111u64); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsr_u32() { /// TODO: test the behavior when the input is 0 assert_eq!(bmi::_blsr_u32(0b0011_0000u32), 0b0010_0000u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _blsr_u64() { /// TODO: test the behavior when the input is 0 assert_eq!(bmi::_blsr_u64(0b0011_0000u64), 0b0010_0000u64); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _tzcnt_u16() { assert_eq!(bmi::_tzcnt_u16(0b0000_0001u16), 0u16); assert_eq!(bmi::_tzcnt_u16(0b0000_0000u16), 16u16); assert_eq!(bmi::_tzcnt_u16(0b1001_0000u16), 4u16); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _tzcnt_u32() { assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32); assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32); assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32); } - #[test] - #[target_feature = "+bmi"] + #[simd_test = "bmi"] fn _tzcnt_u64() { assert_eq!(bmi::_tzcnt_u64(0b0000_0001u64), 0u64); assert_eq!(bmi::_tzcnt_u64(0b0000_0000u64), 64u64); diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index ed1c2e3b9353..09afe87e469a 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -8,7 +8,7 @@ //! provides a quick overview of the available instructions. #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Unsigned multiply without affecting flags. /// @@ -112,12 +112,13 @@ pub fn _pext_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pext_64(a, mask) } } -#[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use x86::bmi2; - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] fn _pext_u32() { let n = 0b1011_1110_1001_0011u32; @@ -131,8 +132,8 @@ mod tests { assert_eq!(bmi2::_pext_u32(n, m1), s1); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] + #[cfg(not(target_arch = "x86"))] fn _pext_u64() { let n = 0b1011_1110_1001_0011u64; @@ -146,8 +147,7 @@ mod tests { assert_eq!(bmi2::_pext_u64(n, m1), s1); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] fn _pdep_u32() { let n = 0b1011_1110_1001_0011u32; @@ -161,8 +161,8 @@ mod tests { assert_eq!(bmi2::_pdep_u32(n, m1), s1); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] + #[cfg(not(target_arch = "x86"))] fn _pdep_u64() { let n = 0b1011_1110_1001_0011u64; @@ -176,24 +176,22 @@ mod tests { assert_eq!(bmi2::_pdep_u64(n, m1), s1); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] fn _bzhi_u32() { let n = 0b1111_0010u32; let s = 0b0001_0010u32; assert_eq!(bmi2::_bzhi_u32(n, 5), s); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] + #[cfg(not(target_arch = "x86"))] fn _bzhi_u64() { let n = 0b1111_0010u64; let s = 0b0001_0010u64; assert_eq!(bmi2::_bzhi_u64(n, 5), s); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] fn _mulx_u32() { let a: u32 = 4_294_967_200; let b: u32 = 2; @@ -205,8 +203,8 @@ mod tests { assert_eq!(hi, 0b0001u32); } - #[test] - #[target_feature = "+bmi2"] + #[simd_test = "bmi2"] + #[cfg(not(target_arch = "x86"))] fn _mulx_u64() { let a: u64 = 9_223_372_036_854_775_800; let b: u64 = 100; diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index 9b0f262bfbcd..13e11a48b64f 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -20,6 +20,9 @@ pub type __m256i = ::v256::i8x32; #[macro_use] mod macros; +#[macro_use] +mod runtime; + mod sse; mod sse2; mod ssse3; @@ -32,6 +35,3 @@ mod abm; mod bmi; mod bmi2; mod tbm; - -#[macro_use] -mod runtime; diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index 49bba60458fb..f6deae35ec0c 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -2,7 +2,7 @@ use simd_llvm::simd_shuffle4; use v128::*; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Adds the first component of `a` and `b`, the other components are copied /// from `a`. @@ -217,13 +217,13 @@ extern { fn movmskps(a: f32x4) -> i32; } -#[cfg(all(test, target_feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { use v128::*; use x86::sse; + use stdsimd_test::simd_test; - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_add_ps() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -231,8 +231,7 @@ mod tests { assert_eq!(r, f32x4::new(-101.0, 25.0, 0.0, -15.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_add_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -240,8 +239,7 @@ mod tests { assert_eq!(r, f32x4::new(-101.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_sub_ps() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -249,8 +247,7 @@ mod tests { assert_eq!(r, f32x4::new(99.0, -15.0, 0.0, -5.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_sub_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -258,8 +255,7 @@ mod tests { assert_eq!(r, f32x4::new(99.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_mul_ps() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -267,8 +263,7 @@ mod tests { assert_eq!(r, f32x4::new(100.0, 100.0, 0.0, 50.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_mul_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -276,8 +271,7 @@ mod tests { assert_eq!(r, f32x4::new(100.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_div_ps() { let a = f32x4::new(-1.0, 5.0, 2.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.2, -5.0); @@ -285,8 +279,7 @@ mod tests { assert_eq!(r, f32x4::new(0.01, 0.25, 10.0, 2.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_div_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -294,8 +287,7 @@ mod tests { assert_eq!(r, f32x4::new(0.01, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_sqrt_ss() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_sqrt_ss(a); @@ -303,8 +295,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_sqrt_ps() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_sqrt_ps(a); @@ -312,8 +303,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_rcp_ss() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_rcp_ss(a); @@ -321,8 +311,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_rcp_ps() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_rcp_ps(a); @@ -330,8 +319,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_rsqrt_ss() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_rsqrt_ss(a); @@ -339,8 +327,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_rsqrt_ps() { let a = f32x4::new(4.0, 13.0, 16.0, 100.0); let r = sse::_mm_rsqrt_ps(a); @@ -348,8 +335,7 @@ mod tests { assert_eq!(r, e); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_min_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -357,8 +343,7 @@ mod tests { assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_min_ps() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -366,8 +351,7 @@ mod tests { assert_eq!(r, f32x4::new(-100.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_max_ss() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -375,8 +359,7 @@ mod tests { assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, -10.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_max_ps() { let a = f32x4::new(-1.0, 5.0, 0.0, -10.0); let b = f32x4::new(-100.0, 20.0, 0.0, -5.0); @@ -384,8 +367,7 @@ mod tests { assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_unpackhi_ps() { let a = f32x4::new(1.0, 2.0, 3.0, 4.0); let b = f32x4::new(5.0, 6.0, 7.0, 8.0); @@ -393,8 +375,7 @@ mod tests { assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0)); } - #[test] - #[target_feature = "+sse"] + #[simd_test = "sse"] fn _mm_movemask_ps() { let r = sse::_mm_movemask_ps(f32x4::new(-1.0, 5.0, -5.0, 0.0)); assert_eq!(r, 0b0101); diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index e67c96518061..d2079bd5af0d 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -10,7 +10,7 @@ use v128::*; use v64::*; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Provide a hint to the processor that the code sequence is a spin-wait loop. /// @@ -1720,35 +1720,36 @@ extern { fn movmskpd(a: f64x2) -> i32; } -#[cfg(all(test, target_feature = "sse2", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { use std::os::raw::c_void; + use stdsimd_test::simd_test; use v128::*; use x86::{__m128i, sse2}; - #[test] + #[simd_test = "sse2"] fn _mm_pause() { sse2::_mm_pause(); } - #[test] + #[simd_test = "sse2"] fn _mm_clflush() { let x = 0; unsafe { sse2::_mm_clflush(&x as *const _ as *mut c_void); } } - #[test] + #[simd_test = "sse2"] fn _mm_lfence() { sse2::_mm_lfence(); } - #[test] + #[simd_test = "sse2"] fn _mm_mfence() { sse2::_mm_mfence(); } - #[test] + #[simd_test = "sse2"] fn _mm_add_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1760,7 +1761,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_add_epi8_overflow() { let a = i8x16::splat(0x7F); let b = i8x16::splat(1); @@ -1768,7 +1769,7 @@ mod tests { assert_eq!(r, i8x16::splat(-128)); } - #[test] + #[simd_test = "sse2"] fn _mm_add_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); @@ -1777,7 +1778,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_add_epi32() { let a = i32x4::new(0, 1, 2, 3); let b = i32x4::new(4, 5, 6, 7); @@ -1786,7 +1787,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_add_epi64() { let a = i64x2::new(0, 1); let b = i64x2::new(2, 3); @@ -1795,7 +1796,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1807,7 +1808,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi8_saturate_positive() { let a = i8x16::splat(0x7F); let b = i8x16::splat(1); @@ -1815,7 +1816,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi8_saturate_negative() { let a = i8x16::splat(-0x80); let b = i8x16::splat(-1); @@ -1823,7 +1824,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); @@ -1832,7 +1833,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi16_saturate_positive() { let a = i16x8::splat(0x7FFF); let b = i16x8::splat(1); @@ -1840,7 +1841,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epi16_saturate_negative() { let a = i16x8::splat(-0x8000); let b = i16x8::splat(-1); @@ -1848,7 +1849,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epu8() { let a = u8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -1860,7 +1861,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epu8_saturate() { let a = u8x16::splat(0xFF); let b = u8x16::splat(1); @@ -1868,7 +1869,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epu16() { let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); @@ -1877,7 +1878,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_adds_epu16_saturate() { let a = u16x8::splat(0xFFFF); let b = u16x8::splat(1); @@ -1885,21 +1886,21 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_avg_epu8() { let (a, b) = (u8x16::splat(3), u8x16::splat(9)); let r = sse2::_mm_avg_epu8(a, b); assert_eq!(r, u8x16::splat(6)); } - #[test] + #[simd_test = "sse2"] fn _mm_avg_epu16() { let (a, b) = (u16x8::splat(3), u16x8::splat(9)); let r = sse2::_mm_avg_epu16(a, b); assert_eq!(r, u16x8::splat(6)); } - #[test] + #[simd_test = "sse2"] fn _mm_madd_epi16() { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16); @@ -1908,7 +1909,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_max_epi16() { let a = i16x8::splat(1); let b = i16x8::splat(-1); @@ -1916,7 +1917,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_max_epu8() { let a = u8x16::splat(1); let b = u8x16::splat(255); @@ -1924,7 +1925,7 @@ mod tests { assert_eq!(r, b); } - #[test] + #[simd_test = "sse2"] fn _mm_min_epi16() { let a = i16x8::splat(1); let b = i16x8::splat(-1); @@ -1932,7 +1933,7 @@ mod tests { assert_eq!(r, b); } - #[test] + #[simd_test = "sse2"] fn _mm_min_epu8() { let a = u8x16::splat(1); let b = u8x16::splat(255); @@ -1940,28 +1941,28 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_mulhi_epi16() { let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001)); let r = sse2::_mm_mulhi_epi16(a, b); assert_eq!(r, i16x8::splat(-16)); } - #[test] + #[simd_test = "sse2"] fn _mm_mulhi_epu16() { let (a, b) = (u16x8::splat(1000), u16x8::splat(1001)); let r = sse2::_mm_mulhi_epu16(a, b); assert_eq!(r, u16x8::splat(15)); } - #[test] + #[simd_test = "sse2"] fn _mm_mullo_epi16() { let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001)); let r = sse2::_mm_mullo_epi16(a, b); assert_eq!(r, i16x8::splat(-17960)); } - #[test] + #[simd_test = "sse2"] fn _mm_mul_epu32() { let a = u32x4::from(u64x2::new(1_000_000_000, 1 << 34)); let b = u32x4::from(u64x2::new(1_000_000_000, 1 << 35)); @@ -1970,7 +1971,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_sad_epu8() { let a = u8x16::new( 255, 254, 253, 252, 1, 2, 3, 4, @@ -1983,42 +1984,42 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_epi8() { let (a, b) = (i8x16::splat(5), i8x16::splat(6)); let r = sse2::_mm_sub_epi8(a, b); assert_eq!(r, i8x16::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_epi16() { let (a, b) = (i16x8::splat(5), i16x8::splat(6)); let r = sse2::_mm_sub_epi16(a, b); assert_eq!(r, i16x8::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_epi32() { let (a, b) = (i32x4::splat(5), i32x4::splat(6)); let r = sse2::_mm_sub_epi32(a, b); assert_eq!(r, i32x4::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_epi64() { let (a, b) = (i64x2::splat(5), i64x2::splat(6)); let r = sse2::_mm_sub_epi64(a, b); assert_eq!(r, i64x2::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi8() { let (a, b) = (i8x16::splat(5), i8x16::splat(2)); let r = sse2::_mm_subs_epi8(a, b); assert_eq!(r, i8x16::splat(3)); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi8_saturate_positive() { let a = i8x16::splat(0x7F); let b = i8x16::splat(-1); @@ -2026,7 +2027,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi8_saturate_negative() { let a = i8x16::splat(-0x80); let b = i8x16::splat(1); @@ -2034,14 +2035,14 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi16() { let (a, b) = (i16x8::splat(5), i16x8::splat(2)); let r = sse2::_mm_subs_epi16(a, b); assert_eq!(r, i16x8::splat(3)); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi16_saturate_positive() { let a = i16x8::splat(0x7FFF); let b = i16x8::splat(-1); @@ -2049,7 +2050,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epi16_saturate_negative() { let a = i16x8::splat(-0x8000); let b = i16x8::splat(1); @@ -2057,14 +2058,14 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epu8() { let (a, b) = (u8x16::splat(5), u8x16::splat(2)); let r = sse2::_mm_subs_epu8(a, b); assert_eq!(r, u8x16::splat(3)); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epu8_saturate() { let a = u8x16::splat(0); let b = u8x16::splat(1); @@ -2072,14 +2073,14 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epu16() { let (a, b) = (u16x8::splat(5), u16x8::splat(2)); let r = sse2::_mm_subs_epu16(a, b); assert_eq!(r, u16x8::splat(3)); } - #[test] + #[simd_test = "sse2"] fn _mm_subs_epu16_saturate() { let a = u16x8::splat(0); let b = u16x8::splat(1); @@ -2087,7 +2088,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_slli_si128() { let a = __m128i::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); @@ -2119,7 +2120,7 @@ mod tests { assert_eq!(r, __m128i::splat(0)); } - #[test] + #[simd_test = "sse2"] fn _mm_slli_epi16() { let a = i16x8::new( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0); @@ -2130,7 +2131,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_sll_epi16() { let a = i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0); let r = sse2::_mm_sll_epi16(a, i16x8::new(4, 0, 0, 0, 0, 0, 0, 0)); @@ -2139,28 +2140,28 @@ mod tests { assert_eq!(r, i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_slli_epi32() { assert_eq!( sse2::_mm_slli_epi32(i32x4::splat(0xFFFF), 4), i32x4::splat(0xFFFF0)); } - #[test] + #[simd_test = "sse2"] fn _mm_sll_epi32() { assert_eq!( sse2::_mm_sll_epi32(i32x4::splat(0xFFFF), i32x4::new(4, 0, 0, 0)), i32x4::splat(0xFFFF0)); } - #[test] + #[simd_test = "sse2"] fn _mm_slli_epi64() { assert_eq!( sse2::_mm_slli_epi64(i64x2::splat(0xFFFFFFFF), 4), i64x2::splat(0xFFFFFFFF0)); } - #[test] + #[simd_test = "sse2"] fn _mm_sll_epi64() { assert_eq!( sse2::_mm_sll_epi64( @@ -2168,13 +2169,13 @@ mod tests { i64x2::splat(0xFFFFFFFF0)); } - #[test] + #[simd_test = "sse2"] fn _mm_srai_epi16() { assert_eq!( sse2::_mm_srai_epi16(i16x8::splat(-1), 1), i16x8::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_sra_epi16() { assert_eq!( sse2::_mm_sra_epi16( @@ -2182,13 +2183,13 @@ mod tests { i16x8::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_srai_epi32() { assert_eq!( sse2::_mm_srai_epi32(i32x4::splat(-1), 1), i32x4::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_sra_epi32() { assert_eq!( sse2::_mm_sra_epi32( @@ -2196,7 +2197,7 @@ mod tests { i32x4::splat(-1)); } - #[test] + #[simd_test = "sse2"] fn _mm_srli_si128() { let a = __m128i::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); @@ -2228,7 +2229,7 @@ mod tests { assert_eq!(r, __m128i::splat(0)); } - #[test] + #[simd_test = "sse2"] fn _mm_srli_epi16() { let a = i16x8::new( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0); @@ -2239,7 +2240,7 @@ mod tests { assert_eq!(r, e); } - #[test] + #[simd_test = "sse2"] fn _mm_srl_epi16() { let a = i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0); let r = sse2::_mm_srl_epi16(a, i16x8::new(4, 0, 0, 0, 0, 0, 0, 0)); @@ -2248,28 +2249,28 @@ mod tests { assert_eq!(r, i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_srli_epi32() { assert_eq!( sse2::_mm_srli_epi32(i32x4::splat(0xFFFF), 4), i32x4::splat(0xFFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_srl_epi32() { assert_eq!( sse2::_mm_srl_epi32(i32x4::splat(0xFFFF), i32x4::new(4, 0, 0, 0)), i32x4::splat(0xFFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_srli_epi64() { assert_eq!( sse2::_mm_srli_epi64(i64x2::splat(0xFFFFFFFF), 4), i64x2::splat(0xFFFFFFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_srl_epi64() { assert_eq!( sse2::_mm_srl_epi64( @@ -2277,35 +2278,35 @@ mod tests { i64x2::splat(0xFFFFFFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_and_si128() { assert_eq!( sse2::_mm_and_si128(__m128i::splat(5), __m128i::splat(3)), __m128i::splat(1)); } - #[test] + #[simd_test = "sse2"] fn _mm_andnot_si128() { assert_eq!( sse2::_mm_andnot_si128(__m128i::splat(5), __m128i::splat(3)), __m128i::splat(2)); } - #[test] + #[simd_test = "sse2"] fn _mm_or_si128() { assert_eq!( sse2::_mm_or_si128(__m128i::splat(5), __m128i::splat(3)), __m128i::splat(7)); } - #[test] + #[simd_test = "sse2"] fn _mm_xor_si128() { assert_eq!( sse2::_mm_xor_si128(__m128i::splat(5), __m128i::splat(3)), __m128i::splat(6)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpeq_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -2316,7 +2317,7 @@ mod tests { 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpeq_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i16x8::new(7, 6, 2, 4, 3, 2, 1, 0); @@ -2324,7 +2325,7 @@ mod tests { assert_eq!(r, i16x8::splat(0).replace(2, 0xFFFFu16 as i16)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpeq_epi32() { let a = i32x4::new(0, 1, 2, 3); let b = i32x4::new(3, 2, 2, 0); @@ -2332,7 +2333,7 @@ mod tests { assert_eq!(r, i32x4::splat(0).replace(2, 0xFFFFFFFFu32 as i32)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpgt_epi8() { let a = i8x16::splat(0).replace(0, 5); let b = i8x16::splat(0); @@ -2340,7 +2341,7 @@ mod tests { assert_eq!(r, i8x16::splat(0).replace(0, 0xFFu8 as i8)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpgt_epi16() { let a = i16x8::splat(0).replace(0, 5); let b = i16x8::splat(0); @@ -2348,7 +2349,7 @@ mod tests { assert_eq!(r, i16x8::splat(0).replace(0, 0xFFFFu16 as i16)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmpgt_epi32() { let a = i32x4::splat(0).replace(0, 5); let b = i32x4::splat(0); @@ -2356,7 +2357,7 @@ mod tests { assert_eq!(r, i32x4::splat(0).replace(0, 0xFFFFFFFFu32 as i32)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmplt_epi8() { let a = i8x16::splat(0); let b = i8x16::splat(0).replace(0, 5); @@ -2364,7 +2365,7 @@ mod tests { assert_eq!(r, i8x16::splat(0).replace(0, 0xFFu8 as i8)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmplt_epi16() { let a = i16x8::splat(0); let b = i16x8::splat(0).replace(0, 5); @@ -2372,7 +2373,7 @@ mod tests { assert_eq!(r, i16x8::splat(0).replace(0, 0xFFFFu16 as i16)); } - #[test] + #[simd_test = "sse2"] fn _mm_cmplt_epi32() { let a = i32x4::splat(0); let b = i32x4::splat(0).replace(0, 5); @@ -2380,69 +2381,69 @@ mod tests { assert_eq!(r, i32x4::splat(0).replace(0, 0xFFFFFFFFu32 as i32)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtepi32_pd() { let a = sse2::_mm_set_epi32(35, 25, 15, 5); let r = sse2::_mm_cvtepi32_pd(a); assert_eq!(r, f64x2::new(5.0, 15.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi32_sd() { let a = f64x2::splat(3.5); assert_eq!(sse2::_mm_cvtsi32_sd(a, 5), f64x2::new(5.0, 3.5)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi64_sd() { let a = f64x2::splat(3.5); assert_eq!(sse2::_mm_cvtsi64_sd(a, 5), f64x2::new(5.0, 3.5)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtepi32_ps() { let a = i32x4::new(1, 2, 3, 4); assert_eq!(sse2::_mm_cvtepi32_ps(a), f32x4::new(1.0, 2.0, 3.0, 4.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi32_si128() { assert_eq!(sse2::_mm_cvtsi32_si128(5), i32x4::new(5, 0, 0, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi64_si128() { assert_eq!(sse2::_mm_cvtsi64_si128(5), i64x2::new(5, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi128_si32() { assert_eq!(sse2::_mm_cvtsi128_si32(i32x4::new(5, 0, 0, 0)), 5); } - #[test] + #[simd_test = "sse2"] fn _mm_cvtsi128_si64() { assert_eq!(sse2::_mm_cvtsi128_si64(i64x2::new(5, 0)), 5); } - #[test] + #[simd_test = "sse2"] fn _mm_set_epi64x() { assert_eq!(sse2::_mm_set_epi64x(0, 1), i64x2::new(1, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_set_epi32() { assert_eq!(sse2::_mm_set_epi32(0, 1, 2, 3), i32x4::new(3, 2, 1, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_set_epi16() { assert_eq!( sse2::_mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7), i16x8::new(7, 6, 5, 4, 3, 2, 1, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_set_epi8() { assert_eq!( sse2::_mm_set_epi8( @@ -2450,39 +2451,39 @@ mod tests { i8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_set1_epi64x() { assert_eq!(sse2::_mm_set1_epi64x(1), i64x2::splat(1)); } - #[test] + #[simd_test = "sse2"] fn _mm_set1_epi32() { assert_eq!(sse2::_mm_set1_epi32(1), i32x4::splat(1)); } - #[test] + #[simd_test = "sse2"] fn _mm_set1_epi16() { assert_eq!(sse2::_mm_set1_epi16(1), i16x8::splat(1)); } - #[test] + #[simd_test = "sse2"] fn _mm_set1_epi8() { assert_eq!(sse2::_mm_set1_epi8(1), i8x16::splat(1)); } - #[test] + #[simd_test = "sse2"] fn _mm_setr_epi32() { assert_eq!(sse2::_mm_setr_epi32(0, 1, 2, 3), i32x4::new(0, 1, 2, 3)); } - #[test] + #[simd_test = "sse2"] fn _mm_setr_epi16() { assert_eq!( sse2::_mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); } - #[test] + #[simd_test = "sse2"] fn _mm_setr_epi8() { assert_eq!( sse2::_mm_setr_epi8( @@ -2490,33 +2491,33 @@ mod tests { i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); } - #[test] + #[simd_test = "sse2"] fn _mm_setzero_si128() { assert_eq!(sse2::_mm_setzero_si128(), __m128i::from(i64x2::splat(0))); } - #[test] + #[simd_test = "sse2"] fn _mm_loadl_epi64() { let a = i64x2::new(6, 5); let r = unsafe { sse2::_mm_loadl_epi64(&a as *const _) }; assert_eq!(r, i64x2::new(6, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_load_si128() { let a = sse2::_mm_set_epi64x(5, 6); let r = unsafe { sse2::_mm_load_si128(&a as *const _ as *const _) }; assert_eq!(a, i64x2::from(r)); } - #[test] + #[simd_test = "sse2"] fn _mm_loadu_si128() { let a = sse2::_mm_set_epi64x(5, 6); let r = unsafe { sse2::_mm_loadu_si128(&a as *const _ as *const _) }; assert_eq!(a, i64x2::from(r)); } - #[test] + #[simd_test = "sse2"] fn _mm_maskmoveu_si128() { let a = i8x16::splat(9); let mask = i8x16::splat(0).replace(2, 0x80u8 as i8); @@ -2527,7 +2528,7 @@ mod tests { assert_eq!(r, i8x16::splat(0).replace(2, 9)); } - #[test] + #[simd_test = "sse2"] fn _mm_store_si128() { let a = __m128i::splat(9); let mut r = __m128i::splat(0); @@ -2537,7 +2538,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_storeu_si128() { let a = __m128i::splat(9); let mut r = __m128i::splat(0); @@ -2547,7 +2548,7 @@ mod tests { assert_eq!(r, a); } - #[test] + #[simd_test = "sse2"] fn _mm_storel_epi64() { let a = __m128i::from(i64x2::new(2, 9)); let mut r = __m128i::splat(0); @@ -2557,13 +2558,13 @@ mod tests { assert_eq!(r, __m128i::from(i64x2::new(2, 0))); } - #[test] + #[simd_test = "sse2"] fn _mm_move_epi64() { let a = i64x2::new(5, 6); assert_eq!(sse2::_mm_move_epi64(a), i64x2::new(5, 0)); } - #[test] + #[simd_test = "sse2"] fn _mm_packs_epi16() { let a = i16x8::new(0x80, -0x81, 0, 0, 0, 0, 0, 0); let b = i16x8::new(0, 0, 0, 0, 0, 0, -0x81, 0x80); @@ -2573,7 +2574,7 @@ mod tests { 0, 0, 0, 0, 0, 0, -0x80, 0x7F)); } - #[test] + #[simd_test = "sse2"] fn _mm_packs_epi32() { let a = i32x4::new(0x8000, -0x8001, 0, 0); let b = i32x4::new(0, 0, -0x8001, 0x8000); @@ -2582,7 +2583,7 @@ mod tests { r, i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_packus_epi16() { let a = i16x8::new(0x100, -1, 0, 0, 0, 0, 0, 0); let b = i16x8::new(0, 0, 0, 0, 0, 0, -1, 0x100); @@ -2592,19 +2593,19 @@ mod tests { 0, 0, 0, 0, 0, 0, 0, 0xFF)); } - #[test] + #[simd_test = "sse2"] fn _mm_extract_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); assert_eq!(sse2::_mm_extract_epi16(a, 5), 5); } - #[test] + #[simd_test = "sse2"] fn _mm_insert_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); assert_eq!(sse2::_mm_insert_epi16(a, 9, 0), a.replace(0, 9)); } - #[test] + #[simd_test = "sse2"] fn _mm_movemask_epi8() { let a = i8x16::from(u8x16::new( 0b1000_0000, 0b0, 0b1000_0000, 0b01, 0b0101, 0b1111_0000, 0, 0, @@ -2612,28 +2613,28 @@ mod tests { assert_eq!(sse2::_mm_movemask_epi8(a), 0b10100100_00100101); } - #[test] + #[simd_test = "sse2"] fn _mm_shuffle_epi32() { let a = i32x4::new(5, 10, 15, 20); let e = i32x4::new(20, 10, 10, 5); assert_eq!(sse2::_mm_shuffle_epi32(a, 0b00_01_01_11), e); } - #[test] + #[simd_test = "sse2"] fn _mm_shufflehi_epi16() { let a = i16x8::new(1, 2, 3, 4, 5, 10, 15, 20); let e = i16x8::new(1, 2, 3, 4, 20, 10, 10, 5); assert_eq!(sse2::_mm_shufflehi_epi16(a, 0b00_01_01_11), e); } - #[test] + #[simd_test = "sse2"] fn _mm_shufflelo_epi16() { let a = i16x8::new(5, 10, 15, 20, 1, 2, 3, 4); let e = i16x8::new(20, 10, 10, 5, 1, 2, 3, 4); assert_eq!(sse2::_mm_shufflelo_epi16(a, 0b00_01_01_11), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpackhi_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -2644,7 +2645,7 @@ mod tests { assert_eq!(sse2::_mm_unpackhi_epi8(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpackhi_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); @@ -2652,7 +2653,7 @@ mod tests { assert_eq!(sse2::_mm_unpackhi_epi16(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpackhi_epi32() { let a = i32x4::new(0, 1, 2, 3); let b = i32x4::new(4, 5, 6, 7); @@ -2660,7 +2661,7 @@ mod tests { assert_eq!(sse2::_mm_unpackhi_epi32(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpackhi_epi64() { let a = i64x2::new(0, 1); let b = i64x2::new(2, 3); @@ -2668,7 +2669,7 @@ mod tests { assert_eq!(sse2::_mm_unpackhi_epi64(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpacklo_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -2679,7 +2680,7 @@ mod tests { assert_eq!(sse2::_mm_unpacklo_epi8(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpacklo_epi16() { let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); @@ -2687,7 +2688,7 @@ mod tests { assert_eq!(sse2::_mm_unpacklo_epi16(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpacklo_epi32() { let a = i32x4::new(0, 1, 2, 3); let b = i32x4::new(4, 5, 6, 7); @@ -2695,7 +2696,7 @@ mod tests { assert_eq!(sse2::_mm_unpacklo_epi32(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_unpacklo_epi64() { let a = i64x2::new(0, 1); let b = i64x2::new(2, 3); @@ -2703,105 +2704,105 @@ mod tests { assert_eq!(sse2::_mm_unpacklo_epi64(a, b), e); } - #[test] + #[simd_test = "sse2"] fn _mm_add_sd() { assert_eq!( sse2::_mm_add_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(6.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_add_pd() { assert_eq!( sse2::_mm_add_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(6.0, 12.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_div_sd() { assert_eq!( sse2::_mm_div_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(0.2, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_div_pd() { assert_eq!( sse2::_mm_div_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(0.2, 0.2)); } - #[test] + #[simd_test = "sse2"] fn _mm_max_sd() { assert_eq!( sse2::_mm_max_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(5.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_max_pd() { assert_eq!( sse2::_mm_max_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(5.0, 10.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_min_sd() { assert_eq!( sse2::_mm_min_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(1.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_min_pd() { assert_eq!( sse2::_mm_min_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(1.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_mul_sd() { assert_eq!( sse2::_mm_mul_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(5.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_mul_pd() { assert_eq!( sse2::_mm_mul_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(5.0, 20.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_sqrt_sd() { assert_eq!( sse2::_mm_sqrt_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(5.0f64.sqrt(), 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_sqrt_pd() { assert_eq!( sse2::_mm_sqrt_pd(f64x2::new(1.0, 2.0)), f64x2::new(1.0f64.sqrt(), 2.0f64.sqrt())); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_sd() { assert_eq!( sse2::_mm_sub_sd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(-4.0, 2.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_sub_pd() { assert_eq!( sse2::_mm_sub_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(-4.0, -8.0)); } - #[test] + #[simd_test = "sse2"] fn _mm_and_pd() { use std::mem::transmute; @@ -2813,7 +2814,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_andnot_pd() { use std::mem::transmute; @@ -2825,7 +2826,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_or_pd() { use std::mem::transmute; @@ -2837,7 +2838,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_xor_pd() { use std::mem::transmute; @@ -2849,7 +2850,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpeq_sd() { use std::mem::transmute; @@ -2861,7 +2862,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmplt_sd() { use std::mem::transmute; @@ -2873,7 +2874,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmple_sd() { use std::mem::transmute; @@ -2885,7 +2886,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpgt_sd() { use std::mem::transmute; @@ -2897,7 +2898,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpge_sd() { use std::mem::transmute; @@ -2909,7 +2910,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpord_sd() { use std::f64::NAN; use std::mem::transmute; @@ -2922,7 +2923,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpunord_sd() { use std::f64::NAN; use std::mem::transmute; @@ -2935,7 +2936,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpneq_sd() { use std::mem::transmute; @@ -2947,7 +2948,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnlt_sd() { use std::mem::transmute; @@ -2959,7 +2960,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnle_sd() { use std::mem::transmute; @@ -2971,7 +2972,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpngt_sd() { use std::mem::transmute; @@ -2983,7 +2984,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnge_sd() { use std::mem::transmute; @@ -2995,7 +2996,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpeq_pd() { use std::mem::transmute; @@ -3007,7 +3008,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmplt_pd() { use std::mem::transmute; @@ -3019,7 +3020,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmple_pd() { use std::mem::transmute; @@ -3031,7 +3032,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpgt_pd() { use std::mem::transmute; @@ -3043,7 +3044,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpge_pd() { use std::mem::transmute; @@ -3055,7 +3056,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpord_pd() { use std::f64::NAN; use std::mem::transmute; @@ -3068,7 +3069,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpunord_pd() { use std::f64::NAN; use std::mem::transmute; @@ -3081,7 +3082,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpneq_pd() { use std::mem::transmute; @@ -3093,7 +3094,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnlt_pd() { use std::mem::transmute; @@ -3105,7 +3106,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnle_pd() { use std::mem::transmute; @@ -3117,7 +3118,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpngt_pd() { use std::mem::transmute; @@ -3129,7 +3130,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_cmpnge_pd() { use std::mem::transmute; @@ -3141,7 +3142,7 @@ mod tests { } } - #[test] + #[simd_test = "sse2"] fn _mm_comieq_sd() { use std::f64::NAN; @@ -3152,37 +3153,37 @@ mod tests { assert!(!sse2::_mm_comieq_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_comilt_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_comilt_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_comile_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(sse2::_mm_comile_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_comigt_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_comigt_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_comige_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(sse2::_mm_comige_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_comineq_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_comineq_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomieq_sd() { use std::f64::NAN; @@ -3193,37 +3194,37 @@ mod tests { assert!(!sse2::_mm_ucomieq_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomilt_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_ucomilt_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomile_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(sse2::_mm_ucomile_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomigt_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_ucomigt_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomige_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(sse2::_mm_ucomige_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_ucomineq_sd() { let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); assert!(!sse2::_mm_ucomineq_sd(a, b)); } - #[test] + #[simd_test = "sse2"] fn _mm_movemask_pd() { let r = sse2::_mm_movemask_pd(f64x2::new(-1.0, 5.0)); assert_eq!(r, 0b01); diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 955026e2b4d0..a91695d9bc43 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -57,13 +57,14 @@ extern { fn dpps(a: f32x4, b: f32x4, imm8: u8) -> f32x4; } -#[cfg(all(test, target_feature = "sse4.1", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use v128::*; use x86::sse41; - #[test] - #[target_feature = "+sse4.1"] + #[simd_test = "sse4.1"] fn _mm_blendv_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -76,8 +77,7 @@ mod tests { assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e); } - #[test] - #[target_feature = "+sse4.1"] + #[simd_test = "sse4.1"] fn _mm_dp_pd() { let a = f64x2::new(2.0, 3.0); let b = f64x2::new(1.0, 4.0); @@ -85,8 +85,7 @@ mod tests { assert_eq!(sse41::_mm_dp_pd(a, b, 0b00110001), e); } - #[test] - #[target_feature = "+sse4.1"] + #[simd_test = "sse4.1"] fn _mm_dp_ps() { let a = f32x4::new(2.0, 3.0, 1.0, 10.0); let b = f32x4::new(1.0, 4.0, 0.5, 10.0); diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs index 7459997f7b5c..22c90ed503a8 100644 --- a/library/stdarch/src/x86/sse42.rs +++ b/library/stdarch/src/x86/sse42.rs @@ -40,13 +40,14 @@ extern { fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32; } -#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use v128::*; use x86::{__m128i, sse42}; - #[test] - #[target_feature = "+sse4.2"] + #[simd_test = "sse4.2"] fn _mm_cmpestri() { let a = &b"bar "[..]; let b = &b"foobar "[..]; diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs index 1d1497f5d0d1..5729dbca1997 100644 --- a/library/stdarch/src/x86/ssse3.rs +++ b/library/stdarch/src/x86/ssse3.rs @@ -50,20 +50,20 @@ extern { fn pshufb128(a: u8x16, b: u8x16) -> u8x16; } -#[cfg(all(test, target_feature = "ssse3", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use v128::*; use x86::ssse3 as ssse3; - #[test] - #[target_feature = "+ssse3"] + #[simd_test = "ssse3"] fn _mm_abs_epi8() { let r = ssse3::_mm_abs_epi8(i8x16::splat(-5)); assert_eq!(r, u8x16::splat(5)); } - #[test] - #[target_feature = "+ssse3"] + #[simd_test = "ssse3"] fn _mm_shuffle_epi8() { let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b = u8x16::new(4, 128, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); diff --git a/library/stdarch/src/x86/tbm.rs b/library/stdarch/src/x86/tbm.rs index 8ada36815c0b..ad8590f115f9 100644 --- a/library/stdarch/src/x86/tbm.rs +++ b/library/stdarch/src/x86/tbm.rs @@ -8,7 +8,7 @@ //! provides a quick overview of the available instructions. #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32 /* @@ -252,40 +252,38 @@ pub fn _tzmsk_u64(x: u64) -> u64 { !x & (x.wrapping_sub(1)) } -#[cfg(all(test, target_feature = "tbm", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { + use stdsimd_test::simd_test; + use x86::tbm; /* - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _bextr_u32() { assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _bextr_u64() { assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); } */ - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blcfill_u32() { assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blcfill_u64() { assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blci_u32() { assert_eq!(tbm::_blci_u32(0b0101_0000u32), 0b1111_1111_1111_1111_1111_1111_1111_1110u32); @@ -293,8 +291,8 @@ mod tests { 0b1111_1111_1111_1111_1111_1110_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blci_u64() { assert_eq!(tbm::_blci_u64(0b0101_0000u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64); @@ -302,99 +300,92 @@ mod tests { 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blcic_u32() { assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32); assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blcic_u64() { assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64); assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blcmsk_u32() { assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blcmsk_u64() { assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blcs_u32() { assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32); assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blcs_u64() { assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64); assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blsfill_u32() { assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); assert_eq!(tbm::_blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blsfill_u64() { assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); assert_eq!(tbm::_blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _blsic_u32() { assert_eq!(tbm::_blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32); assert_eq!(tbm::_blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _blsic_u64() { assert_eq!(tbm::_blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64); assert_eq!(tbm::_blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _t1mskc_u32() { assert_eq!(tbm::_t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32); assert_eq!(tbm::_t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _t1mksc_u64() { assert_eq!(tbm::_t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64); assert_eq!(tbm::_t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] fn _tzmsk_u32() { assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); } - #[test] - #[target_feature = "+tbm"] + #[simd_test = "tbm"] + #[cfg(not(target_arch = "x86"))] fn _tzmsk_u64() { assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); diff --git a/library/stdarch/assert-instr/Cargo.toml b/library/stdarch/stdsimd-test/Cargo.toml similarity index 76% rename from library/stdarch/assert-instr/Cargo.toml rename to library/stdarch/stdsimd-test/Cargo.toml index fda3e32c7682..0edd095d4354 100644 --- a/library/stdarch/assert-instr/Cargo.toml +++ b/library/stdarch/stdsimd-test/Cargo.toml @@ -1,10 +1,11 @@ [package] -name = "assert-instr" +name = "stdsimd-test" version = "0.1.0" authors = ["Alex Crichton "] [dependencies] assert-instr-macro = { path = "assert-instr-macro" } +simd-test-macro = { path = "simd-test-macro" } backtrace = "0.3" cc = "1.0" lazy_static = "0.2" diff --git a/library/stdarch/assert-instr/assert-instr-macro/Cargo.toml b/library/stdarch/stdsimd-test/assert-instr-macro/Cargo.toml similarity index 100% rename from library/stdarch/assert-instr/assert-instr-macro/Cargo.toml rename to library/stdarch/stdsimd-test/assert-instr-macro/Cargo.toml diff --git a/library/stdarch/assert-instr/assert-instr-macro/build.rs b/library/stdarch/stdsimd-test/assert-instr-macro/build.rs similarity index 100% rename from library/stdarch/assert-instr/assert-instr-macro/build.rs rename to library/stdarch/stdsimd-test/assert-instr-macro/build.rs diff --git a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs b/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs similarity index 97% rename from library/stdarch/assert-instr/assert-instr-macro/src/lib.rs rename to library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs index 9d7093a52322..358119306962 100644 --- a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs +++ b/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs @@ -44,7 +44,7 @@ pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream { #[allow(non_snake_case)] {ignore} fn assert_instr_{name}() {{ - ::assert_instr::assert({name} as usize, + ::stdsimd_test::assert({name} as usize, \"{name}\", \"{instr}\"); }} diff --git a/library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml b/library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml new file mode 100644 index 000000000000..b4b860d3b124 --- /dev/null +++ b/library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "simd-test-macro" +version = "0.1.0" +authors = ["Alex Crichton "] + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = { version = "0.1", features = ["unstable"] } +quote = { git = 'https://github.com/dtolnay/quote' } diff --git a/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs b/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs new file mode 100644 index 000000000000..35581fb45695 --- /dev/null +++ b/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs @@ -0,0 +1,76 @@ +//! Implementation of the `#[simd_test]` macro +//! +//! This macro expands to a `#[test]` function which tests the local machine for +//! the appropriate cfg before calling the inner test function. + +#![feature(proc_macro)] + +#[macro_use] +extern crate quote; +extern crate proc_macro; +extern crate proc_macro2; + +use proc_macro2::{TokenStream, Term, TokenNode, TokenTree}; +use proc_macro2::Literal; + +fn string(s: &str) -> TokenTree { + TokenTree { + kind: TokenNode::Literal(Literal::string(s)), + + span: Default::default(), + } +} + +#[proc_macro_attribute] +pub fn simd_test(attr: proc_macro::TokenStream, + item: proc_macro::TokenStream) -> proc_macro::TokenStream { + let tokens = TokenStream::from(attr).into_iter().collect::>(); + if tokens.len() != 2 { + panic!("expected #[simd_test = \"feature\"]"); + } + match tokens[0].kind { + TokenNode::Op('=', _) => {} + _ => panic!("expected #[simd_test = \"feature\"]"), + } + let target_feature = &tokens[1]; + let enable_feature = match tokens[1].kind { + TokenNode::Literal(ref l) => l.to_string(), + _ => panic!("expected #[simd_test = \"feature\"]"), + }; + let enable_feature = enable_feature.trim_left_matches('"') + .trim_right_matches('"'); + let enable_feature = string(&format!("+{}", enable_feature)); + let item = TokenStream::from(item); + let name = find_name(item.clone()); + + let name: TokenStream = name.as_str().parse().unwrap(); + + let ret: TokenStream = quote! { + #[test] + fn #name() { + if cfg_feature_enabled!(#target_feature) { + return #name(); + } + + #[target_feature = #enable_feature] + #item + } + }.into(); + ret.into() +} + +fn find_name(item: TokenStream) -> Term { + let mut tokens = item.into_iter(); + while let Some(tok) = tokens.next() { + if let TokenNode::Term(word) = tok.kind { + if word.as_str() == "fn" { + break + } + } + } + + match tokens.next().map(|t| t.kind) { + Some(TokenNode::Term(word)) => word, + _ => panic!("failed to find function name"), + } +} diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/stdsimd-test/src/lib.rs similarity index 98% rename from library/stdarch/assert-instr/src/lib.rs rename to library/stdarch/stdsimd-test/src/lib.rs index df1336b2f7c1..4f1049357f7d 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/stdsimd-test/src/lib.rs @@ -1,4 +1,4 @@ -//! Runtime support needed for the `#![assert_instr]` macro +//! Runtime support needed for testing the stdsimd crate. //! //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes @@ -7,6 +7,7 @@ #![feature(proc_macro)] extern crate assert_instr_macro; +extern crate simd_test_macro; extern crate backtrace; extern crate cc; extern crate rustc_demangle; @@ -19,6 +20,7 @@ use std::process::Command; use std::str; pub use assert_instr_macro::*; +pub use simd_test_macro::*; lazy_static! { static ref DISASSEMBLY: HashMap> = disassemble_myself(); From 2660ba176e0da47e588d3198aceba6a60d4c7d71 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 14:53:20 -0700 Subject: [PATCH 4/7] Fix a merge conflict --- library/stdarch/src/x86/avx2.rs | 2 +- library/stdarch/src/x86/sse41.rs | 2 +- library/stdarch/src/x86/ssse3.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs index 9c9c75f360fc..5cb85c161467 100644 --- a/library/stdarch/src/x86/avx2.rs +++ b/library/stdarch/src/x86/avx2.rs @@ -3,7 +3,7 @@ use v128::*; use x86::__m256i; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. #[inline(always)] diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 447bdd19d127..0ca528a80b15 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -2,7 +2,7 @@ use v128::*; use x86::__m128i; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; #[inline(always)] #[target_feature = "+sse4.1"] diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs index 9adde849cd94..2ad0a8efe913 100644 --- a/library/stdarch/src/x86/ssse3.rs +++ b/library/stdarch/src/x86/ssse3.rs @@ -1,7 +1,7 @@ use v128::*; #[cfg(test)] -use assert_instr::assert_instr; +use stdsimd_test::assert_instr; /// Compute the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. From 6fbab9af445d365dafc7ceeec76e472da78ea044 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 14:34:13 -0700 Subject: [PATCH 5/7] Add a test for x86 runtime support Make sure we agree with the `cupid` crate --- library/stdarch/Cargo.toml | 1 + library/stdarch/tests/cpu-detection.rs | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 library/stdarch/tests/cpu-detection.rs diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index 76f09868bcad..0db8852a01d8 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -20,3 +20,4 @@ opt-level = 3 [dev-dependencies] stdsimd-test = { path = "stdsimd-test" } +cupid = "0.3" diff --git a/library/stdarch/tests/cpu-detection.rs b/library/stdarch/tests/cpu-detection.rs new file mode 100644 index 000000000000..764de415345d --- /dev/null +++ b/library/stdarch/tests/cpu-detection.rs @@ -0,0 +1,25 @@ +#![feature(cfg_target_feature)] + +#[macro_use] +extern crate stdsimd; +extern crate cupid; + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn works() { + let information = cupid::master().unwrap(); + assert_eq!(cfg_feature_enabled!("sse"), information.sse()); + assert_eq!(cfg_feature_enabled!("sse2"), information.sse2()); + assert_eq!(cfg_feature_enabled!("sse3"), information.sse3()); + assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3()); + assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1()); + assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2()); + assert_eq!(cfg_feature_enabled!("avx"), information.avx()); + assert_eq!(cfg_feature_enabled!("avx2"), information.avx2()); + assert_eq!(cfg_feature_enabled!("fma"), information.fma()); + assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1()); + assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2()); + assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt()); + + // TODO: tbm, abm, lzcnt +} From 1d6fbebf114e82b611a9b9b6fe0b041e8e75ccae Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 15:08:40 -0700 Subject: [PATCH 6/7] x86 intrinsics TODO is now in an issue --- library/stdarch/TODO.md | 926 ---------------------------------------- 1 file changed, 926 deletions(-) delete mode 100644 library/stdarch/TODO.md diff --git a/library/stdarch/TODO.md b/library/stdarch/TODO.md deleted file mode 100644 index 95d072a6d4d4..000000000000 --- a/library/stdarch/TODO.md +++ /dev/null @@ -1,926 +0,0 @@ -**TIP**: Use the following command to generate a section in this list for -Intel intrinsics. Replace `SSE4.2` with the intended type. - -``` -rg '^> TODO.md -``` - -rg calls the ripgrep tool, which can be installed with `cargo install ripgrep` - -sse ---- -* [ ] `_MM_TRANSPOSE4_PS` -* [ ] `_mm_getcsr` -* [ ] `_mm_setcsr` -* [ ] `_MM_GET_EXCEPTION_STATE` -* [ ] `_MM_SET_EXCEPTION_STATE` -* [ ] `_MM_GET_EXCEPTION_MASK` -* [ ] `_MM_SET_EXCEPTION_MASK` -* [ ] `_MM_GET_ROUNDING_MODE` -* [ ] `_MM_SET_ROUNDING_MODE` -* [ ] `_MM_GET_FLUSH_ZERO_MODE` -* [ ] `_MM_SET_FLUSH_ZERO_MODE` -* [ ] `_mm_prefetch` -* [ ] `_mm_sfence` -* [ ] `_mm_max_pi16` -* [ ] `_m_pmaxsw` -* [ ] `_mm_max_pu8` -* [ ] `_m_pmaxub` -* [ ] `_mm_min_pi16` -* [ ] `_m_pminsw` -* [ ] `_mm_min_pu8` -* [ ] `_m_pminub` -* [ ] `_mm_mulhi_pu16` -* [ ] `_m_pmulhuw` -* [ ] `_mm_avg_pu8` -* [ ] `_m_pavgb` -* [ ] `_mm_avg_pu16` -* [ ] `_m_pavgw` -* [ ] `_mm_sad_pu8` -* [ ] `_m_psadbw` -* [ ] `_mm_cvtsi32_ss` -* [ ] `_mm_cvt_si2ss` -* [ ] `_mm_cvtsi64_ss` -* [ ] `_mm_cvtpi32_ps` -* [ ] `_mm_cvt_pi2ps` -* [ ] `_mm_cvtpi16_ps` -* [ ] `_mm_cvtpu16_ps` -* [ ] `_mm_cvtpi8_ps` -* [ ] `_mm_cvtpu8_ps` -* [ ] `_mm_cvtpi32x2_ps` -* [ ] `_mm_stream_pi` -* [ ] `_mm_maskmove_si64` -* [ ] `_m_maskmovq` -* [ ] `_mm_extract_pi16` -* [ ] `_m_pextrw` -* [ ] `_mm_insert_pi16` -* [ ] `_m_pinsrw` -* [ ] `_mm_movemask_pi8` -* [ ] `_m_pmovmskb` -* [ ] `_mm_shuffle_pi16` -* [ ] `_m_pshufw` -* [x] `_mm_add_ss` -* [x] `_mm_add_ps` -* [x] `_mm_sub_ss` -* [x] `_mm_sub_ps` -* [x] `_mm_mul_ss` -* [x] `_mm_mul_ps` -* [x] `_mm_div_ss` -* [x] `_mm_div_ps` -* [x] `_mm_sqrt_ss` -* [x] `_mm_sqrt_ps` -* [x] `_mm_rcp_ss` -* [x] `_mm_rcp_ps` -* [x] `_mm_rsqrt_ss` -* [x] `_mm_rsqrt_ps` -* [x] `_mm_min_ss` -* [x] `_mm_min_ps` -* [x] `_mm_max_ss` -* [x] `_mm_max_ps` -* [ ] `_mm_and_ps` -* [ ] `_mm_andnot_ps` -* [ ] `_mm_or_ps` -* [ ] `_mm_xor_ps` -* [ ] `_mm_cmpeq_ss` -* [ ] `_mm_cmpeq_ps` -* [ ] `_mm_cmplt_ss` -* [ ] `_mm_cmplt_ps` -* [ ] `_mm_cmple_ss` -* [ ] `_mm_cmple_ps` -* [ ] `_mm_cmpgt_ss` -* [ ] `_mm_cmpgt_ps` -* [ ] `_mm_cmpge_ss` -* [ ] `_mm_cmpge_ps` -* [ ] `_mm_cmpneq_ss` -* [ ] `_mm_cmpneq_ps` -* [ ] `_mm_cmpnlt_ss` -* [ ] `_mm_cmpnlt_ps` -* [ ] `_mm_cmpnle_ss` -* [ ] `_mm_cmpnle_ps` -* [ ] `_mm_cmpngt_ss` -* [ ] `_mm_cmpngt_ps` -* [ ] `_mm_cmpnge_ss` -* [ ] `_mm_cmpnge_ps` -* [ ] `_mm_cmpord_ss` -* [ ] `_mm_cmpord_ps` -* [ ] `_mm_cmpunord_ss` -* [ ] `_mm_cmpunord_ps` -* [ ] `_mm_comieq_ss` -* [ ] `_mm_comilt_ss` -* [ ] `_mm_comile_ss` -* [ ] `_mm_comigt_ss` -* [ ] `_mm_comige_ss` -* [ ] `_mm_comineq_ss` -* [ ] `_mm_ucomieq_ss` -* [ ] `_mm_ucomilt_ss` -* [ ] `_mm_ucomile_ss` -* [ ] `_mm_ucomigt_ss` -* [ ] `_mm_ucomige_ss` -* [ ] `_mm_ucomineq_ss` -* [ ] `_mm_cvtss_si32` -* [ ] `_mm_cvt_ss2si` -* [ ] `_mm_cvtss_si64` -* [ ] `_mm_cvtss_f32` -* [ ] `_mm_cvtps_pi32` -* [ ] `_mm_cvt_ps2pi` -* [ ] `_mm_cvttss_si32` -* [ ] `_mm_cvtt_ss2si` -* [ ] `_mm_cvttss_si64` -* [ ] `_mm_cvttps_pi32` -* [ ] `_mm_cvtt_ps2pi` -* [ ] `_mm_cvtps_pi16` -* [ ] `_mm_cvtps_pi8` -* [ ] `_mm_set_ss` -* [ ] `_mm_set1_ps` -* [ ] `_mm_set_ps1` -* [ ] `_mm_set_ps` -* [ ] `_mm_setr_ps` -* [ ] `_mm_setzero_ps` -* [ ] `_mm_loadh_pi` -* [ ] `_mm_loadl_pi` -* [ ] `_mm_load_ss` -* [ ] `_mm_load1_ps` -* [ ] `_mm_load_ps1` -* [ ] `_mm_load_ps` -* [ ] `_mm_loadu_ps` -* [ ] `_mm_loadr_ps` -* [ ] `_mm_stream_ps` -* [ ] `_mm_storeh_pi` -* [ ] `_mm_storel_pi` -* [ ] `_mm_store_ss` -* [ ] `_mm_store1_ps` -* [ ] `_mm_store_ps1` -* [ ] `_mm_store_ps` -* [ ] `_mm_storeu_ps` -* [ ] `_mm_storer_ps` -* [ ] `_mm_move_ss` -* [x] `_mm_shuffle_ps` -* [x] `_mm_unpackhi_ps` -* [x] `_mm_unpacklo_ps` -* [x] `_mm_movehl_ps` -* [x] `_mm_movelh_ps` -* [x] `_mm_movemask_ps` -* [ ] `_mm_undefined_ps` - - -sse2 ----- -* [x] `_mm_pause` -* [x] `_mm_clflush` -* [x] `_mm_lfence` -* [x] `_mm_mfence` -* [x] `_mm_add_epi8` -* [x] `_mm_add_epi16` -* [x] `_mm_add_epi32` -* [ ] `_mm_add_si64` -* [x] `_mm_add_epi64` -* [x] `_mm_adds_epi8` -* [x] `_mm_adds_epi16` -* [x] `_mm_adds_epu8` -* [x] `_mm_adds_epu16` -* [x] `_mm_avg_epu8` -* [x] `_mm_avg_epu16` -* [x] `_mm_madd_epi16` -* [x] `_mm_max_epi16` -* [x] `_mm_max_epu8` -* [x] `_mm_min_epi16` -* [x] `_mm_min_epu8` -* [x] `_mm_mulhi_epi16` -* [x] `_mm_mulhi_epu16` -* [x] `_mm_mullo_epi16` -* [ ] `_mm_mul_su32` -* [x] `_mm_mul_epu32` -* [x] `_mm_sad_epu8` -* [x] `_mm_sub_epi8` -* [x] `_mm_sub_epi16` -* [x] `_mm_sub_epi32` -* [ ] `_mm_sub_si64` -* [x] `_mm_sub_epi64` -* [x] `_mm_subs_epi8` -* [x] `_mm_subs_epi16` -* [x] `_mm_subs_epu8` -* [x] `_mm_subs_epu16` -* [x] `_mm_slli_si128` -* [x] `_mm_bslli_si128` -* [x] `_mm_bsrli_si128` -* [x] `_mm_slli_epi16` -* [x] `_mm_sll_epi16` -* [x] `_mm_slli_epi32` -* [x] `_mm_sll_epi32` -* [x] `_mm_slli_epi64` -* [x] `_mm_sll_epi64` -* [x] `_mm_srai_epi16` -* [x] `_mm_sra_epi16` -* [x] `_mm_srai_epi32` -* [x] `_mm_sra_epi32` -* [x] `_mm_srli_si128` -* [x] `_mm_srli_epi16` -* [x] `_mm_srl_epi16` -* [x] `_mm_srli_epi32` -* [x] `_mm_srl_epi32` -* [x] `_mm_srli_epi64` -* [x] `_mm_srl_epi64` -* [x] `_mm_and_si128` -* [x] `_mm_andnot_si128` -* [x] `_mm_or_si128` -* [x] `_mm_xor_si128` -* [x] `_mm_cmpeq_epi8` -* [x] `_mm_cmpeq_epi16` -* [x] `_mm_cmpeq_epi32` -* [x] `_mm_cmpgt_epi8` -* [x] `_mm_cmpgt_epi16` -* [x] `_mm_cmpgt_epi32` -* [x] `_mm_cmplt_epi8` -* [x] `_mm_cmplt_epi16` -* [x] `_mm_cmplt_epi32` -* [x] `_mm_cvtepi32_pd` -* [x] `_mm_cvtsi32_sd` -* [x] `_mm_cvtsi64_sd` -* [x] `_mm_cvtsi64x_sd` -* [x] `_mm_cvtepi32_ps` -* [ ] `_mm_cvtpi32_pd` -* [x] `_mm_cvtsi32_si128` -* [x] `_mm_cvtsi64_si128` -* [x] `_mm_cvtsi64x_si128` -* [x] `_mm_cvtsi128_si32` -* [x] `_mm_cvtsi128_si64` -* [x] `_mm_cvtsi128_si64x` -* [ ] `_mm_set_epi64` -* [x] `_mm_set_epi64x` -* [x] `_mm_set_epi32` -* [x] `_mm_set_epi16` -* [x] `_mm_set_epi8` -* [ ] `_mm_set1_epi64` -* [x] `_mm_set1_epi64x` -* [x] `_mm_set1_epi32` -* [x] `_mm_set1_epi16` -* [x] `_mm_set1_epi8` -* [ ] `_mm_setr_epi64` -* [x] `_mm_setr_epi32` -* [x] `_mm_setr_epi16` -* [x] `_mm_setr_epi8` -* [x] `_mm_setzero_si128` -* [x] `_mm_loadl_epi64` -* [x] `_mm_load_si128` -* [x] `_mm_loadu_si128` -* [x] `_mm_maskmoveu_si128` -* [x] `_mm_store_si128` -* [x] `_mm_storeu_si128` -* [x] `_mm_storel_epi64` -* [ ] `_mm_stream_si128` -* [ ] `_mm_stream_si32` -* [ ] `_mm_stream_si64` -* [ ] `_mm_movepi64_pi64` -* [ ] `_mm_movpi64_epi64` -* [x] `_mm_move_epi64` -* [x] `_mm_packs_epi16` -* [x] `_mm_packs_epi32` -* [x] `_mm_packus_epi16` -* [x] `_mm_extract_epi16` -* [x] `_mm_insert_epi16` -* [x] `_mm_movemask_epi8` -* [x] `_mm_shuffle_epi32` -* [x] `_mm_shufflehi_epi16` -* [x] `_mm_shufflelo_epi16` -* [x] `_mm_unpackhi_epi8` -* [x] `_mm_unpackhi_epi16` -* [x] `_mm_unpackhi_epi32` -* [x] `_mm_unpackhi_epi64` -* [x] `_mm_unpacklo_epi8` -* [x] `_mm_unpacklo_epi16` -* [x] `_mm_unpacklo_epi32` -* [x] `_mm_unpacklo_epi64` -* [x] `_mm_add_sd` -* [x] `_mm_add_pd` -* [x] `_mm_div_sd` -* [x] `_mm_div_pd` -* [x] `_mm_max_sd` -* [x] `_mm_max_pd` -* [x] `_mm_min_sd` -* [x] `_mm_min_pd` -* [x] `_mm_mul_sd` -* [x] `_mm_mul_pd` -* [x] `_mm_sqrt_sd` -* [x] `_mm_sqrt_pd` -* [x] `_mm_sub_sd` -* [x] `_mm_sub_pd` -* [x] `_mm_and_pd` -* [x] `_mm_andnot_pd` -* [x] `_mm_or_pd` -* [x] `_mm_xor_pd` -* [x] `_mm_cmpeq_sd` -* [x] `_mm_cmplt_sd` -* [x] `_mm_cmple_sd` -* [x] `_mm_cmpgt_sd` -* [x] `_mm_cmpge_sd` -* [x] `_mm_cmpord_sd` -* [x] `_mm_cmpunord_sd` -* [x] `_mm_cmpneq_sd` -* [x] `_mm_cmpnlt_sd` -* [x] `_mm_cmpnle_sd` -* [x] `_mm_cmpngt_sd` -* [x] `_mm_cmpnge_sd` -* [x] `_mm_cmpeq_pd` -* [x] `_mm_cmplt_pd` -* [x] `_mm_cmple_pd` -* [x] `_mm_cmpgt_pd` -* [x] `_mm_cmpge_pd` -* [x] `_mm_cmpord_pd` -* [x] `_mm_cmpunord_pd` -* [x] `_mm_cmpneq_pd` -* [x] `_mm_cmpnlt_pd` -* [x] `_mm_cmpnle_pd` -* [x] `_mm_cmpngt_pd` -* [x] `_mm_cmpnge_pd` -* [x] `_mm_comieq_sd` -* [x] `_mm_comilt_sd` -* [x] `_mm_comile_sd` -* [x] `_mm_comigt_sd` -* [x] `_mm_comige_sd` -* [x] `_mm_comineq_sd` -* [x] `_mm_ucomieq_sd` -* [x] `_mm_ucomilt_sd` -* [x] `_mm_ucomile_sd` -* [x] `_mm_ucomigt_sd` -* [x] `_mm_ucomige_sd` -* [x] `_mm_ucomineq_sd` -* [ ] `_mm_cvtpd_ps` -* [ ] `_mm_cvtps_pd` -* [ ] `_mm_cvtpd_epi32` -* [ ] `_mm_cvtsd_si32` -* [ ] `_mm_cvtsd_si64` -* [ ] `_mm_cvtsd_si64x` -* [ ] `_mm_cvtsd_ss` -* [ ] `_mm_cvtsd_f64` -* [ ] `_mm_cvtss_sd` -* [ ] `_mm_cvttpd_epi32` -* [ ] `_mm_cvttsd_si32` -* [ ] `_mm_cvttsd_si64` -* [ ] `_mm_cvttsd_si64x` -* [ ] `_mm_cvtps_epi32` -* [ ] `_mm_cvttps_epi32` -* [ ] `_mm_cvtpd_pi32` -* [ ] `_mm_cvttpd_pi32` -* [ ] `_mm_set_sd` -* [ ] `_mm_set1_pd` -* [ ] `_mm_set_pd1` -* [ ] `_mm_set_pd` -* [ ] `_mm_setr_pd` -* [ ] `_mm_setzero_pd` -* [ ] `_mm_load_pd` -* [ ] `_mm_load1_pd` -* [ ] `_mm_load_pd1` -* [ ] `_mm_loadr_pd` -* [ ] `_mm_loadu_pd` -* [ ] `_mm_load_sd` -* [ ] `_mm_loadh_pd` -* [ ] `_mm_loadl_pd` -* [ ] `_mm_stream_pd` -* [ ] `_mm_store_sd` -* [ ] `_mm_store1_pd` -* [ ] `_mm_store_pd1` -* [ ] `_mm_store_pd` -* [ ] `_mm_storeu_pd` -* [ ] `_mm_storer_pd` -* [ ] `_mm_storeh_pd` -* [ ] `_mm_storel_pd` -* [ ] `_mm_unpackhi_pd` -* [ ] `_mm_unpacklo_pd` -* [x] `_mm_movemask_pd` -* [ ] `_mm_shuffle_pd` -* [ ] `_mm_move_sd` -* [ ] `_mm_castpd_ps` -* [ ] `_mm_castpd_si128` -* [ ] `_mm_castps_pd` -* [ ] `_mm_castps_si128` -* [ ] `_mm_castsi128_pd` -* [ ] `_mm_castsi128_ps` -* [ ] `_mm_undefined_pd` -* [ ] `_mm_undefined_si128` - - -sse3 ----- -* [ ] `_mm_addsub_ps` -* [ ] `_mm_addsub_pd` -* [ ] `_mm_hadd_pd` -* [ ] `_mm_hadd_ps` -* [ ] `_mm_hsub_pd` -* [ ] `_mm_hsub_ps` -* [ ] `_mm_lddqu_si128` -* [ ] `_mm_movedup_pd` -* [ ] `_mm_loaddup_pd` -* [ ] `_mm_movehdup_ps` -* [ ] `_mm_moveldup_ps` - - -ssse3 ------ -* [ ] `_mm_abs_pi8` -* [x] `_mm_abs_epi8` -* [ ] `_mm_abs_pi16` -* [ ] `_mm_abs_epi16` -* [ ] `_mm_abs_pi32` -* [ ] `_mm_abs_epi32` -* [x] `_mm_shuffle_epi8` -* [ ] `_mm_shuffle_pi8` -* [ ] `_mm_alignr_epi8` -* [ ] `_mm_alignr_pi8` -* [ ] `_mm_hadd_epi16` -* [ ] `_mm_hadds_epi16` -* [ ] `_mm_hadd_epi32` -* [ ] `_mm_hadd_pi16` -* [ ] `_mm_hadd_pi32` -* [ ] `_mm_hadds_pi16` -* [ ] `_mm_hsub_epi16` -* [ ] `_mm_hsubs_epi16` -* [ ] `_mm_hsub_epi32` -* [ ] `_mm_hsub_pi16` -* [ ] `_mm_hsub_pi32` -* [ ] `_mm_hsubs_pi16` -* [ ] `_mm_maddubs_epi16` -* [ ] `_mm_maddubs_pi16` -* [ ] `_mm_mulhrs_epi16` -* [ ] `_mm_mulhrs_pi16` -* [ ] `_mm_sign_epi8` -* [ ] `_mm_sign_epi16` -* [ ] `_mm_sign_epi32` -* [ ] `_mm_sign_pi8` -* [ ] `_mm_sign_pi16` -* [ ] `_mm_sign_pi32` - - -sse4.1 ------- -* [ ] `_mm_blend_pd` -* [ ] `_mm_blend_ps` -* [ ] `_mm_blendv_pd` -* [ ] `_mm_blendv_ps` -* [x] `_mm_blendv_epi8` -* [ ] `_mm_blend_epi16` -* [x] `_mm_dp_pd` -* [x] `_mm_dp_ps` -* [ ] `_mm_extract_ps` -* [ ] `_mm_extract_epi8` -* [ ] `_mm_extract_epi32` -* [ ] `_mm_extract_epi64` -* [ ] `_mm_insert_ps` -* [ ] `_mm_insert_epi8` -* [ ] `_mm_insert_epi32` -* [ ] `_mm_insert_epi64` -* [ ] `_mm_max_epi8` -* [ ] `_mm_max_epi32` -* [ ] `_mm_max_epu32` -* [ ] `_mm_max_epu16` -* [ ] `_mm_min_epi8` -* [ ] `_mm_min_epi32` -* [ ] `_mm_min_epu32` -* [ ] `_mm_min_epu16` -* [ ] `_mm_packus_epi32` -* [ ] `_mm_cmpeq_epi64` -* [ ] `_mm_cvtepi8_epi16` -* [ ] `_mm_cvtepi8_epi32` -* [ ] `_mm_cvtepi8_epi64` -* [ ] `_mm_cvtepi16_epi32` -* [ ] `_mm_cvtepi16_epi64` -* [ ] `_mm_cvtepi32_epi64` -* [ ] `_mm_cvtepu8_epi16` -* [ ] `_mm_cvtepu8_epi32` -* [ ] `_mm_cvtepu8_epi64` -* [ ] `_mm_cvtepu16_epi32` -* [ ] `_mm_cvtepu16_epi64` -* [ ] `_mm_cvtepu32_epi64` -* [ ] `_mm_mul_epi32` -* [ ] `_mm_mullo_epi32` -* [ ] `_mm_testz_si128` -* [ ] `_mm_testc_si128` -* [ ] `_mm_testnzc_si128` -* [ ] `_mm_test_all_zeros` -* [ ] `_mm_test_mix_ones_zeros` -* [ ] `_mm_test_all_ones` -* [ ] `_mm_round_pd` -* [ ] `_mm_floor_pd` -* [ ] `_mm_ceil_pd` -* [ ] `_mm_round_ps` -* [ ] `_mm_floor_ps` -* [ ] `_mm_ceil_ps` -* [ ] `_mm_round_sd` -* [ ] `_mm_floor_sd` -* [ ] `_mm_ceil_sd` -* [ ] `_mm_round_ss` -* [ ] `_mm_floor_ss` -* [ ] `_mm_ceil_ss` -* [ ] `_mm_minpos_epu16` -* [ ] `_mm_mpsadbw_epu8` -* [ ] `_mm_stream_load_si128` - - -sse4.2 ------- -* [ ] `_mm_cmpistrm` -* [ ] `_mm_cmpistri` -* [ ] `_mm_cmpistrz` -* [ ] `_mm_cmpistrc` -* [ ] `_mm_cmpistrs` -* [ ] `_mm_cmpistro` -* [ ] `_mm_cmpistra` -* [ ] `_mm_cmpestrm` -* [ ] `_mm_cmpestri` -* [ ] `_mm_cmpestrz` -* [ ] `_mm_cmpestrc` -* [ ] `_mm_cmpestrs` -* [ ] `_mm_cmpestro` -* [ ] `_mm_cmpestra` -* [ ] `_mm_cmpgt_epi64` -* [ ] `_mm_crc32_u8` -* [ ] `_mm_crc32_u16` -* [ ] `_mm_crc32_u32` -* [ ] `_mm_crc32_u64` - - -avx ---- -* [x] `_mm256_add_pd` -* [x] `_mm256_add_ps` -* [x] `_mm256_addsub_pd` -* [ ] `_mm256_addsub_ps` -* [ ] `_mm256_and_pd` -* [ ] `_mm256_and_ps` -* [ ] `_mm256_andnot_pd` -* [ ] `_mm256_andnot_ps` -* [ ] `_mm256_blend_pd` -* [ ] `_mm256_blend_ps` -* [ ] `_mm256_blendv_pd` -* [ ] `_mm256_blendv_ps` -* [ ] `_mm256_div_pd` -* [ ] `_mm256_div_ps` -* [ ] `_mm256_dp_ps` -* [ ] `_mm256_hadd_pd` -* [ ] `_mm256_hadd_ps` -* [ ] `_mm256_hsub_pd` -* [ ] `_mm256_hsub_ps` -* [ ] `_mm256_max_pd` -* [ ] `_mm256_max_ps` -* [ ] `_mm256_min_pd` -* [ ] `_mm256_min_ps` -* [ ] `_mm256_mul_pd` -* [ ] `_mm256_mul_ps` -* [ ] `_mm256_or_pd` -* [ ] `_mm256_or_ps` -* [ ] `_mm256_shuffle_pd` -* [ ] `_mm256_shuffle_ps` -* [ ] `_mm256_sub_pd` -* [ ] `_mm256_sub_ps` -* [ ] `_mm256_xor_pd` -* [ ] `_mm256_xor_ps` -* [ ] `_mm_cmp_pd` -* [ ] `_mm256_cmp_pd` -* [ ] `_mm_cmp_ps` -* [ ] `_mm256_cmp_ps` -* [ ] `_mm_cmp_sd` -* [ ] `_mm_cmp_ss` -* [ ] `_mm256_cvtepi32_pd` -* [ ] `_mm256_cvtepi32_ps` -* [ ] `_mm256_cvtpd_ps` -* [ ] `_mm256_cvtps_epi32` -* [ ] `_mm256_cvtps_pd` -* [ ] `_mm256_cvttpd_epi32` -* [ ] `_mm256_cvtpd_epi32` -* [ ] `_mm256_cvttps_epi32` -* [ ] `_mm256_extractf128_ps` -* [ ] `_mm256_extractf128_pd` -* [ ] `_mm256_extractf128_si256` -* [ ] `_mm256_extract_epi8` -* [ ] `_mm256_extract_epi16` -* [ ] `_mm256_extract_epi32` -* [ ] `_mm256_extract_epi64` -* [ ] `_mm256_zeroall` -* [ ] `_mm256_zeroupper` -* [ ] `_mm256_permutevar_ps` -* [ ] `_mm_permutevar_ps` -* [ ] `_mm256_permute_ps` -* [ ] `_mm_permute_ps` -* [ ] `_mm256_permutevar_pd` -* [ ] `_mm_permutevar_pd` -* [ ] `_mm256_permute_pd` -* [ ] `_mm_permute_pd` -* [ ] `_mm256_permute2f128_ps` -* [ ] `_mm256_permute2f128_pd` -* [ ] `_mm256_permute2f128_si256` -* [ ] `_mm256_broadcast_ss` -* [ ] `_mm_broadcast_ss` -* [ ] `_mm256_broadcast_sd` -* [ ] `_mm256_broadcast_ps` -* [ ] `_mm256_broadcast_pd` -* [ ] `_mm256_insertf128_ps` -* [ ] `_mm256_insertf128_pd` -* [ ] `_mm256_insertf128_si256` -* [ ] `_mm256_insert_epi8` -* [ ] `_mm256_insert_epi16` -* [ ] `_mm256_insert_epi32` -* [ ] `_mm256_insert_epi64` -* [ ] `_mm256_load_pd` -* [ ] `_mm256_store_pd` -* [ ] `_mm256_load_ps` -* [ ] `_mm256_store_ps` -* [ ] `_mm256_loadu_pd` -* [ ] `_mm256_storeu_pd` -* [ ] `_mm256_loadu_ps` -* [ ] `_mm256_storeu_ps` -* [ ] `_mm256_load_si256` -* [ ] `_mm256_store_si256` -* [ ] `_mm256_loadu_si256` -* [ ] `_mm256_storeu_si256` -* [ ] `_mm256_maskload_pd` -* [ ] `_mm256_maskstore_pd` -* [ ] `_mm_maskload_pd` -* [ ] `_mm_maskstore_pd` -* [ ] `_mm256_maskload_ps` -* [ ] `_mm256_maskstore_ps` -* [ ] `_mm_maskload_ps` -* [ ] `_mm_maskstore_ps` -* [ ] `_mm256_movehdup_ps` -* [ ] `_mm256_moveldup_ps` -* [ ] `_mm256_movedup_pd` -* [ ] `_mm256_lddqu_si256` -* [ ] `_mm256_stream_si256` -* [ ] `_mm256_stream_pd` -* [ ] `_mm256_stream_ps` -* [ ] `_mm256_rcp_ps` -* [ ] `_mm256_rsqrt_ps` -* [ ] `_mm256_sqrt_pd` -* [ ] `_mm256_sqrt_ps` -* [ ] `_mm256_round_pd` -* [ ] `_mm256_round_ps` -* [ ] `_mm256_unpackhi_pd` -* [ ] `_mm256_unpackhi_ps` -* [ ] `_mm256_unpacklo_pd` -* [ ] `_mm256_unpacklo_ps` -* [ ] `_mm256_testz_si256` -* [ ] `_mm256_testc_si256` -* [ ] `_mm256_testnzc_si256` -* [ ] `_mm256_testz_pd` -* [ ] `_mm256_testc_pd` -* [ ] `_mm256_testnzc_pd` -* [ ] `_mm_testz_pd` -* [ ] `_mm_testc_pd` -* [ ] `_mm_testnzc_pd` -* [ ] `_mm256_testz_ps` -* [ ] `_mm256_testc_ps` -* [ ] `_mm256_testnzc_ps` -* [ ] `_mm_testz_ps` -* [ ] `_mm_testc_ps` -* [ ] `_mm_testnzc_ps` -* [ ] `_mm256_movemask_pd` -* [ ] `_mm256_movemask_ps` -* [ ] `_mm256_setzero_pd` -* [ ] `_mm256_setzero_ps` -* [ ] `_mm256_setzero_si256` -* [ ] `_mm256_set_pd` -* [ ] `_mm256_set_ps` -* [ ] `_mm256_set_epi8` -* [ ] `_mm256_set_epi16` -* [ ] `_mm256_set_epi32` -* [ ] `_mm256_set_epi64x` -* [ ] `_mm256_setr_pd` -* [ ] `_mm256_setr_ps` -* [ ] `_mm256_setr_epi8` -* [ ] `_mm256_setr_epi16` -* [ ] `_mm256_setr_epi32` -* [ ] `_mm256_setr_epi64x` -* [ ] `_mm256_set1_pd` -* [ ] `_mm256_set1_ps` -* [ ] `_mm256_set1_epi8` -* [ ] `_mm256_set1_epi16` -* [ ] `_mm256_set1_epi32` -* [ ] `_mm256_set1_epi64x` -* [ ] `_mm256_castpd_ps` -* [ ] `_mm256_castps_pd` -* [ ] `_mm256_castps_si256` -* [ ] `_mm256_castpd_si256` -* [ ] `_mm256_castsi256_ps` -* [ ] `_mm256_castsi256_pd` -* [ ] `_mm256_castps256_ps128` -* [ ] `_mm256_castpd256_pd128` -* [ ] `_mm256_castsi256_si128` -* [ ] `_mm256_castps128_ps256` -* [ ] `_mm256_castpd128_pd256` -* [ ] `_mm256_castsi128_si256` -* [ ] `_mm256_zextps128_ps256` -* [ ] `_mm256_zextpd128_pd256` -* [ ] `_mm256_zextsi128_si256` -* [ ] `_mm256_floor_ps` -* [ ] `_mm256_ceil_ps` -* [ ] `_mm256_floor_pd` -* [ ] `_mm256_ceil_pd` -* [ ] `_mm256_undefined_ps` -* [ ] `_mm256_undefined_pd` -* [ ] `_mm256_undefined_si256` -* [ ] `_mm256_set_m128` -* [ ] `_mm256_set_m128d` -* [ ] `_mm256_set_m128i` -* [ ] `_mm256_setr_m128` -* [ ] `_mm256_setr_m128d` -* [ ] `_mm256_setr_m128i` -* [ ] `_mm256_loadu2_m128` -* [ ] `_mm256_loadu2_m128d` -* [ ] `_mm256_loadu2_m128i` -* [ ] `_mm256_storeu2_m128` -* [ ] `_mm256_storeu2_m128d` -* [ ] `_mm256_storeu2_m128i` - - - -avx2 ----- -* [x] `_mm256_abs_epi8` -* [x] `_mm256_abs_epi16` -* [x] `_mm256_abs_epi32` -* [x] `_mm256_add_epi8` -* [x] `_mm256_add_epi16` -* [x] `_mm256_add_epi32` -* [x] `_mm256_add_epi64` -* [x] `_mm256_adds_epi8` -* [x] `_mm256_adds_epi16` -* [x] `_mm256_adds_epu8` -* [x] `_mm256_adds_epu16` -* [ ] `_mm256_alignr_epi8` -* [x] `_mm256_and_si256` -* [x] `_mm256_andnot_si256` -* [x] `_mm256_avg_epu8` -* [x] `_mm256_avg_epu16` -* [ ] `_mm256_blend_epi16` -* [ ] `_mm_blend_epi32` -* [ ] `_mm256_blend_epi32` -* [x] `_mm256_blendv_epi8` -* [ ] `_mm_broadcastb_epi8` -* [ ] `_mm256_broadcastb_epi8` -* [ ] `_mm_broadcastd_epi32` -* [ ] `_mm256_broadcastd_epi32` -* [ ] `_mm_broadcastq_epi64` -* [ ] `_mm256_broadcastq_epi64` -* [ ] `_mm_broadcastsd_pd` -* [ ] `_mm256_broadcastsd_pd` -* [ ] `_mm_broadcastsi128_si256` -* [ ] `_mm256_broadcastsi128_si256` -* [ ] `_mm_broadcastss_ps` -* [ ] `_mm256_broadcastss_ps` -* [ ] `_mm_broadcastw_epi16` -* [ ] `_mm256_broadcastw_epi16` -* [x] `_mm256_cmpeq_epi8` -* [x] `_mm256_cmpeq_epi16` -* [x] `_mm256_cmpeq_epi32` -* [x] `_mm256_cmpeq_epi64` -* [x] `_mm256_cmpgt_epi8` -* [x] `_mm256_cmpgt_epi16` -* [x] `_mm256_cmpgt_epi32` -* [x] `_mm256_cmpgt_epi64` -* [ ] `_mm256_cvtepi16_epi32` -* [ ] `_mm256_cvtepi16_epi64` -* [ ] `_mm256_cvtepi32_epi64` -* [ ] `_mm256_cvtepi8_epi16` -* [ ] `_mm256_cvtepi8_epi32` -* [ ] `_mm256_cvtepi8_epi64` -* [ ] `_mm256_cvtepu16_epi32` -* [ ] `_mm256_cvtepu16_epi64` -* [ ] `_mm256_cvtepu32_epi64` -* [ ] `_mm256_cvtepu8_epi16` -* [ ] `_mm256_cvtepu8_epi32` -* [ ] `_mm256_cvtepu8_epi64` -* [ ] `_mm256_extracti128_si256` -* [x] `_mm256_hadd_epi16` -* [x] `_mm256_hadd_epi32` -* [x] `_mm256_hadds_epi16` -* [x] `_mm256_hsub_epi16` -* [x] `_mm256_hsub_epi32` -* [x] `_mm256_hsubs_epi16` -* [ ] `_mm_i32gather_pd` -* [ ] `_mm256_i32gather_pd` -* [ ] `_mm_i32gather_ps` -* [ ] `_mm256_i32gather_ps` -* [ ] `_mm_i32gather_epi32` -* [ ] `_mm256_i32gather_epi32` -* [ ] `_mm_i32gather_epi64` -* [ ] `_mm256_i32gather_epi64` -* [ ] `_mm_i64gather_pd` -* [ ] `_mm256_i64gather_pd` -* [ ] `_mm_i64gather_ps` -* [ ] `_mm256_i64gather_ps` -* [ ] `_mm_i64gather_epi32` -* [ ] `_mm256_i64gather_epi32` -* [ ] `_mm_i64gather_epi64` -* [ ] `_mm256_i64gather_epi64` -* [ ] `_mm256_inserti128_si256` -* [x] `_mm256_madd_epi16` -* [x] `_mm256_maddubs_epi16` -* [ ] `_mm_mask_i32gather_pd` -* [ ] `_mm256_mask_i32gather_pd` -* [ ] `_mm_mask_i32gather_ps` -* [ ] `_mm256_mask_i32gather_ps` -* [ ] `_mm_mask_i32gather_epi32` -* [ ] `_mm256_mask_i32gather_epi32` -* [ ] `_mm_mask_i32gather_epi64` -* [ ] `_mm256_mask_i32gather_epi64` -* [ ] `_mm_mask_i64gather_pd` -* [ ] `_mm256_mask_i64gather_pd` -* [ ] `_mm_mask_i64gather_ps` -* [ ] `_mm256_mask_i64gather_ps` -* [ ] `_mm_mask_i64gather_epi32` -* [ ] `_mm256_mask_i64gather_epi32` -* [ ] `_mm_mask_i64gather_epi64` -* [ ] `_mm256_mask_i64gather_epi64` -* [ ] `_mm_maskload_epi32` -* [ ] `_mm256_maskload_epi32` -* [ ] `_mm_maskload_epi64` -* [ ] `_mm256_maskload_epi64` -* [ ] `_mm_maskstore_epi32` -* [ ] `_mm256_maskstore_epi32` -* [ ] `_mm_maskstore_epi64` -* [ ] `_mm256_maskstore_epi64` -* [x] `_mm256_max_epi8` -* [x] `_mm256_max_epi16` -* [x] `_mm256_max_epi32` -* [x] `_mm256_max_epu8` -* [x] `_mm256_max_epu16` -* [x] `_mm256_max_epu32` -* [x] `_mm256_min_epi8` -* [x] `_mm256_min_epi16` -* [x] `_mm256_min_epi32` -* [x] `_mm256_min_epu8` -* [x] `_mm256_min_epu16` -* [x] `_mm256_min_epu32` -* [ ] `_mm256_movemask_epi8` -* [ ] `_mm256_mpsadbw_epu8` -* [x] `_mm256_mul_epi32` -* [x] `_mm256_mul_epu32` -* [x] `_mm256_mulhi_epi16` -* [x] `_mm256_mulhi_epu16` -* [x] `_mm256_mulhrs_epi16` -* [x] `_mm256_mullo_epi16` -* [x] `_mm256_mullo_epi32` -* [x] `_mm256_or_si256` -* [x] `_mm256_packs_epi16` -* [x] `_mm256_packs_epi32` -* [x] `_mm256_packus_epi16` -* [x] `_mm256_packus_epi32` -* [ ] `_mm256_permute2x128_si256` -* [ ] `_mm256_permute4x64_epi64` -* [ ] `_mm256_permute4x64_pd` -* [ ] `_mm256_permutevar8x32_epi32` -* [ ] `_mm256_permutevar8x32_ps` -* [x] `_mm256_sad_epu8` -* [ ] `_mm256_shuffle_epi32` -* [ ] `_mm256_shuffle_epi8` -* [ ] `_mm256_shufflehi_epi16` -* [ ] `_mm256_shufflelo_epi16` -* [x] `_mm256_sign_epi8` -* [x] `_mm256_sign_epi16` -* [x] `_mm256_sign_epi32` -* [ ] `_mm256_slli_si256` -* [ ] `_mm256_bslli_epi128` -* [x] `_mm256_sll_epi16` -* [x] `_mm256_slli_epi16` -* [x] `_mm256_sll_epi32` -* [x] `_mm256_slli_epi32` -* [x] `_mm256_sll_epi64` -* [x] `_mm256_slli_epi64` -* [x] `_mm_sllv_epi32` -* [x] `_mm256_sllv_epi32` -* [x] `_mm_sllv_epi64` -* [x] `_mm256_sllv_epi64` -* [x] `_mm256_sra_epi16` -* [x] `_mm256_srai_epi16` -* [x] `_mm256_sra_epi32` -* [x] `_mm256_srai_epi32` -* [x] `_mm_srav_epi32` -* [x] `_mm256_srav_epi32` -* [x] `_mm256_srli_si256` -* [ ] `_mm256_bsrli_epi128` -* [x] `_mm256_srl_epi16` -* [x] `_mm256_srli_epi16` -* [x] `_mm256_srl_epi32` -* [x] `_mm256_srli_epi32` -* [x] `_mm256_srl_epi64` -* [x] `_mm256_srli_epi64` -* [x] `_mm_srlv_epi32` -* [x] `_mm256_srlv_epi32` -* [x] `_mm_srlv_epi64` -* [x] `_mm256_srlv_epi64` -* [ ] `_mm256_stream_load_si256` -* [x] `_mm256_sub_epi8` -* [x] `_mm256_sub_epi16` -* [x] `_mm256_sub_epi32` -* [x] `_mm256_sub_epi64` -* [x] `_mm256_subs_epi8` -* [x] `_mm256_subs_epi16` -* [x] `_mm256_subs_epu8` -* [x] `_mm256_subs_epu16` -* [x] `_mm256_xor_si256` -* [ ] `_mm256_unpackhi_epi8` -* [ ] `_mm256_unpackhi_epi16` -* [ ] `_mm256_unpackhi_epi32` -* [ ] `_mm256_unpackhi_epi64` -* [ ] `_mm256_unpacklo_epi8` -* [ ] `_mm256_unpacklo_epi16` -* [ ] `_mm256_unpacklo_epi32` -* [ ] `_mm256_unpacklo_epi64` From 60fbf44231e22dd40d4be12ef85a961d075784d0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 26 Sep 2017 15:55:36 -0700 Subject: [PATCH 7/7] Run bmi tests everywhere --- library/stdarch/src/x86/bmi.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs index 44842c82cdef..5bf3e8974704 100644 --- a/library/stdarch/src/x86/bmi.rs +++ b/library/stdarch/src/x86/bmi.rs @@ -183,7 +183,7 @@ pub fn _mm_tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } -#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(test)] mod tests { use stdsimd_test::simd_test; @@ -195,6 +195,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _bextr_u64() { assert_eq!(bmi::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); } @@ -214,6 +215,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _andn_u64() { assert_eq!(bmi::_andn_u64(0, 0), 0); assert_eq!(bmi::_andn_u64(0, 1), 1); @@ -233,6 +235,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _blsi_u64() { assert_eq!(bmi::_blsi_u64(0b1101_0000u64), 0b0001_0000u64); } @@ -243,6 +246,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _blsmsk_u64() { assert_eq!(bmi::_blsmsk_u64(0b0011_0000u64), 0b0001_1111u64); } @@ -254,6 +258,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _blsr_u64() { /// TODO: test the behavior when the input is 0 assert_eq!(bmi::_blsr_u64(0b0011_0000u64), 0b0010_0000u64); @@ -274,6 +279,7 @@ mod tests { } #[simd_test = "bmi"] + #[cfg(not(target_arch = "x86"))] fn _tzcnt_u64() { assert_eq!(bmi::_tzcnt_u64(0b0000_0001u64), 0u64); assert_eq!(bmi::_tzcnt_u64(0b0000_0000u64), 64u64);