diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs
index f11192d87b58..9c9c75f360fc 100644
--- a/library/stdarch/src/x86/avx2.rs
+++ b/library/stdarch/src/x86/avx2.rs
@@ -2,9 +2,13 @@
 use v256::*;
 use v128::*;
 use x86::__m256i;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 /// Computes the absolute values of packed 32-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsd))]
 pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
     unsafe { pabsd(a) }
 }
@@ -12,6 +16,7 @@ pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
 /// Computes the absolute values of packed 16-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsw))]
 pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
     unsafe { pabsw(a) }
 }
@@ -19,6 +24,7 @@ pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
 /// Computes the absolute values of packed 8-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsb))]
 pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
     unsafe { pabsb(a) }
 }
@@ -26,6 +32,7 @@ pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
 /// Add packed 64-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddq))]
 pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a + b
 }
@@ -33,6 +40,7 @@ pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Add packed 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddd))]
 pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a + b
 }
@@ -40,6 +48,7 @@ pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Add packed 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddw))]
 pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a + b
 }
@@ -47,6 +56,7 @@ pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Add packed 8-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddb))]
 pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a + b
 }
@@ -54,6 +64,7 @@ pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Add packed 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddsb))]
 pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { paddsb(a, b) }
 }
@@ -61,6 +72,7 @@ pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Add packed 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddsw))]
 pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { paddsw(a, b) }
 }
@@ -68,6 +80,7 @@ pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddusb))]
 pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { paddusb(a, b) }
 }
@@ -75,6 +88,7 @@ pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddusw))]
 pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { paddusw(a, b) }
 }
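The hunks above annotate both the wrapping `vpadd*` adds and the saturating `vpadds*`/`vpaddus*` variants. A minimal sketch of the behavioral difference, written against today's stable `std::arch` API rather than the typed-vector API this patch targets; `adds_demo` is an illustrative name, not part of the patch:

```rust
// Sketch only: wrapping vs. saturating packed adds via the stable std::arch API.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn adds_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi8(100);
    let b = _mm256_set1_epi8(100);
    let mut wrapped = [0i8; 32];
    let mut clamped = [0i8; 32];
    // vpaddb wraps: 100 + 100 = 200, which is -56 as an i8.
    _mm256_storeu_si256(wrapped.as_mut_ptr() as *mut __m256i, _mm256_add_epi8(a, b));
    // vpaddsb saturates: 100 + 100 clamps to i8::MAX = 127.
    _mm256_storeu_si256(clamped.as_mut_ptr() as *mut __m256i, _mm256_adds_epi8(a, b));
    assert_eq!((wrapped[0], clamped[0]), (-56, 127));
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            unsafe { adds_demo() }; // only safe once AVX2 support is confirmed
        }
    }
}
```

The runtime check matters: `#[target_feature]` makes the call sound only on CPUs that actually support AVX2.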
@@ -85,6 +99,7 @@ pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vandps))]
 pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
     a & b
 }
@@ -93,6 +108,7 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
 /// in `a` and then AND with `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vandnps))]
 pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
     (!a) & b
 }
@@ -100,6 +116,7 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
 /// Average packed unsigned 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpavgw))]
 pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pavgw(a, b) }
 }
@@ -107,6 +124,7 @@ pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
 /// Average packed unsigned 8-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpavgb))]
 pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pavgb(a, b) }
 }
@@ -118,6 +136,7 @@ pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
 /// Blend packed 8-bit integers from `a` and `b` using `mask`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpblendvb))]
 pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
     unsafe { pblendvb(a,b,mask) }
 }
@@ -143,6 +162,7 @@ pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
 /// Compare packed 64-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqq))]
 pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.eq(b)
 }
@@ -150,6 +170,7 @@ pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Compare packed 32-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqd))]
 pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.eq(b)
 }
@@ -157,6 +178,7 @@ pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Compare packed 16-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqw))]
 pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.eq(b)
 }
@@ -164,6 +186,7 @@ pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Compare packed 8-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqb))]
 pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.eq(b)
 }
@@ -171,6 +194,7 @@ pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Compare packed 64-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtq))]
 pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.gt(b)
 }
@@ -178,6 +202,7 @@ pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Compare packed 32-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtd))]
 pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.gt(b)
 }
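The compare intrinsics above return lanes of all-ones or all-zeros, which is exactly the mask shape `vpblendvb` consumes. A hypothetical helper sketching that composition against the stable `std::arch` API (`max_epi32_via_blend` is an illustrative name):

```rust
// Sketch only: compare-then-blend, a hand-rolled per-lane maximum.
#[cfg(target_arch = "x86_64")]
mod demo {
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx2")]
    pub unsafe fn max_epi32_via_blend(a: __m256i, b: __m256i) -> __m256i {
        let b_gt_a = _mm256_cmpgt_epi32(b, a); // all-ones in lanes where b > a
        _mm256_blendv_epi8(a, b, b_gt_a)       // take b in those lanes, else a
    }
}
```

`vpblendvb` selects by the high bit of each byte, and a compare result sets every bit of a matching lane, so the two compose without any mask massaging.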
@@ -185,6 +210,7 @@ pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Compare packed 16-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtw))]
 pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.gt(b)
 }
@@ -192,6 +218,7 @@ pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Compare packed 8-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtb))]
 pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.gt(b)
 }
@@ -213,6 +240,7 @@ pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddw))]
 pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phaddw(a, b) }
 }
@@ -220,6 +248,7 @@ pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddd))]
 pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { phaddd(a, b) }
 }
@@ -228,6 +257,7 @@ pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddsw))]
 pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phaddsw(a, b) }
 }
@@ -235,6 +265,7 @@ pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubw))]
 pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phsubw(a, b) }
 }
@@ -242,6 +273,7 @@ pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubd))]
 pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { phsubd(a, b) }
 }
@@ -250,6 +282,7 @@ pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubsw))]
 pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phsubsw(a, b) }
 }
@@ -294,6 +327,7 @@ pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// of intermediate 32-bit integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
 pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
     unsafe { pmaddwd(a, b) }
 }
@@ -304,6 +338,7 @@ pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
 /// signed 16-bit integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
 pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
     unsafe { pmaddubsw(a, b) }
 }
@@ -321,6 +356,7 @@ pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsw))]
 pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pmaxsw(a, b) }
 }
@@ -329,6 +365,7 @@ pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsd))]
 pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { pmaxsd(a, b) }
 }
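`vpmaddwd`, annotated above, is the classic dot-product building block: it multiplies adjacent 16-bit pairs and sums each pair into a 32-bit lane. A hedged sketch via the stable `std::arch` API (`dot_i16` is a hypothetical helper, not part of the patch):

```rust
// Sketch only: a 16-element i16 dot product built on vpmaddwd.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn dot_i16(a: &[i16; 16], b: &[i16; 16]) -> i32 {
    use std::arch::x86_64::*;
    let va = _mm256_loadu_si256(a.as_ptr() as *const __m256i);
    let vb = _mm256_loadu_si256(b.as_ptr() as *const __m256i);
    // Eight i32 lanes, each holding a[2i]*b[2i] + a[2i+1]*b[2i+1].
    let pairs = _mm256_madd_epi16(va, vb);
    let mut acc = [0i32; 8];
    _mm256_storeu_si256(acc.as_mut_ptr() as *mut __m256i, pairs);
    acc.iter().sum()
}
```

The scalar tail sum stands in for the horizontal-add step; a production kernel would keep accumulating in vector registers across many such blocks before reducing.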
@@ -337,6 +374,7 @@ pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsb))]
 pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { pmaxsb(a, b) }
 }
@@ -345,6 +383,7 @@ pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxuw))]
 pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pmaxuw(a, b) }
 }
@@ -353,6 +392,7 @@ pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxud))]
 pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
     unsafe { pmaxud(a, b) }
 }
@@ -361,6 +401,7 @@ pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxub))]
 pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pmaxub(a, b) }
 }
@@ -369,6 +410,7 @@ pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsw))]
 pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pminsw(a, b) }
 }
@@ -377,6 +419,7 @@ pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsd))]
 pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { pminsd(a, b) }
 }
@@ -385,6 +428,7 @@ pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsb))]
 pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { pminsb(a, b) }
 }
@@ -393,6 +437,7 @@ pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminuw))]
 pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pminuw(a, b) }
 }
@@ -401,6 +446,7 @@ pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminud))]
 pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
     unsafe { pminud(a, b) }
 }
@@ -409,6 +455,7 @@ pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminub))]
 pub fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pminub(a, b) }
 }
@@ -444,6 +491,7 @@ pub fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
 /// Return the 64-bit results.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmuldq))]
 pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
     unsafe { pmuldq(a, b) }
 }
@@ -454,6 +502,7 @@ pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
 /// Return the unsigned 64-bit results.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmuludq))]
 pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
     unsafe { pmuludq(a, b) }
 }
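The `vpmuldq`/`vpmuludq` pair above widens rather than truncates: only the low (even-indexed) 32-bit element of each 64-bit half participates, and the product lands in a full 64-bit lane. A sketch of that lane selection using the stable `std::arch` API (names and values are illustrative):

```rust
// Sketch only: vpmuldq reads elements 0, 2, 4, 6 and widens to i64.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn widening_mul_demo() {
    use std::arch::x86_64::*;
    // _mm256_set_epi32 lists elements high-to-low, so element i holds i here.
    let a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm256_set1_epi32(100_000);
    let mut out = [0i64; 4];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_mul_epi32(a, b));
    // Odd-indexed elements of `a` (1, 3, 5, 7) never participate.
    assert_eq!(out, [0, 200_000, 400_000, 600_000]);
}
```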
@@ -463,6 +512,7 @@ pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
 /// intermediate integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhw))]
 pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pmulhw(a, b) }
 }
@@ -472,6 +522,7 @@ pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// intermediate integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhuw))]
 pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pmulhuw(a, b) }
 }
@@ -481,6 +532,7 @@ pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// intermediate integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmullw))]
 pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
     a * b
 }
@@ -491,6 +543,7 @@ pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
 /// intermediate integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulld))]
 pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
     a * b
 }
@@ -501,6 +554,7 @@ pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
 /// return bits [16:1]
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhrsw))]
 pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
     unsafe { pmulhrsw(a, b) }
 }
@@ -509,6 +563,7 @@ pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
 /// and `b`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vorps))]
 pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
     a | b
 }
@@ -517,6 +572,7 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
 /// using signed saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpacksswb))]
 pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
     unsafe { packsswb(a, b) }
 }
@@ -525,6 +581,7 @@ pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
 /// using signed saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackssdw))]
 pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
     unsafe { packssdw(a, b) }
 }
@@ -533,6 +590,7 @@ pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
 /// using unsigned saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackuswb))]
 pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
     unsafe { packuswb(a, b) }
 }
@@ -541,6 +599,7 @@ pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
 /// using unsigned saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackusdw))]
 pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
     unsafe { packusdw(a, b) }
 }
@@ -557,6 +616,7 @@ pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
 /// integers in the low 16 bits of the 64-bit return value
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsadbw))]
 pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
     unsafe { psadbw(a, b) }
 }
@@ -571,6 +631,7 @@ pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignw))]
 pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { psignw(a, b) }
 }
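The pack intrinsics above carry a well-known AVX2 gotcha: saturation is straightforward, but packing happens independently within each 128-bit lane, so the output interleaves the two sources by lane halves rather than concatenating them. A sketch via the stable `std::arch` API (values chosen only to exercise both saturation bounds):

```rust
// Sketch only: vpacksswb saturates i16 -> i8 per 128-bit lane, giving the
// byte order a_lo, b_lo, a_hi, b_hi across the 256-bit result.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn packs_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi16(300);  // above i8::MAX
    let b = _mm256_set1_epi16(-300); // below i8::MIN
    let mut out = [0i8; 32];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_packs_epi16(a, b));
    assert_eq!(out[0], 127);  // a-sourced bytes clamp to i8::MAX
    assert_eq!(out[8], -128); // b-sourced bytes clamp to i8::MIN
}
```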
@@ -580,6 +641,7 @@ pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignd))]
 pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { psignd(a, b) }
 }
@@ -589,6 +651,7 @@ pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignb))]
 pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { psignb(a, b) }
 }
@@ -597,6 +660,7 @@ pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllw))]
 pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psllw(a, count) }
 }
@@ -605,6 +669,7 @@ pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslld))]
 pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { pslld(a, count) }
 }
@@ -613,6 +678,7 @@ pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllq))]
 pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
     unsafe { psllq(a, count) }
 }
@@ -621,6 +687,7 @@ pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllw))] // TODO: should this be pslli
 pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { pslliw(a, imm8) }
 }
@@ -629,6 +696,7 @@ pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslld))] // TODO: should this be pslli
 pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psllid(a, imm8) }
 }
@@ -637,6 +705,7 @@ pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllq))] // TODO: should this be pslli
 pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     unsafe { pslliq(a, imm8) }
 }
@@ -648,6 +717,7 @@ pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvd))]
 pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psllvd(a, count) }
 }
@@ -657,6 +727,7 @@ pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvd))]
 pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psllvd256(a, count) }
 }
@@ -666,6 +737,7 @@ pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvq))]
 pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     unsafe { psllvq(a, count) }
 }
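The `vpsllv*` family annotated above differs from `vpsllw`/`vpslld` in a useful way: instead of one shift count applied to every element, each lane carries its own count. A small sketch against the stable `std::arch` API (name and values are illustrative):

```rust
// Sketch only: per-lane variable shifts with vpsllvd.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn sllv_demo() {
    use std::arch::x86_64::*;
    let ones = _mm256_set1_epi32(1);
    // Element i gets its own shift count i (set_epi32 lists high-to-low).
    let counts = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let mut out = [0i32; 8];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i,
                        _mm256_sllv_epi32(ones, counts));
    assert_eq!(out, [1, 2, 4, 8, 16, 32, 64, 128]); // 1 << i per lane
}
```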
@@ -675,6 +747,7 @@ pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvq))]
 pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     unsafe { psllvq256(a, count) }
 }
@@ -683,6 +756,7 @@ pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsraw))]
 pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psraw(a, count) }
 }
@@ -691,6 +765,7 @@ pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrad))]
 pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { psrad(a, count) }
 }
@@ -699,6 +774,7 @@ pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsraw))] // TODO: not vpsraiw?
 pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { psraiw(a, imm8) }
 }
@@ -707,6 +783,7 @@ pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrad))] // TODO: not vpsraid?
 pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psraid(a, imm8) }
 }
@@ -715,6 +792,7 @@ pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsravd))]
 pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psravd(a, count) }
 }
@@ -723,6 +801,7 @@ pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsravd))]
 pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psravd256(a, count) }
 }
@@ -732,6 +811,7 @@ pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlw))]
 pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psrlw(a, count) }
 }
@@ -740,6 +820,7 @@ pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrld))]
 pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { psrld(a, count) }
 }
@@ -748,6 +829,7 @@ pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlq))]
 pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
     unsafe { psrlq(a, count) }
 }
@@ -756,6 +838,7 @@ pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlw))] // TODO: not vpsrliw?
 pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { psrliw(a, imm8) }
 }
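The `sra*` and `srl*` families annotated above only differ on negative inputs: arithmetic shifts replicate the sign bit, logical shifts fill with zeros. A sketch of the contrast; note that in the current stable `std::arch` API the immediate is a const generic, which is an assumption relative to the `imm8: i32` signatures in this patch:

```rust
// Sketch only: arithmetic (vpsrad) vs. logical (vpsrld) right shift.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn shift_right_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi32(-16); // 0xFFFF_FFF0
    let mut arith = [0i32; 8];
    let mut logic = [0i32; 8];
    _mm256_storeu_si256(arith.as_mut_ptr() as *mut __m256i, _mm256_srai_epi32::<2>(a));
    _mm256_storeu_si256(logic.as_mut_ptr() as *mut __m256i, _mm256_srli_epi32::<2>(a));
    assert_eq!(arith[0], -4);          // sign bits shifted in
    assert_eq!(logic[0], 0x3FFF_FFFC); // zeros shifted in
}
```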
@@ -764,6 +847,7 @@ pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrld))] // TODO: not vpsrlid?
 pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psrlid(a, imm8) }
 }
@@ -772,6 +856,7 @@ pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlq))] // TODO: not vpsrliq?
 pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     unsafe { psrliq(a, imm8) }
 }
@@ -780,6 +865,7 @@ pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
 pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psrlvd(a, count) }
 }
@@ -788,6 +874,7 @@ pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
 pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psrlvd256(a, count) }
 }
@@ -796,6 +883,7 @@ pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
 pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     unsafe { psrlvq(a, count) }
 }
@@ -804,6 +892,7 @@ pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
 pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     unsafe { psrlvq256(a, count) }
 }
@@ -813,6 +902,7 @@ pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubw))]
 pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a - b
 }
@@ -820,6 +910,7 @@ pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubd))]
 pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a - b
 }
@@ -827,6 +918,7 @@ pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubq))]
 pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a - b
 }
@@ -834,6 +926,7 @@ pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubb))]
 pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a - b
 }
@@ -842,6 +935,7 @@ pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubsw))]
 pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { psubsw(a, b) }
 }
@@ -850,6 +944,7 @@ pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubsb))]
 pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { psubsb(a, b) }
 }
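The unsigned saturating subtracts annotated just below clamp at zero, which enables a classic trick: two opposed `vpsubusb` results OR-ed together give a per-byte absolute difference. A hedged sketch (`absdiff_u8` is a hypothetical helper, not part of the patch):

```rust
// Sketch only: per-byte |a - b| from two zero-clamped subtractions.
#[cfg(target_arch = "x86_64")]
mod demo {
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx2")]
    pub unsafe fn absdiff_u8(a: __m256i, b: __m256i) -> __m256i {
        // One operand of the OR is always zero, the other is |a - b|.
        _mm256_or_si256(_mm256_subs_epu8(a, b), _mm256_subs_epu8(b, a))
    }
}
```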
@@ -858,6 +953,7 @@ pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// integers in `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubusw))]
 pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { psubusw(a, b) }
 }
@@ -866,6 +962,7 @@ pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// integers in `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubusb))]
 pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { psubusb(a, b) }
 }
@@ -883,6 +980,7 @@ pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// in `a` and `b`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vxorps))]
 pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
     a ^ b
 }
diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs
index f6deae35ec0c..ef3a8ab58f4b 100644
--- a/library/stdarch/src/x86/sse.rs
+++ b/library/stdarch/src/x86/sse.rs
@@ -127,7 +127,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
 }
 
 /// Compare the first single-precision (32-bit) floating-point element of `a`
-/// and `b`, and return the minimum value in the first element of the return
+/// and `b`, and return the minimum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -146,7 +146,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare the first single-precision (32-bit) floating-point element of `a`
-/// and `b`, and return the maximum value in the first element of the return
+/// and `b`, and return the maximum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -168,6 +168,7 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
 /// from the high half of `a` and `b`;
 #[inline(always)]
 #[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(unpckhps))]
 pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
     unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) }
 }
diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs
index a91695d9bc43..447bdd19d127 100644
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@@ -1,8 +1,12 @@
 use v128::*;
 use x86::__m128i;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 #[inline(always)]
 #[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pblendvb))]
 pub fn _mm_blendv_epi8(
     a: __m128i,
     b: __m128i,
diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs
index 5729dbca1997..9adde849cd94 100644
--- a/library/stdarch/src/x86/ssse3.rs
+++ b/library/stdarch/src/x86/ssse3.rs
@@ -1,15 +1,17 @@
 use v128::*;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 /// Compute the absolute value of packed 8-bit signed integers in `a` and
 /// return the unsigned results.
 #[inline(always)]
 #[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pabsb))]
 pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
     unsafe { pabsb128(a) }
 }
-
-
 /// Shuffle bytes from `a` according to the content of `b`.
 ///
 /// The last 4 bits of each byte of `b` are used as addresses
@@ -36,6 +38,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
 /// ```
 #[inline(always)]
 #[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pshufb))]
 pub fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
     unsafe { pshufb128(a, b) }
 }
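`pshufb`, annotated in the last hunk, is an arbitrary byte permutation: each control byte addresses a source byte, and setting its high bit zeroes that output lane instead. A sketch of the common byte-reversal use via the stable `std::arch` API (name and values are illustrative):

```rust
// Sketch only: pshufb as a full byte permutation that reverses a vector.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn reverse_bytes_demo() {
    use std::arch::x86_64::*;
    // Byte i of `v` holds the value i (_mm_set_epi8 lists bytes high-to-low).
    let v = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    // Control byte i selects source byte 15 - i, reversing the vector.
    let ctl = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let mut out = [0i8; 16];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_shuffle_epi8(v, ctl));
    assert_eq!(out, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
}
```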