diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs
index f11192d87b58..9c9c75f360fc 100644
--- a/library/stdarch/src/x86/avx2.rs
+++ b/library/stdarch/src/x86/avx2.rs
@@ -2,9 +2,13 @@
 use v256::*;
 use v128::*;
 use x86::__m256i;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 /// Computes the absolute values of packed 32-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsd))]
 pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
     unsafe { pabsd(a) }
 }
@@ -12,6 +16,7 @@ pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
 /// Computes the absolute values of packed 16-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsw))]
 pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
     unsafe { pabsw(a) }
 }
@@ -19,6 +24,7 @@ pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
 /// Computes the absolute values of packed 8-bit integers in `a`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpabsb))]
 pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
     unsafe { pabsb(a) }
 }
@@ -26,6 +32,7 @@ pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
 /// Add packed 64-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddq))]
 pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a + b
 }
@@ -33,6 +40,7 @@ pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Add packed 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddd))]
 pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a + b
 }
@@ -40,6 +48,7 @@ pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Add packed 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddw))]
 pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a + b
 }
@@ -47,6 +56,7 @@ pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Add packed 8-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddb))]
 pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a + b
 }
@@ -54,6 +64,7 @@ pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Add packed 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddsb))]
 pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { paddsb(a, b) }
 }
@@ -61,6 +72,7 @@ pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Add packed 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddsw))]
 pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { paddsw(a, b) }
 }
@@ -68,6 +80,7 @@ pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddusb))]
 pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { paddusb(a, b) }
 }
@@ -75,6 +88,7 @@ pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpaddusw))]
 pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { paddusw(a, b) }
 }
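The hunks above annotate both the wrapping `vpadd*` adds and the saturating `vpadds*`/`vpaddus*` variants. A minimal sketch of the behavioral difference, written against today's stable `std::arch` API rather than the typed-vector API this patch targets; `adds_demo` is an illustrative name, not part of the patch:

```rust
// Sketch only: wrapping vs. saturating packed adds via the stable std::arch API.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn adds_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi8(100);
    let b = _mm256_set1_epi8(100);
    let mut wrapped = [0i8; 32];
    let mut clamped = [0i8; 32];
    // vpaddb wraps: 100 + 100 = 200, which is -56 as an i8.
    _mm256_storeu_si256(wrapped.as_mut_ptr() as *mut __m256i, _mm256_add_epi8(a, b));
    // vpaddsb saturates: 100 + 100 clamps to i8::MAX = 127.
    _mm256_storeu_si256(clamped.as_mut_ptr() as *mut __m256i, _mm256_adds_epi8(a, b));
    assert_eq!((wrapped[0], clamped[0]), (-56, 127));
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            unsafe { adds_demo() }; // only safe once AVX2 support is confirmed
        }
    }
}
```

The runtime check matters: `#[target_feature]` makes the call sound only on CPUs that actually support AVX2.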
@@ -85,6 +99,7 @@ pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vandps))]
 pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
     a & b
 }
@@ -93,6 +108,7 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
 /// in `a` and then AND with `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vandnps))]
 pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
     (!a) & b
 }
@@ -100,6 +116,7 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
 /// Average packed unsigned 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpavgw))]
 pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pavgw(a, b) }
 }
@@ -107,6 +124,7 @@ pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
 /// Average packed unsigned 8-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpavgb))]
 pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pavgb(a, b) }
 }
@@ -118,6 +136,7 @@ pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
 /// Blend packed 8-bit integers from `a` and `b` using `mask`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpblendvb))]
 pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
     unsafe { pblendvb(a,b,mask) }
 }
@@ -143,6 +162,7 @@ pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
 /// Compare packed 64-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqq))]
 pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.eq(b)
 }
@@ -150,6 +170,7 @@ pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Compare packed 32-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqd))]
 pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.eq(b)
 }
@@ -157,6 +178,7 @@ pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Compare packed 16-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqw))]
 pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.eq(b)
 }
@@ -164,6 +186,7 @@ pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Compare packed 8-bit integers in `a` and `b` for equality.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpeqb))]
 pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.eq(b)
 }
@@ -171,6 +194,7 @@ pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Compare packed 64-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtq))]
 pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.gt(b)
 }
@@ -178,6 +202,7 @@ pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Compare packed 32-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtd))]
 pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.gt(b)
 }
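The compare intrinsics above return lanes of all-ones or all-zeros, which is exactly the mask shape `vpblendvb` consumes. A hypothetical helper sketching that composition against the stable `std::arch` API (`max_epi32_via_blend` is an illustrative name):

```rust
// Sketch only: compare-then-blend, a hand-rolled per-lane maximum.
#[cfg(target_arch = "x86_64")]
mod demo {
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx2")]
    pub unsafe fn max_epi32_via_blend(a: __m256i, b: __m256i) -> __m256i {
        let b_gt_a = _mm256_cmpgt_epi32(b, a); // all-ones in lanes where b > a
        _mm256_blendv_epi8(a, b, b_gt_a)       // take b in those lanes, else a
    }
}
```

`vpblendvb` selects by the high bit of each byte, and a compare result sets every bit of a matching lane, so the two compose without any mask massaging.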
@@ -185,6 +210,7 @@ pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Compare packed 16-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtw))]
 pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.gt(b)
 }
@@ -192,6 +218,7 @@ pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Compare packed 8-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpcmpgtb))]
 pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.gt(b)
 }
@@ -213,6 +240,7 @@ pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddw))]
 pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phaddw(a, b) }
 }
@@ -220,6 +248,7 @@ pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddd))]
 pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { phaddd(a, b) }
 }
@@ -228,6 +257,7 @@ pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphaddsw))]
 pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phaddsw(a, b) }
 }
@@ -235,6 +265,7 @@ pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubw))]
 pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phsubw(a, b) }
 }
@@ -242,6 +273,7 @@ pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubd))]
 pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { phsubd(a, b) }
 }
@@ -250,6 +282,7 @@ pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vphsubsw))]
 pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { phsubsw(a, b) }
 }
@@ -294,6 +327,7 @@ pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// of intermediate 32-bit integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
 pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
     unsafe { pmaddwd(a, b) }
 }
@@ -304,6 +338,7 @@ pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
 /// signed 16-bit integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
 pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
     unsafe { pmaddubsw(a, b) }
 }
@@ -321,6 +356,7 @@ pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsw))]
 pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pmaxsw(a, b) }
 }
@@ -329,6 +365,7 @@ pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsd))]
 pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { pmaxsd(a, b) }
 }
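`vpmaddwd`, annotated above, is the classic dot-product building block: it multiplies adjacent 16-bit pairs and sums each pair into a 32-bit lane. A hedged sketch via the stable `std::arch` API (`dot_i16` is a hypothetical helper, not part of the patch):

```rust
// Sketch only: a 16-element i16 dot product built on vpmaddwd.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn dot_i16(a: &[i16; 16], b: &[i16; 16]) -> i32 {
    use std::arch::x86_64::*;
    let va = _mm256_loadu_si256(a.as_ptr() as *const __m256i);
    let vb = _mm256_loadu_si256(b.as_ptr() as *const __m256i);
    // Eight i32 lanes, each holding a[2i]*b[2i] + a[2i+1]*b[2i+1].
    let pairs = _mm256_madd_epi16(va, vb);
    let mut acc = [0i32; 8];
    _mm256_storeu_si256(acc.as_mut_ptr() as *mut __m256i, pairs);
    acc.iter().sum()
}
```

The scalar tail sum stands in for the horizontal-add step; a production kernel would keep accumulating in vector registers across many such blocks before reducing.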
@@ -337,6 +374,7 @@ pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxsb))]
 pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { pmaxsb(a, b) }
 }
@@ -345,6 +383,7 @@ pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxuw))]
 pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pmaxuw(a, b) }
 }
@@ -353,6 +392,7 @@ pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxud))]
 pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
     unsafe { pmaxud(a, b) }
 }
@@ -361,6 +401,7 @@ pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
 /// the packed maximum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmaxub))]
 pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pmaxub(a, b) }
 }
@@ -369,6 +410,7 @@ pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsw))]
 pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pminsw(a, b) }
 }
@@ -377,6 +419,7 @@ pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsd))]
 pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { pminsd(a, b) }
 }
@@ -385,6 +428,7 @@ pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminsb))]
 pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { pminsb(a, b) }
 }
@@ -393,6 +437,7 @@ pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminuw))]
 pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pminuw(a, b) }
 }
@@ -401,6 +446,7 @@ pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminud))]
 pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
     unsafe { pminud(a, b) }
 }
@@ -409,6 +455,7 @@ pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
 /// the packed minimum values.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpminub))]
 pub fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { pminub(a, b) }
 }
@@ -444,6 +491,7 @@ pub fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
 /// Return the 64-bit results.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmuldq))]
 pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
     unsafe { pmuldq(a, b) }
 }
@@ -454,6 +502,7 @@ pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
 /// Return the unsigned 64-bit results.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmuludq))]
 pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
     unsafe { pmuludq(a, b) }
 }
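The `vpmuldq`/`vpmuludq` pair above widens rather than truncates: only the low (even-indexed) 32-bit element of each 64-bit half participates, and the product lands in a full 64-bit lane. A sketch of that lane selection using the stable `std::arch` API (names and values are illustrative):

```rust
// Sketch only: vpmuldq reads elements 0, 2, 4, 6 and widens to i64.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn widening_mul_demo() {
    use std::arch::x86_64::*;
    // _mm256_set_epi32 lists elements high-to-low, so element i holds i here.
    let a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm256_set1_epi32(100_000);
    let mut out = [0i64; 4];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_mul_epi32(a, b));
    // Odd-indexed elements of `a` (1, 3, 5, 7) never participate.
    assert_eq!(out, [0, 200_000, 400_000, 600_000]);
}
```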
@@ -463,6 +512,7 @@ pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
 /// intermediate integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhw))]
 pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { pmulhw(a, b) }
 }
@@ -472,6 +522,7 @@ pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// intermediate integers.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhuw))]
 pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { pmulhuw(a, b) }
 }
@@ -481,6 +532,7 @@ pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// intermediate integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmullw))]
 pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
     a * b
 }
@@ -491,6 +543,7 @@ pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
 /// intermediate integers
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulld))]
 pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
     a * b
 }
@@ -501,6 +554,7 @@ pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
 /// return bits [16:1]
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpmulhrsw))]
 pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
     unsafe { pmulhrsw(a, b) }
 }
@@ -509,6 +563,7 @@ pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
 /// and `b`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vorps))]
 pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
     a | b
 }
@@ -517,6 +572,7 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
 /// using signed saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpacksswb))]
 pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
     unsafe { packsswb(a, b) }
 }
@@ -525,6 +581,7 @@ pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
 /// using signed saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackssdw))]
 pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
     unsafe { packssdw(a, b) }
 }
@@ -533,6 +590,7 @@ pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
 /// using unsigned saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackuswb))]
 pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
     unsafe { packuswb(a, b) }
 }
@@ -541,6 +599,7 @@ pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
 /// using unsigned saturation
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpackusdw))]
 pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
     unsafe { packusdw(a, b) }
 }
@@ -557,6 +616,7 @@ pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
 /// integers in the low 16 bits of the 64-bit return value
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsadbw))]
 pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
     unsafe { psadbw(a, b) }
 }
@@ -571,6 +631,7 @@ pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignw))]
 pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { psignw(a, b) }
 }
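The pack intrinsics above carry a well-known AVX2 gotcha: saturation is straightforward, but packing happens independently within each 128-bit lane, so the output interleaves the two sources by lane halves rather than concatenating them. A sketch via the stable `std::arch` API (values chosen only to exercise both saturation bounds):

```rust
// Sketch only: vpacksswb saturates i16 -> i8 per 128-bit lane, giving the
// byte order a_lo, b_lo, a_hi, b_hi across the 256-bit result.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn packs_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi16(300);  // above i8::MAX
    let b = _mm256_set1_epi16(-300); // below i8::MIN
    let mut out = [0i8; 32];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_packs_epi16(a, b));
    assert_eq!(out[0], 127);  // a-sourced bytes clamp to i8::MAX
    assert_eq!(out[8], -128); // b-sourced bytes clamp to i8::MIN
}
```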
@@ -580,6 +641,7 @@ pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignd))]
 pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
     unsafe { psignd(a, b) }
 }
@@ -589,6 +651,7 @@ pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsignb))]
 pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { psignb(a, b) }
 }
@@ -597,6 +660,7 @@ pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllw))]
 pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psllw(a, count) }
 }
@@ -605,6 +669,7 @@ pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslld))]
 pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { pslld(a, count) }
 }
@@ -613,6 +678,7 @@ pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// shifting in zeros, and return the result
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllq))]
 pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
     unsafe { psllq(a, count) }
 }
@@ -621,6 +687,7 @@ pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllw))] // TODO: should this be pslli
 pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { pslliw(a, imm8) }
 }
@@ -629,6 +696,7 @@ pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpslld))] // TODO: should this be pslli
 pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psllid(a, imm8) }
 }
@@ -637,6 +705,7 @@ pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// shifting in zeros, return the results;
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllq))] // TODO: should this be pslli
 pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     unsafe { pslliq(a, imm8) }
 }
@@ -648,6 +717,7 @@ pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvd))]
 pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psllvd(a, count) }
 }
@@ -657,6 +727,7 @@ pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvd))]
 pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psllvd256(a, count) }
 }
@@ -666,6 +737,7 @@ pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvq))]
 pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     unsafe { psllvq(a, count) }
 }
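The `vpsllv*` family annotated above differs from `vpsllw`/`vpslld` in a useful way: instead of one shift count applied to every element, each lane carries its own count. A small sketch against the stable `std::arch` API (name and values are illustrative):

```rust
// Sketch only: per-lane variable shifts with vpsllvd.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn sllv_demo() {
    use std::arch::x86_64::*;
    let ones = _mm256_set1_epi32(1);
    // Element i gets its own shift count i (set_epi32 lists high-to-low).
    let counts = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    let mut out = [0i32; 8];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i,
                        _mm256_sllv_epi32(ones, counts));
    assert_eq!(out, [1, 2, 4, 8, 16, 32, 64, 128]); // 1 << i per lane
}
```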
@@ -675,6 +747,7 @@ pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
 /// shifting in zeros, and return the result.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsllvq))]
 pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     unsafe { psllvq256(a, count) }
 }
@@ -683,6 +756,7 @@ pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsraw))]
 pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psraw(a, count) }
 }
@@ -691,6 +765,7 @@ pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrad))]
 pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { psrad(a, count) }
 }
@@ -699,6 +774,7 @@ pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsraw))] // TODO: not vpsraiw?
 pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { psraiw(a, imm8) }
 }
@@ -707,6 +783,7 @@ pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrad))] // TODO: not vpsraid?
 pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psraid(a, imm8) }
 }
@@ -715,6 +792,7 @@ pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsravd))]
 pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psravd(a, count) }
 }
@@ -723,6 +801,7 @@ pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsravd))]
 pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psravd256(a, count) }
 }
@@ -732,6 +811,7 @@ pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlw))]
 pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
     unsafe { psrlw(a, count) }
 }
@@ -740,6 +820,7 @@ pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrld))]
 pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
     unsafe { psrld(a, count) }
 }
@@ -748,6 +829,7 @@ pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
 /// zeros.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlq))]
 pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
     unsafe { psrlq(a, count) }
 }
@@ -756,6 +838,7 @@ pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlw))] // TODO: not vpsrliw?
 pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     unsafe { psrliw(a, imm8) }
 }
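The `sra*` and `srl*` families annotated above only differ on negative inputs: arithmetic shifts replicate the sign bit, logical shifts fill with zeros. A sketch of the contrast; note that in the current stable `std::arch` API the immediate is a const generic, which is an assumption relative to the `imm8: i32` signatures in this patch:

```rust
// Sketch only: arithmetic (vpsrad) vs. logical (vpsrld) right shift.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn shift_right_demo() {
    use std::arch::x86_64::*;
    let a = _mm256_set1_epi32(-16); // 0xFFFF_FFF0
    let mut arith = [0i32; 8];
    let mut logic = [0i32; 8];
    _mm256_storeu_si256(arith.as_mut_ptr() as *mut __m256i, _mm256_srai_epi32::<2>(a));
    _mm256_storeu_si256(logic.as_mut_ptr() as *mut __m256i, _mm256_srli_epi32::<2>(a));
    assert_eq!(arith[0], -4);          // sign bits shifted in
    assert_eq!(logic[0], 0x3FFF_FFFC); // zeros shifted in
}
```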
@@ -764,6 +847,7 @@ pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrld))] // TODO: not vpsrlid?
 pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     unsafe { psrlid(a, imm8) }
 }
@@ -772,6 +856,7 @@ pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
 /// zeros
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlq))] // TODO: not vpsrliq?
 pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     unsafe { psrliq(a, imm8) }
 }
@@ -780,6 +865,7 @@ pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
 pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     unsafe { psrlvd(a, count) }
 }
@@ -788,6 +874,7 @@ pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvd))]
 pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     unsafe { psrlvd256(a, count) }
 }
@@ -796,6 +883,7 @@ pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
 pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     unsafe { psrlvq(a, count) }
 }
@@ -804,6 +892,7 @@ pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
 /// the corresponding element in `count` while shifting in zeros,
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
 pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     unsafe { psrlvq256(a, count) }
 }
@@ -813,6 +902,7 @@ pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubw))]
 pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a - b
 }
@@ -820,6 +910,7 @@ pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubd))]
 pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a - b
 }
@@ -827,6 +918,7 @@ pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubq))]
 pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a - b
 }
@@ -834,6 +926,7 @@ pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubb))]
 pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a - b
 }
@@ -842,6 +935,7 @@ pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubsw))]
 pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     unsafe { psubsw(a, b) }
 }
@@ -850,6 +944,7 @@ pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
 /// `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubsb))]
 pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
     unsafe { psubsb(a, b) }
 }
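The unsigned saturating subtracts annotated just below clamp at zero, which enables a classic trick: two opposed `vpsubusb` results OR-ed together give a per-byte absolute difference. A hedged sketch (`absdiff_u8` is a hypothetical helper, not part of the patch):

```rust
// Sketch only: per-byte |a - b| from two zero-clamped subtractions.
#[cfg(target_arch = "x86_64")]
mod demo {
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx2")]
    pub unsafe fn absdiff_u8(a: __m256i, b: __m256i) -> __m256i {
        // One operand of the OR is always zero, the other is |a - b|.
        _mm256_or_si256(_mm256_subs_epu8(a, b), _mm256_subs_epu8(b, a))
    }
}
```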
@@ -858,6 +953,7 @@ pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// integers in `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubusw))]
 pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
     unsafe { psubusw(a, b) }
 }
@@ -866,6 +962,7 @@ pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
 /// integers in `a` using saturation.
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vpsubusb))]
 pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
     unsafe { psubusb(a, b) }
 }
@@ -883,6 +980,7 @@ pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// in `a` and `b`
 #[inline(always)]
 #[target_feature = "+avx2"]
+#[cfg_attr(test, assert_instr(vxorps))]
 pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
     a ^ b
 }
diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs
index f6deae35ec0c..ef3a8ab58f4b 100644
--- a/library/stdarch/src/x86/sse.rs
+++ b/library/stdarch/src/x86/sse.rs
@@ -127,7 +127,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
 }
 
 /// Compare the first single-precision (32-bit) floating-point element of `a`
-/// and `b`, and return the minimum value in the first element of the return
+/// and `b`, and return the minimum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -146,7 +146,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare the first single-precision (32-bit) floating-point element of `a`
-/// and `b`, and return the maximum value in the first element of the return
+/// and `b`, and return the maximum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -168,6 +168,7 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
 /// from the high half of `a` and `b`;
 #[inline(always)]
 #[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(unpckhps))]
 pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
     unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) }
 }
diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs
index a91695d9bc43..447bdd19d127 100644
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@@ -1,8 +1,12 @@
 use v128::*;
 use x86::__m128i;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 #[inline(always)]
 #[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pblendvb))]
 pub fn _mm_blendv_epi8(
     a: __m128i,
     b: __m128i,
diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs
index 5729dbca1997..9adde849cd94 100644
--- a/library/stdarch/src/x86/ssse3.rs
+++ b/library/stdarch/src/x86/ssse3.rs
@@ -1,15 +1,17 @@
 use v128::*;
 
+#[cfg(test)]
+use assert_instr::assert_instr;
+
 /// Compute the absolute value of packed 8-bit signed integers in `a` and
 /// return the unsigned results.
 #[inline(always)]
 #[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pabsb))]
 pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
     unsafe { pabsb128(a) }
 }
-
-
 /// Shuffle bytes from `a` according to the content of `b`.
 ///
 /// The last 4 bits of each byte of `b` are used as addresses
@@ -36,6 +38,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
 /// ```
 #[inline(always)]
 #[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pshufb))]
 pub fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
     unsafe { pshufb128(a, b) }
 }
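`pshufb`, annotated in the last hunk, is an arbitrary byte permutation: each control byte addresses a source byte, and setting its high bit zeroes that output lane instead. A sketch of the common byte-reversal use via the stable `std::arch` API (name and values are illustrative):

```rust
// Sketch only: pshufb as a full byte permutation that reverses a vector.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn reverse_bytes_demo() {
    use std::arch::x86_64::*;
    // Byte i of `v` holds the value i (_mm_set_epi8 lists bytes high-to-low).
    let v = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    // Control byte i selects source byte 15 - i, reversing the vector.
    let ctl = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let mut out = [0i8; 16];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_shuffle_epi8(v, ctl));
    assert_eq!(out, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
}
```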