Add AVX2 instruction assertions

Also add assertions to a few other assorted modules
This commit is contained in:
Alex Crichton 2017-09-26 11:12:16 -07:00
parent 299b2f3c29
commit 1fa49dfe5d
4 changed files with 110 additions and 4 deletions


@ -2,9 +2,13 @@ use v256::*;
use v128::*;
use x86::__m256i;
#[cfg(test)]
use assert_instr::assert_instr;
/// Computes the absolute values of packed 32-bit integers in `a`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
unsafe { pabsd(a) }
}
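// A minimal usage sketch (hypothetical test, not part of this commit): it
// assumes the `i32x8::new` constructor from `v256`, that the vector types
// implement `PartialEq`/`Debug`, and that the test runs on AVX2-capable
// hardware. It only illustrates the semantics the `vpabsd` assertion covers.
#[cfg(test)]
mod abs_epi32_example {
    use super::*;

    #[test]
    fn absolute_values() {
        let a = i32x8::new(0, 1, -1, 7, -7, 42, -42, -1_000_000);
        let e = i32x8::new(0, 1, 1, 7, 7, 42, 42, 1_000_000);
        assert_eq!(_mm256_abs_epi32(a), e);
    }
}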
@ -12,6 +16,7 @@ pub fn _mm256_abs_epi32(a: i32x8) -> i32x8 {
/// Computes the absolute values of packed 16-bit integers in `a`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpabsw))]
pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
unsafe { pabsw(a) }
}
@ -19,6 +24,7 @@ pub fn _mm256_abs_epi16(a: i16x16) -> i16x16 {
/// Computes the absolute values of packed 8-bit integers in `a`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpabsb))]
pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
unsafe { pabsb(a) }
}
@ -26,6 +32,7 @@ pub fn _mm256_abs_epi8(a: i8x32) -> i8x32 {
/// Add packed 64-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
a + b
}
@ -33,6 +40,7 @@ pub fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
/// Add packed 32-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
a + b
}
@ -40,6 +48,7 @@ pub fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// Add packed 16-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddw))]
pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
a + b
}
@ -47,6 +56,7 @@ pub fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Add packed 8-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddb))]
pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
a + b
}
@ -54,6 +64,7 @@ pub fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// Add packed 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { paddsb(a, b) }
}
@ -61,6 +72,7 @@ pub fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { paddsw(a, b) }
}
@ -68,6 +80,7 @@ pub fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
unsafe { paddusb(a, b) }
}
@ -75,6 +88,7 @@ pub fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { paddusw(a, b) }
}
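// Hedged sketch of the saturating behavior documented above (hypothetical
// test, assuming a `u8x32::splat` constructor and AVX2 hardware): `vpaddusb`
// clamps at the type's maximum instead of wrapping.
#[cfg(test)]
mod adds_example {
    use super::*;

    #[test]
    fn unsigned_saturation() {
        let a = u8x32::splat(250);
        let b = u8x32::splat(10);
        // 250 + 10 would wrap to 4; the saturating add clamps to 255.
        assert_eq!(_mm256_adds_epu8(a, b), u8x32::splat(255));
    }
}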
@ -85,6 +99,7 @@ pub fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
/// in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vandps))]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
a & b
}
@ -93,6 +108,7 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
/// in `a` and then AND with `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vandnps))]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
(!a) & b
}
@ -100,6 +116,7 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
/// Average packed unsigned 16-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpavgw))]
pub fn _mm256_avg_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { pavgw(a, b) }
}
@ -107,6 +124,7 @@ pub fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
/// Average packed unsigned 8-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpavgb))]
pub fn _mm256_avg_epu8(a: u8x32, b: u8x32) -> u8x32 {
unsafe { pavgb(a, b) }
}
@ -118,6 +136,7 @@ pub fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
/// Blend packed 8-bit integers from `a` and `b` using `mask`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpblendvb))]
pub fn _mm256_blendv_epi8(a: i8x32, b: i8x32, mask: __m256i) -> i8x32 {
unsafe { pblendvb(a, b, mask) }
}
@ -143,6 +162,7 @@ pub fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
/// Compare packed 64-bit integers in `a` and `b` for equality.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
a.eq(b)
}
@ -150,6 +170,7 @@ pub fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
/// Compare packed 32-bit integers in `a` and `b` for equality.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
a.eq(b)
}
@ -157,6 +178,7 @@ pub fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// Compare packed 16-bit integers in `a` and `b` for equality.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
a.eq(b)
}
@ -164,6 +186,7 @@ pub fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Compare packed 8-bit integers in `a` and `b` for equality.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
a.eq(b)
}
@ -171,6 +194,7 @@ pub fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// Compare packed 64-bit integers in `a` and `b` for greater-than.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
a.gt(b)
}
@ -178,6 +202,7 @@ pub fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
/// Compare packed 32-bit integers in `a` and `b` for greater-than.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
a.gt(b)
}
@ -185,6 +210,7 @@ pub fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// Compare packed 16-bit integers in `a` and `b` for greater-than.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
a.gt(b)
}
@ -192,6 +218,7 @@ pub fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Compare packed 8-bit integers in `a` and `b` for greater-than.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
a.gt(b)
}
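// Sketch of the comparison-mask convention used by the `cmpeq`/`cmpgt` family
// (hypothetical test, assuming `i8x32::splat`): each lane of the result is
// all ones (-1) where the predicate holds and all zeros elsewhere.
#[cfg(test)]
mod cmp_example {
    use super::*;

    #[test]
    fn greater_than_mask() {
        let a = i8x32::splat(5);
        let b = i8x32::splat(9);
        assert_eq!(_mm256_cmpgt_epi8(b, a), i8x32::splat(-1)); // 9 > 5 everywhere
        assert_eq!(_mm256_cmpgt_epi8(a, b), i8x32::splat(0));  // 5 > 9 never holds
    }
}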
@ -213,6 +240,7 @@ pub fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphaddw))]
pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { phaddw(a, b) }
}
@ -220,6 +248,7 @@ pub fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphaddd))]
pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
unsafe { phaddd(a, b) }
}
@ -228,6 +257,7 @@ pub fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphaddsw))]
pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { phaddsw(a, b) }
}
@ -235,6 +265,7 @@ pub fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphsubw))]
pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { phsubw(a, b) }
}
@ -242,6 +273,7 @@ pub fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphsubd))]
pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
unsafe { phsubd(a, b) }
}
@ -250,6 +282,7 @@ pub fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vphsubsw))]
pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { phsubsw(a, b) }
}
@ -294,6 +327,7 @@ pub fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// of intermediate 32-bit integers.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
unsafe { pmaddwd(a, b) }
}
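// Worked sketch of the multiply-then-pairwise-add above (hypothetical test,
// assuming `i16x16::splat` and `i32x8::splat`): with every lane of `a` equal
// to 2 and every lane of `b` equal to 3, each output lane is 2*3 + 2*3 = 12.
#[cfg(test)]
mod madd_example {
    use super::*;

    #[test]
    fn pairwise_multiply_add() {
        let a = i16x16::splat(2);
        let b = i16x16::splat(3);
        assert_eq!(_mm256_madd_epi16(a, b), i32x8::splat(12));
    }
}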
@ -304,6 +338,7 @@ pub fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
/// signed 16-bit integers
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
unsafe { pmaddubsw(a, b) }
}
@ -321,6 +356,7 @@ pub fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
/// maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { pmaxsw(a, b) }
}
@ -329,6 +365,7 @@ pub fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
unsafe { pmaxsd(a, b) }
}
@ -337,6 +374,7 @@ pub fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { pmaxsb(a, b) }
}
@ -345,6 +383,7 @@ pub fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// the packed maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { pmaxuw(a, b) }
}
@ -353,6 +392,7 @@ pub fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
/// the packed maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
unsafe { pmaxud(a, b) }
}
@ -361,6 +401,7 @@ pub fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
/// the packed maximum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
unsafe { pmaxub(a, b) }
}
@ -369,6 +410,7 @@ pub fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
/// minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { pminsw(a, b) }
}
@ -377,6 +419,7 @@ pub fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
unsafe { pminsd(a, b) }
}
@ -385,6 +428,7 @@ pub fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { pminsb(a, b) }
}
@ -393,6 +437,7 @@ pub fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// the packed minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { pminuw(a, b) }
}
@ -401,6 +446,7 @@ pub fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
/// the packed minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
unsafe { pminud(a, b) }
}
@ -409,6 +455,7 @@ pub fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
/// the packed minimum values.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 {
unsafe { pminub(a, b) }
}
@ -444,6 +491,7 @@ pub fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
/// Return the 64-bit results.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
unsafe { pmuldq(a, b) }
}
@ -454,6 +502,7 @@ pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
/// Return the unsigned 64-bit results.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
unsafe { pmuludq(a, b) }
}
@ -463,6 +512,7 @@ pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
/// intermediate integers.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { pmulhw(a, b) }
}
@ -472,6 +522,7 @@ pub fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// intermediate integers.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { pmulhuw(a, b) }
}
@ -481,6 +532,7 @@ pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
/// intermediate integers
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmullw))]
pub fn _mm256_mullo_epi16(a: i16x16, b: i16x16) -> i16x16 {
a * b
}
@ -491,6 +543,7 @@ pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
/// intermediate integers
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_mullo_epi32(a: i32x8, b: i32x8) -> i32x8 {
a * b
}
@ -501,6 +554,7 @@ pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
/// return bits [16:1]
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mulhrs_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { pmulhrsw(a, b) }
}
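// Worked example of the Q15 fixed-point rounding described above
// (hypothetical test, assuming `i16x16::splat`): for a = b = 0x4000 (+0.5 in
// Q15), the intermediate product 0x1000_0000 is shifted right by 14, rounded
// by adding 1, and shifted right once more, yielding 0x2000 (+0.25 in Q15).
#[cfg(test)]
mod mulhrs_example {
    use super::*;

    #[test]
    fn q15_multiply() {
        let a = i16x16::splat(0x4000);
        assert_eq!(_mm256_mulhrs_epi16(a, a), i16x16::splat(0x2000));
    }
}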
@ -509,6 +563,7 @@ pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
/// and `b`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vorps))]
pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
a | b
}
@ -517,6 +572,7 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
/// using signed saturation
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
unsafe { packsswb(a, b) }
}
@ -525,6 +581,7 @@ pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
/// using signed saturation
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
unsafe { packssdw(a, b) }
}
@ -533,6 +590,7 @@ pub fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
/// using unsigned saturation
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
unsafe { packuswb(a, b) }
}
@ -541,6 +599,7 @@ pub fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
/// using unsigned saturation
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
unsafe { packusdw(a, b) }
}
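// Sketch of the unsigned-saturation packing above (hypothetical test,
// assuming `i32x8::splat` and `u16x16::splat`). Passing the same input for
// both arguments sidesteps the cross-lane interleaving of `vpackusdw`:
// negative values clamp to 0 and values above 65535 clamp to 65535.
#[cfg(test)]
mod packus_example {
    use super::*;

    #[test]
    fn clamps_to_u16_range() {
        let neg = i32x8::splat(-5);
        let big = i32x8::splat(70_000);
        assert_eq!(_mm256_packus_epi32(neg, neg), u16x16::splat(0));
        assert_eq!(_mm256_packus_epi32(big, big), u16x16::splat(65_535));
    }
}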
@ -557,6 +616,7 @@ pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
/// integers in the low 16 bits of the 64-bit return value
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub fn _mm256_sad_epu8(a: u8x32, b: u8x32) -> u64x4 {
unsafe { psadbw(a, b) }
}
@ -571,6 +631,7 @@ pub fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsignw))]
pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { psignw(a, b) }
}
@ -580,6 +641,7 @@ pub fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsignd))]
pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
unsafe { psignd(a, b) }
}
@ -589,6 +651,7 @@ pub fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsignb))]
pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { psignb(a, b) }
}
@ -597,6 +660,7 @@ pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
unsafe { psllw(a, count) }
}
@ -605,6 +669,7 @@ pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { pslld(a, count) }
}
@ -613,6 +678,7 @@ pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
unsafe { psllq(a, count) }
}
@ -621,6 +687,7 @@ pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
/// shifting in zeros, and return the results.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllw))] // TODO: should this be pslli
pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { pslliw(a, imm8) }
}
@ -629,6 +696,7 @@ pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
/// shifting in zeros, and return the results.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpslld))] // TODO: should this be pslli
pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
unsafe { psllid(a, imm8) }
}
@ -637,6 +705,7 @@ pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
/// shifting in zeros, and return the results.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllq))] // TODO: should this be pslli
pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
unsafe { pslliq(a, imm8) }
}
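// Immediate-shift sketch (hypothetical test, assuming `i64x4::splat`): the
// `slli` variants take the shift count as an `i32` immediate rather than a
// vector, so every lane shifts by the same amount.
#[cfg(test)]
mod slli_example {
    use super::*;

    #[test]
    fn shift_all_lanes_left() {
        let a = i64x4::splat(1);
        assert_eq!(_mm256_slli_epi64(a, 4), i64x4::splat(16));
    }
}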
@ -648,6 +717,7 @@ pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
/// shifting in zeros, and return the result.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
unsafe { psllvd(a, count) }
}
@ -657,6 +727,7 @@ pub fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
/// shifting in zeros, and return the result.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
unsafe { psllvd256(a, count) }
}
@ -666,6 +737,7 @@ pub fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
/// shifting in zeros, and return the result.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
unsafe { psllvq(a, count) }
}
@ -675,6 +747,7 @@ pub fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
/// shifting in zeros, and return the result.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
unsafe { psllvq256(a, count) }
}
@ -683,6 +756,7 @@ pub fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
unsafe { psraw(a, count) }
}
@ -691,6 +765,7 @@ pub fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { psrad(a, count) }
}
@ -699,6 +774,7 @@ pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsraw))] // TODO: not vpsraiw?
pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { psraiw(a, imm8) }
}
@ -707,6 +783,7 @@ pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrad))] // TODO: not vpsraid?
pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
unsafe { psraid(a, imm8) }
}
@ -715,6 +792,7 @@ pub fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
/// corresponding element in `count` while shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
unsafe { psravd(a, count) }
}
@ -723,6 +801,7 @@ pub fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
/// corresponding element in `count` while shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
unsafe { psravd256(a, count) }
}
@ -732,6 +811,7 @@ pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
/// zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
unsafe { psrlw(a, count) }
}
@ -740,6 +820,7 @@ pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
/// zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { psrld(a, count) }
}
@ -748,6 +829,7 @@ pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
/// zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
unsafe { psrlq(a, count) }
}
@ -756,6 +838,7 @@ pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlw))] // TODO: not vpsrliw?
pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { psrliw(a, imm8) }
}
@ -764,6 +847,7 @@ pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrld))] // TODO: not vpsrlid?
pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
unsafe { psrlid(a, imm8) }
}
@ -772,6 +856,7 @@ pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlq))] // TODO: not vpsrliq?
pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
unsafe { psrliq(a, imm8) }
}
@ -780,6 +865,7 @@ pub fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
unsafe { psrlvd(a, count) }
}
@ -788,6 +874,7 @@ pub fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
unsafe { psrlvd256(a, count) }
}
@ -796,6 +883,7 @@ pub fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
unsafe { psrlvq(a, count) }
}
@ -804,6 +892,7 @@ pub fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
unsafe { psrlvq256(a, count) }
}
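// Per-lane shift sketch (hypothetical test, assuming `i32x8::new` and
// `i32x8::splat`): unlike `_mm256_srl_epi32`, which applies one count to every
// lane, the `v`-suffixed variants read a separate count per lane, and counts
// of 32 or more zero the lane out.
#[cfg(test)]
mod srlv_example {
    use super::*;

    #[test]
    fn per_lane_counts() {
        let a = i32x8::splat(16);
        let counts = i32x8::new(0, 1, 2, 3, 4, 5, 32, 40);
        let e = i32x8::new(16, 8, 4, 2, 1, 0, 0, 0);
        assert_eq!(_mm256_srlv_epi32(a, counts), e);
    }
}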
@ -813,6 +902,7 @@ pub fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubw))]
pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
a - b
}
@ -820,6 +910,7 @@ pub fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
a - b
}
@ -827,6 +918,7 @@ pub fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
a - b
}
@ -834,6 +926,7 @@ pub fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubb))]
pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
a - b
}
@ -842,6 +935,7 @@ pub fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// `a` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
unsafe { psubsw(a, b) }
}
@ -850,6 +944,7 @@ pub fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// `a` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { psubsb(a, b) }
}
@ -858,6 +953,7 @@ pub fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { psubusw(a, b) }
}
@ -866,6 +962,7 @@ pub fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
/// integers in `a` using saturation.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
unsafe { psubusb(a, b) }
}
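// Saturating-subtract sketch (hypothetical test, assuming `u8x32::splat`):
// the unsigned variants clamp at zero instead of wrapping around.
#[cfg(test)]
mod subs_example {
    use super::*;

    #[test]
    fn clamps_at_zero() {
        let a = u8x32::splat(1);
        let b = u8x32::splat(3);
        // 1 - 3 would wrap to 254; the saturating subtract clamps to 0.
        assert_eq!(_mm256_subs_epu8(a, b), u8x32::splat(0));
    }
}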
@ -883,6 +980,7 @@ pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
/// in `a` and `b`
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
a ^ b
}


@ -127,7 +127,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
}
/// Compare the first single-precision (32-bit) floating-point element of `a`
/// and `b`, and return the minimum value in the first element of the return
/// value, the other elements are copied from `a`.
#[inline(always)]
#[target_feature = "+sse"]
@ -146,7 +146,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
}
/// Compare the first single-precision (32-bit) floating-point element of `a`
/// and `b`, and return the maximum value in the first element of the return
/// value, the other elements are copied from `a`.
#[inline(always)]
#[target_feature = "+sse"]
@ -168,6 +168,7 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
/// from the high half of `a` and `b`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(unpckhps))]
pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) }
}
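// Interleave sketch (hypothetical test, assuming `f32x4::new`): the shuffle
// indices [2, 6, 3, 7] pick the high halves of `a` and `b` alternately.
#[cfg(test)]
mod unpackhi_example {
    use super::*;

    #[test]
    fn high_halves_interleaved() {
        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
        let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
        assert_eq!(_mm_unpackhi_ps(a, b), f32x4::new(3.0, 7.0, 4.0, 8.0));
    }
}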


@ -1,8 +1,12 @@
use v128::*;
use x86::__m128i;
#[cfg(test)]
use assert_instr::assert_instr;
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pblendvb))]
pub fn _mm_blendv_epi8(
a: __m128i,
b: __m128i,


@ -1,15 +1,17 @@
use v128::*;
#[cfg(test)]
use assert_instr::assert_instr;
/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsb))]
pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
unsafe { pabsb128(a) }
}
/// Shuffle bytes from `a` according to the content of `b`.
///
/// The last 4 bits of each byte of `b` are used as addresses
@ -36,6 +38,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
/// ```
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pshufb))]
pub fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
unsafe { pshufb128(a, b) }
}