Add _mm256_shuffle_epi8 and _mm256_permutevar8x32_epi32 (#133)
* Add _mm256_shuffle_epi8 * Add _mm256_permutevar8x32_epi32
This commit is contained in:
parent
d5fd2b09a7
commit
5fb563aabc
1 changed files with 84 additions and 3 deletions
|
|
@ -897,10 +897,20 @@ pub unsafe fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
|
|||
packusdw(a, b)
|
||||
}
|
||||
|
||||
/// Permutes packed 32-bit integers from `a` according to the content of `b`.
|
||||
///
|
||||
/// The last 3 bits of each integer of `b` are used as addresses into the 8
|
||||
/// integers of `a`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+avx2"]
|
||||
#[cfg_attr(test, assert_instr(vpermd))]
|
||||
pub unsafe fn _mm256_permutevar8x32_epi32(a: u32x8, b: u32x8) -> u32x8 {
|
||||
permd(a, b)
|
||||
}
|
||||
|
||||
// TODO _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
|
||||
// TODO _mm256_permute4x64_epi64 (__m256i a, const int imm8)
|
||||
// TODO _mm256_permute4x64_pd (__m256d a, const int imm8)
|
||||
// TODO _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
|
||||
// TODO _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
|
||||
|
||||
/// Compute the absolute differences of packed unsigned 8-bit integers in `a`
|
||||
|
|
@ -914,8 +924,43 @@ pub unsafe fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
|
|||
psadbw(a, b)
|
||||
}
|
||||
|
||||
/// Shuffle bytes from `a` according to the content of `b`.
|
||||
///
|
||||
/// The last 4 bits of each byte of `b` are used as addresses into the 32 bytes
|
||||
/// of `a`.
|
||||
///
|
||||
/// In addition, if the highest significant bit of a byte of `b` is set, the
|
||||
/// respective destination byte is set to 0.
|
||||
///
|
||||
/// The low and high halves of the vectors are shuffled separately.
|
||||
///
|
||||
/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
|
||||
/// equivalent to:
|
||||
///
|
||||
/// ```
|
||||
/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
|
||||
/// let mut r = [0; 32];
|
||||
/// for i in 0..16 {
|
||||
/// // if the most significant bit of b is set,
|
||||
/// // then the destination byte is set to 0.
|
||||
/// if b[i] & 0x80 == 0u8 {
|
||||
/// r[i] = a[(b[i] % 16) as usize];
|
||||
/// }
|
||||
/// if b[i + 16] & 0x80 == 0u8 {
|
||||
/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
|
||||
/// }
|
||||
/// }
|
||||
/// r
|
||||
/// }
|
||||
/// ```
|
||||
#[inline(always)]
|
||||
#[target_feature = "+avx2"]
|
||||
#[cfg_attr(test, assert_instr(vpshufb))]
|
||||
pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 {
|
||||
pshufb(a, b)
|
||||
}
|
||||
|
||||
// TODO _mm256_shuffle_epi32 (__m256i a, const int imm8)
|
||||
// TODO _mm256_shuffle_epi8 (__m256i a, __m256i b)
|
||||
// TODO _mm256_shufflehi_epi16 (__m256i a, const int imm8)
|
||||
// TODO _mm256_shufflelo_epi16 (__m256i a, const int imm8)
|
||||
|
||||
|
|
@ -1430,7 +1475,10 @@ extern "C" {
|
|||
fn psubusb(a: u8x32, b: u8x32) -> u8x32;
|
||||
#[link_name = "llvm.x86.avx2.psubus.w"]
|
||||
fn psubusw(a: u16x16, b: u16x16) -> u16x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx2.pshuf.b"]
|
||||
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
|
||||
#[link_name = "llvm.x86.avx2.permd"]
|
||||
fn permd(a: u32x8, b: u32x8) -> u32x8;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -2566,4 +2614,37 @@ mod tests {
|
|||
let r = avx2::_mm256_alignr_epi8(a, b, 0);
|
||||
assert_eq!(r, b);
|
||||
}
|
||||
|
||||
#[simd_test = "avx2"]
|
||||
unsafe fn _mm256_shuffle_epi8() {
|
||||
let a = u8x32::new(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16,
|
||||
17, 18, 19, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31, 32
|
||||
);
|
||||
let b = u8x32::new(
|
||||
4, 128, 4, 3, 24, 12, 6, 19,
|
||||
12, 5, 5, 10, 4, 1, 8, 0,
|
||||
4, 128, 4, 3, 24, 12, 6, 19,
|
||||
12, 5, 5, 10, 4, 1, 8, 0,
|
||||
);
|
||||
let expected = u8x32::new(
|
||||
5, 0, 5, 4, 9, 13, 7, 4,
|
||||
13, 6, 6, 11, 5, 2, 9, 1,
|
||||
21, 0, 21, 20, 25, 29, 23, 20,
|
||||
29, 22, 22, 27, 21, 18, 25, 17,
|
||||
);
|
||||
let r = avx2::_mm256_shuffle_epi8(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "avx2"]
|
||||
unsafe fn _mm256_permutevar8x32_epi32() {
|
||||
let a = u32x8::new(100, 200, 300, 400, 500, 600, 700, 800);
|
||||
let b = u32x8::new(5, 0, 5, 1, 7, 6, 3, 4);
|
||||
let expected = u32x8::new(600, 100, 600, 200, 800, 700, 400, 500);
|
||||
let r = avx2::_mm256_permutevar8x32_epi32(a, b);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue