Fix the implementation of _mm256_alignr_epi8 (#330)
This seems likely to have been mostly a copy/paste error, so this commit re-reviews the intrinsic and aligns its implementation with the one in clang. Closes #328
This commit is contained in:
parent
746ab07521
commit
3579853e20
1 changed files with 114 additions and 46 deletions
|
|
@ -121,7 +121,7 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
|
|||
/// result, shift the result right by `n` bytes, and return the low 16 bytes.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
|
||||
#[cfg_attr(test, assert_instr(vpalignr, n = 7))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
|
||||
let n = n as u32;
|
||||
|
|
@ -141,46 +141,104 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
|
|||
let a = a.as_i8x32();
|
||||
let b = b.as_i8x32();
|
||||
|
||||
macro_rules! shuffle {
|
||||
($shift:expr) => {
|
||||
let r: i8x32 = match n {
|
||||
0 => {
|
||||
simd_shuffle32(b, a, [
|
||||
0 + $shift, 1 + $shift,
|
||||
2 + $shift, 3 + $shift,
|
||||
4 + $shift, 5 + $shift,
|
||||
6 + $shift, 7 + $shift,
|
||||
8 + $shift, 9 + $shift,
|
||||
10 + $shift, 11 + $shift,
|
||||
12 + $shift, 13 + $shift,
|
||||
14 + $shift, 15 + $shift,
|
||||
16 + $shift, 17 + $shift,
|
||||
18 + $shift, 19 + $shift,
|
||||
20 + $shift, 21 + $shift,
|
||||
22 + $shift, 23 + $shift,
|
||||
24 + $shift, 25 + $shift,
|
||||
26 + $shift, 27 + $shift,
|
||||
28 + $shift, 29 + $shift,
|
||||
30 + $shift, 31 + $shift,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
])
|
||||
}
|
||||
}
|
||||
let r: i8x32 = match n {
|
||||
0 => shuffle!(0),
|
||||
1 => shuffle!(1),
|
||||
2 => shuffle!(2),
|
||||
3 => shuffle!(3),
|
||||
4 => shuffle!(4),
|
||||
5 => shuffle!(5),
|
||||
6 => shuffle!(6),
|
||||
7 => shuffle!(7),
|
||||
8 => shuffle!(8),
|
||||
9 => shuffle!(9),
|
||||
10 => shuffle!(10),
|
||||
11 => shuffle!(11),
|
||||
12 => shuffle!(12),
|
||||
13 => shuffle!(13),
|
||||
14 => shuffle!(14),
|
||||
15 => shuffle!(15),
|
||||
_ => shuffle!(16),
|
||||
1 => {
|
||||
simd_shuffle32(b, a, [
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32,
|
||||
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
|
||||
])
|
||||
}
|
||||
2 => {
|
||||
simd_shuffle32(b, a, [
|
||||
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33,
|
||||
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
|
||||
])
|
||||
}
|
||||
3 => {
|
||||
simd_shuffle32(b, a, [
|
||||
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34,
|
||||
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
|
||||
])
|
||||
}
|
||||
4 => {
|
||||
simd_shuffle32(b, a, [
|
||||
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
|
||||
])
|
||||
}
|
||||
5 => {
|
||||
simd_shuffle32(b, a, [
|
||||
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36,
|
||||
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
|
||||
])
|
||||
}
|
||||
6 => {
|
||||
simd_shuffle32(b, a, [
|
||||
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37,
|
||||
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
|
||||
])
|
||||
}
|
||||
7 => {
|
||||
simd_shuffle32(b, a, [
|
||||
7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38,
|
||||
23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
|
||||
])
|
||||
}
|
||||
8 => {
|
||||
simd_shuffle32(b, a, [
|
||||
8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
|
||||
])
|
||||
}
|
||||
9 => {
|
||||
simd_shuffle32(b, a, [
|
||||
9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
|
||||
])
|
||||
}
|
||||
10 => {
|
||||
simd_shuffle32(b, a, [
|
||||
10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
|
||||
26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
|
||||
])
|
||||
}
|
||||
11 => {
|
||||
simd_shuffle32(b, a, [
|
||||
11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
|
||||
27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
])
|
||||
}
|
||||
12 => {
|
||||
simd_shuffle32(b, a, [
|
||||
12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
|
||||
28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
])
|
||||
}
|
||||
13 => {
|
||||
simd_shuffle32(b, a, [
|
||||
13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
|
||||
29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
])
|
||||
}
|
||||
14 => {
|
||||
simd_shuffle32(b, a, [
|
||||
14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
|
||||
])
|
||||
}
|
||||
15 => {
|
||||
simd_shuffle32(b, a, [
|
||||
15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
|
||||
31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
|
||||
])
|
||||
}
|
||||
_ => b,
|
||||
};
|
||||
mem::transmute(r)
|
||||
}
|
||||
|
|
@ -4747,18 +4805,28 @@ mod tests {
|
|||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let expected = _mm256_setr_epi8(
|
||||
2, 3, 4, 5, 6, 7, 8, 9,
|
||||
10, 11, 12, 13, 14, 15, 16, 17,
|
||||
10, 11, 12, 13, 14, 15, 16, 0,
|
||||
18, 19, 20, 21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30, 31, 32, 0,
|
||||
);
|
||||
assert_eq_m256i(r, expected);
|
||||
|
||||
let r = _mm256_alignr_epi8(a, b, 4);
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let expected = _mm256_setr_epi8(
|
||||
-17, -18, -19, -20, -21, -22, -23, -24,
|
||||
-25, -26, -27, -28, -29, -30, -31, -32,
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16,
|
||||
-5, -6, -7, -8, -9, -10, -11, -12,
|
||||
-13, -14, -15, -16, 1, 2, 3, 4,
|
||||
-21, -22, -23, -24, -25, -26, -27, -28,
|
||||
-29, -30, -31, -32, 17, 18, 19, 20,
|
||||
);
|
||||
assert_eq_m256i(r, expected);
|
||||
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let expected = _mm256_setr_epi8(
|
||||
-1, -2, -3, -4, -5, -6, -7, -8,
|
||||
-9, -10, -11, -12, -13, -14, -15, -16, -17,
|
||||
-18, -19, -20, -21, -22, -23, -24, -25,
|
||||
-26, -27, -28, -29, -30, -31, -32,
|
||||
);
|
||||
let r = _mm256_alignr_epi8(a, b, 16);
|
||||
assert_eq_m256i(r, expected);
|
||||
|
|
@ -4766,10 +4834,10 @@ mod tests {
|
|||
let r = _mm256_alignr_epi8(a, b, 15);
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let expected = _mm256_setr_epi8(
|
||||
-16, -17, -18, -19, -20, -21, -22, -23,
|
||||
-24, -25, -26, -27, -28, -29, -30, -31,
|
||||
-32, 1, 2, 3, 4, 5, 6, 7,
|
||||
-16, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
-32, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
);
|
||||
assert_eq_m256i(r, expected);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue