Changed the implementation of _mm{256,512}_alignr_epi8 to match that of _mm_alignr_epi8 in ssse3.rs. Also removed the import of the `unreachable_unchecked` hint, as it is no longer needed.

This commit is contained in:
satiscugcat 2025-06-13 11:12:08 +05:30 committed by Amanieu d'Antras
parent 48dafaa1c7
commit 64dd3e4489
2 changed files with 149 additions and 350 deletions

View file

@ -18,7 +18,7 @@
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
use core::hint::unreachable_unchecked;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;
@ -170,158 +170,74 @@ pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    // Overview: each 16-byte lane is handled independently. Within a lane the
    // bytes of `b` (low) and `a` (high) are treated as one 32-byte sequence,
    // which is shifted right by `IMM8` bytes; the low 16 bytes are kept.
    // The shift is expressed as a single compile-time shuffle of `b` and `a`.
    static_assert_uimm_bits!(IMM8, 8);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    // A shift of exactly one lane selects `a` unchanged. This has to be
    // handled here: `mask` reduces the shift modulo 16, so a shift of 16 would
    // otherwise look like a shift of 0 and select `b`.
    if IMM8 == 16 {
        return a;
    }

    // Shuffle index for output byte `i`.
    //
    // Indices 0..32 select from the first shuffle operand (`b`) and 32..64
    // from the second (`a`). Output byte `i` reads byte `i + shift` of `b`
    // while that stays inside the current 16-byte lane; once it runs past the
    // lane end it continues at the start of the same lane of `a`, which is
    // `32 + (i + shift - 16)`, i.e. `i + 16 + shift`.
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    // SAFETY: every index produced by `mask` for `i` in 0..32 is at most
    // 31 + 16 + 15 = 62, so it stays within the 64 bytes formed by `b`
    // followed by `a`; `i8x32` and `__m256i` are both 256 bits wide.
    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}

View file

@ -4,7 +4,7 @@ use crate::{
ptr,
};
use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;
@ -11316,206 +11316,89 @@ pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
} else {
(a, b)
};
let a = a.as_i8x64();
let b = b.as_i8x64();
if IMM8 == 16 {
return transmute(a);
}
let r: i8x64 = match IMM8 % 16 {
0 => {
simd_shuffle!(
b,
a,
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63,
],
)
}
1 => {
simd_shuffle!(
b,
a,
[
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 112,
],
)
}
2 => {
simd_shuffle!(
b,
a,
[
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 112, 113,
],
)
}
3 => {
simd_shuffle!(
b,
a,
[
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 112, 113, 114,
],
)
}
4 => {
simd_shuffle!(
b,
a,
[
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 112, 113, 114, 115,
],
)
}
5 => {
simd_shuffle!(
b,
a,
[
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 112, 113, 114, 115, 116,
],
)
}
6 => {
simd_shuffle!(
b,
a,
[
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62,
63, 112, 113, 114, 115, 116, 117,
],
)
}
7 => {
simd_shuffle!(
b,
a,
[
7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25,
26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44,
45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62,
63, 112, 113, 114, 115, 116, 117, 118,
],
)
}
8 => {
simd_shuffle!(
b,
a,
[
8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26,
27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45,
46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63,
112, 113, 114, 115, 116, 117, 118, 119,
],
)
}
9 => {
simd_shuffle!(
b,
a,
[
9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27,
28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46,
47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63,
112, 113, 114, 115, 116, 117, 118, 119, 120,
],
)
}
10 => {
simd_shuffle!(
b,
a,
[
10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28,
29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112,
113, 114, 115, 116, 117, 118, 119, 120, 121,
],
)
}
11 => {
simd_shuffle!(
b,
a,
[
11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29,
30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96,
97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112,
113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
],
)
}
12 => {
simd_shuffle!(
b,
a,
[
12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30,
31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97,
98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113,
114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
],
)
}
13 => {
simd_shuffle!(
b,
a,
[
13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98,
99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114,
115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
],
)
}
14 => {
simd_shuffle!(
b,
a,
[
14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99,
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114,
115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
],
)
}
15 => {
simd_shuffle!(
b,
a,
[
15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99,
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114,
115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
],
)
}
_ => unreachable_unchecked(),
};
/// Computes the shuffle index feeding output byte `i` for a per-lane byte
/// shift of `shift % 16`.
///
/// While the shifted position stays inside the 16-byte lane that contains
/// `i`, the index is simply `i + shift`. Once it would run past the end of
/// that lane, an extra offset of 48 is added so that the index lands in the
/// upper half of the index range (64 and above) at the start of the matching
/// lane.
const fn mask(shift: u32, i: u32) -> u32 {
    let offset = shift % 16;
    let lane_pos = i % 16;
    // `lane_pos + offset` is at most 15 + 15, so this cannot overflow.
    let crosses_lane_end = lane_pos + offset >= 16;
    if crosses_lane_end {
        i + 48 + offset
    } else {
        i + offset
    }
}
let r: i8x64 = simd_shuffle!(
b.as_i8x64(),
a.as_i8x64(),
[
mask(IMM8 as u32, 0),
mask(IMM8 as u32, 1),
mask(IMM8 as u32, 2),
mask(IMM8 as u32, 3),
mask(IMM8 as u32, 4),
mask(IMM8 as u32, 5),
mask(IMM8 as u32, 6),
mask(IMM8 as u32, 7),
mask(IMM8 as u32, 8),
mask(IMM8 as u32, 9),
mask(IMM8 as u32, 10),
mask(IMM8 as u32, 11),
mask(IMM8 as u32, 12),
mask(IMM8 as u32, 13),
mask(IMM8 as u32, 14),
mask(IMM8 as u32, 15),
mask(IMM8 as u32, 16),
mask(IMM8 as u32, 17),
mask(IMM8 as u32, 18),
mask(IMM8 as u32, 19),
mask(IMM8 as u32, 20),
mask(IMM8 as u32, 21),
mask(IMM8 as u32, 22),
mask(IMM8 as u32, 23),
mask(IMM8 as u32, 24),
mask(IMM8 as u32, 25),
mask(IMM8 as u32, 26),
mask(IMM8 as u32, 27),
mask(IMM8 as u32, 28),
mask(IMM8 as u32, 29),
mask(IMM8 as u32, 30),
mask(IMM8 as u32, 31),
mask(IMM8 as u32, 32),
mask(IMM8 as u32, 33),
mask(IMM8 as u32, 34),
mask(IMM8 as u32, 35),
mask(IMM8 as u32, 36),
mask(IMM8 as u32, 37),
mask(IMM8 as u32, 38),
mask(IMM8 as u32, 39),
mask(IMM8 as u32, 40),
mask(IMM8 as u32, 41),
mask(IMM8 as u32, 42),
mask(IMM8 as u32, 43),
mask(IMM8 as u32, 44),
mask(IMM8 as u32, 45),
mask(IMM8 as u32, 46),
mask(IMM8 as u32, 47),
mask(IMM8 as u32, 48),
mask(IMM8 as u32, 49),
mask(IMM8 as u32, 50),
mask(IMM8 as u32, 51),
mask(IMM8 as u32, 52),
mask(IMM8 as u32, 53),
mask(IMM8 as u32, 54),
mask(IMM8 as u32, 55),
mask(IMM8 as u32, 56),
mask(IMM8 as u32, 57),
mask(IMM8 as u32, 58),
mask(IMM8 as u32, 59),
mask(IMM8 as u32, 60),
mask(IMM8 as u32, 61),
mask(IMM8 as u32, 62),
mask(IMM8 as u32, 63),
],
);
transmute(r)
}
}