Fix x86 SIMD byte shift intrinsics (#1168)

This commit is contained in:
Amanieu d'Antras 2021-05-20 01:47:38 +01:00 committed by GitHub
parent 15749b0ed3
commit b216e9f9c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 115 additions and 98 deletions

View file

@ -2585,44 +2585,52 @@ pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(IMM8);
// Shuffle-index helper: picks the source index for output byte `i` given the
// byte-shift immediate `shift`.
//
// Returns 0 when the (8-bit masked) shift count exceeds 15, or when byte `i`
// sits fewer than `count` positions into its 16-byte group; otherwise returns
// `32 + (i - count)`.
// NOTE(review): presumably index 0 selects a byte of the first shuffle operand
// (`zero`) and indices from 32 upward select bytes of the second (`a`) --
// confirm against the shuffle macro's index convention.
const fn mask(shift: i32, i: u32) -> u32 {
    // Only the low eight bits of the immediate take part in the computation.
    let count = (shift as u32) & 0xff;
    // Position of this byte inside its 16-byte group.
    let pos = i % 16;
    if count <= 15 && pos >= count {
        // `pos >= count` guarantees `i >= count`, so the subtraction cannot underflow.
        return 32 + (i - count);
    }
    0
}
let a = a.as_i8x32();
let zero = _mm256_setzero_si256().as_i8x32();
let r: i8x32 = simd_shuffle32!(
zero,
a,
<const IMM8: i32> [
32 - (IMM8 as u32 & 0xff),
33 - (IMM8 as u32 & 0xff),
34 - (IMM8 as u32 & 0xff),
35 - (IMM8 as u32 & 0xff),
36 - (IMM8 as u32 & 0xff),
37 - (IMM8 as u32 & 0xff),
38 - (IMM8 as u32 & 0xff),
39 - (IMM8 as u32 & 0xff),
40 - (IMM8 as u32 & 0xff),
41 - (IMM8 as u32 & 0xff),
42 - (IMM8 as u32 & 0xff),
43 - (IMM8 as u32 & 0xff),
44 - (IMM8 as u32 & 0xff),
45 - (IMM8 as u32 & 0xff),
46 - (IMM8 as u32 & 0xff),
47 - (IMM8 as u32 & 0xff),
48 - (IMM8 as u32 & 0xff) - 16,
49 - (IMM8 as u32 & 0xff) - 16,
50 - (IMM8 as u32 & 0xff) - 16,
51 - (IMM8 as u32 & 0xff) - 16,
52 - (IMM8 as u32 & 0xff) - 16,
53 - (IMM8 as u32 & 0xff) - 16,
54 - (IMM8 as u32 & 0xff) - 16,
55 - (IMM8 as u32 & 0xff) - 16,
56 - (IMM8 as u32 & 0xff) - 16,
57 - (IMM8 as u32 & 0xff) - 16,
58 - (IMM8 as u32 & 0xff) - 16,
59 - (IMM8 as u32 & 0xff) - 16,
60 - (IMM8 as u32 & 0xff) - 16,
61 - (IMM8 as u32 & 0xff) - 16,
62 - (IMM8 as u32 & 0xff) - 16,
63 - (IMM8 as u32 & 0xff) - 16,
mask(IMM8, 0),
mask(IMM8, 1),
mask(IMM8, 2),
mask(IMM8, 3),
mask(IMM8, 4),
mask(IMM8, 5),
mask(IMM8, 6),
mask(IMM8, 7),
mask(IMM8, 8),
mask(IMM8, 9),
mask(IMM8, 10),
mask(IMM8, 11),
mask(IMM8, 12),
mask(IMM8, 13),
mask(IMM8, 14),
mask(IMM8, 15),
mask(IMM8, 16),
mask(IMM8, 17),
mask(IMM8, 18),
mask(IMM8, 19),
mask(IMM8, 20),
mask(IMM8, 21),
mask(IMM8, 22),
mask(IMM8, 23),
mask(IMM8, 24),
mask(IMM8, 25),
mask(IMM8, 26),
mask(IMM8, 27),
mask(IMM8, 28),
mask(IMM8, 29),
mask(IMM8, 30),
mask(IMM8, 31),
],
);
transmute(r)

View file

@ -8873,76 +8873,84 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
static_assert_imm8!(IMM8);
// Shuffle-index helper: picks the source index for output byte `i` given the
// byte-shift immediate `shift`.
//
// Returns 0 when the (8-bit masked) shift count exceeds 15, or when byte `i`
// sits fewer than `count` positions into its 16-byte group; otherwise returns
// `64 + (i - count)`.
// NOTE(review): presumably index 0 selects a byte of the first shuffle operand
// (`zero`) and indices from 64 upward select bytes of the second (`a`) --
// confirm against the shuffle macro's index convention.
const fn mask(shift: i32, i: u32) -> u32 {
    // Only the low eight bits of the immediate take part in the computation.
    let count = (shift as u32) & 0xff;
    // Position of this byte inside its 16-byte group.
    let pos = i % 16;
    if count <= 15 && pos >= count {
        // `pos >= count` guarantees `i >= count`, so the subtraction cannot underflow.
        return 64 + (i - count);
    }
    0
}
let a = a.as_i8x64();
let zero = _mm512_setzero_si512().as_i8x64();
let r: i8x64 = simd_shuffle64!(
zero,
a,
<const IMM8: i32> [
64 - (IMM8 as u32 & 0xff),
65 - (IMM8 as u32 & 0xff),
66 - (IMM8 as u32 & 0xff),
67 - (IMM8 as u32 & 0xff),
68 - (IMM8 as u32 & 0xff),
69 - (IMM8 as u32 & 0xff),
70 - (IMM8 as u32 & 0xff),
71 - (IMM8 as u32 & 0xff),
72 - (IMM8 as u32 & 0xff),
73 - (IMM8 as u32 & 0xff),
74 - (IMM8 as u32 & 0xff),
75 - (IMM8 as u32 & 0xff),
76 - (IMM8 as u32 & 0xff),
77 - (IMM8 as u32 & 0xff),
78 - (IMM8 as u32 & 0xff),
79 - (IMM8 as u32 & 0xff),
80 - (IMM8 as u32 & 0xff) - 16,
81 - (IMM8 as u32 & 0xff) - 16,
82 - (IMM8 as u32 & 0xff) - 16,
83 - (IMM8 as u32 & 0xff) - 16,
84 - (IMM8 as u32 & 0xff) - 16,
85 - (IMM8 as u32 & 0xff) - 16,
86 - (IMM8 as u32 & 0xff) - 16,
87 - (IMM8 as u32 & 0xff) - 16,
88 - (IMM8 as u32 & 0xff) - 16,
89 - (IMM8 as u32 & 0xff) - 16,
90 - (IMM8 as u32 & 0xff) - 16,
91 - (IMM8 as u32 & 0xff) - 16,
92 - (IMM8 as u32 & 0xff) - 16,
93 - (IMM8 as u32 & 0xff) - 16,
94 - (IMM8 as u32 & 0xff) - 16,
95 - (IMM8 as u32 & 0xff) - 16,
96 - (IMM8 as u32 & 0xff) - 32,
97 - (IMM8 as u32 & 0xff) - 32,
98 - (IMM8 as u32 & 0xff) - 32,
99 - (IMM8 as u32 & 0xff) - 32,
100 - (IMM8 as u32 & 0xff) - 32,
101 - (IMM8 as u32 & 0xff) - 32,
102 - (IMM8 as u32 & 0xff) - 32,
103 - (IMM8 as u32 & 0xff) - 32,
104 - (IMM8 as u32 & 0xff) - 32,
105 - (IMM8 as u32 & 0xff) - 32,
106 - (IMM8 as u32 & 0xff) - 32,
107 - (IMM8 as u32 & 0xff) - 32,
108 - (IMM8 as u32 & 0xff) - 32,
109 - (IMM8 as u32 & 0xff) - 32,
110 - (IMM8 as u32 & 0xff) - 32,
111 - (IMM8 as u32 & 0xff) - 32,
112 - (IMM8 as u32 & 0xff) - 48,
113 - (IMM8 as u32 & 0xff) - 48,
114 - (IMM8 as u32 & 0xff) - 48,
115 - (IMM8 as u32 & 0xff) - 48,
116 - (IMM8 as u32 & 0xff) - 48,
117 - (IMM8 as u32 & 0xff) - 48,
118 - (IMM8 as u32 & 0xff) - 48,
119 - (IMM8 as u32 & 0xff) - 48,
120 - (IMM8 as u32 & 0xff) - 48,
121 - (IMM8 as u32 & 0xff) - 48,
122 - (IMM8 as u32 & 0xff) - 48,
123 - (IMM8 as u32 & 0xff) - 48,
124 - (IMM8 as u32 & 0xff) - 48,
125 - (IMM8 as u32 & 0xff) - 48,
126 - (IMM8 as u32 & 0xff) - 48,
127 - (IMM8 as u32 & 0xff) - 48,
mask(IMM8, 0),
mask(IMM8, 1),
mask(IMM8, 2),
mask(IMM8, 3),
mask(IMM8, 4),
mask(IMM8, 5),
mask(IMM8, 6),
mask(IMM8, 7),
mask(IMM8, 8),
mask(IMM8, 9),
mask(IMM8, 10),
mask(IMM8, 11),
mask(IMM8, 12),
mask(IMM8, 13),
mask(IMM8, 14),
mask(IMM8, 15),
mask(IMM8, 16),
mask(IMM8, 17),
mask(IMM8, 18),
mask(IMM8, 19),
mask(IMM8, 20),
mask(IMM8, 21),
mask(IMM8, 22),
mask(IMM8, 23),
mask(IMM8, 24),
mask(IMM8, 25),
mask(IMM8, 26),
mask(IMM8, 27),
mask(IMM8, 28),
mask(IMM8, 29),
mask(IMM8, 30),
mask(IMM8, 31),
mask(IMM8, 32),
mask(IMM8, 33),
mask(IMM8, 34),
mask(IMM8, 35),
mask(IMM8, 36),
mask(IMM8, 37),
mask(IMM8, 38),
mask(IMM8, 39),
mask(IMM8, 40),
mask(IMM8, 41),
mask(IMM8, 42),
mask(IMM8, 43),
mask(IMM8, 44),
mask(IMM8, 45),
mask(IMM8, 46),
mask(IMM8, 47),
mask(IMM8, 48),
mask(IMM8, 49),
mask(IMM8, 50),
mask(IMM8, 51),
mask(IMM8, 52),
mask(IMM8, 53),
mask(IMM8, 54),
mask(IMM8, 55),
mask(IMM8, 56),
mask(IMM8, 57),
mask(IMM8, 58),
mask(IMM8, 59),
mask(IMM8, 60),
mask(IMM8, 61),
mask(IMM8, 62),
mask(IMM8, 63),
],
);
transmute(r)

View file

@ -425,10 +425,11 @@ pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
const fn mask(shift: i32, i: u32) -> u32 {
if (shift as u32) > 15 {
let shift = shift as u32 & 0xff;
if shift > 15 {
i
} else {
16 - (shift as u32) + i
16 - shift + i
}
}
let zero = _mm_set1_epi8(0).as_i8x16();