Fix x86 SIMD byte shift intrinsics (#1168)
This commit is contained in:
parent
15749b0ed3
commit
b216e9f9c4
3 changed files with 115 additions and 98 deletions
|
|
@ -2585,44 +2585,52 @@ pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
const fn mask(shift: i32, i: u32) -> u32 {
|
||||
let shift = shift as u32 & 0xff;
|
||||
if shift > 15 || i % 16 < shift {
|
||||
0
|
||||
} else {
|
||||
32 + (i - shift)
|
||||
}
|
||||
}
|
||||
let a = a.as_i8x32();
|
||||
let zero = _mm256_setzero_si256().as_i8x32();
|
||||
let r: i8x32 = simd_shuffle32!(
|
||||
zero,
|
||||
a,
|
||||
<const IMM8: i32> [
|
||||
32 - (IMM8 as u32 & 0xff),
|
||||
33 - (IMM8 as u32 & 0xff),
|
||||
34 - (IMM8 as u32 & 0xff),
|
||||
35 - (IMM8 as u32 & 0xff),
|
||||
36 - (IMM8 as u32 & 0xff),
|
||||
37 - (IMM8 as u32 & 0xff),
|
||||
38 - (IMM8 as u32 & 0xff),
|
||||
39 - (IMM8 as u32 & 0xff),
|
||||
40 - (IMM8 as u32 & 0xff),
|
||||
41 - (IMM8 as u32 & 0xff),
|
||||
42 - (IMM8 as u32 & 0xff),
|
||||
43 - (IMM8 as u32 & 0xff),
|
||||
44 - (IMM8 as u32 & 0xff),
|
||||
45 - (IMM8 as u32 & 0xff),
|
||||
46 - (IMM8 as u32 & 0xff),
|
||||
47 - (IMM8 as u32 & 0xff),
|
||||
48 - (IMM8 as u32 & 0xff) - 16,
|
||||
49 - (IMM8 as u32 & 0xff) - 16,
|
||||
50 - (IMM8 as u32 & 0xff) - 16,
|
||||
51 - (IMM8 as u32 & 0xff) - 16,
|
||||
52 - (IMM8 as u32 & 0xff) - 16,
|
||||
53 - (IMM8 as u32 & 0xff) - 16,
|
||||
54 - (IMM8 as u32 & 0xff) - 16,
|
||||
55 - (IMM8 as u32 & 0xff) - 16,
|
||||
56 - (IMM8 as u32 & 0xff) - 16,
|
||||
57 - (IMM8 as u32 & 0xff) - 16,
|
||||
58 - (IMM8 as u32 & 0xff) - 16,
|
||||
59 - (IMM8 as u32 & 0xff) - 16,
|
||||
60 - (IMM8 as u32 & 0xff) - 16,
|
||||
61 - (IMM8 as u32 & 0xff) - 16,
|
||||
62 - (IMM8 as u32 & 0xff) - 16,
|
||||
63 - (IMM8 as u32 & 0xff) - 16,
|
||||
mask(IMM8, 0),
|
||||
mask(IMM8, 1),
|
||||
mask(IMM8, 2),
|
||||
mask(IMM8, 3),
|
||||
mask(IMM8, 4),
|
||||
mask(IMM8, 5),
|
||||
mask(IMM8, 6),
|
||||
mask(IMM8, 7),
|
||||
mask(IMM8, 8),
|
||||
mask(IMM8, 9),
|
||||
mask(IMM8, 10),
|
||||
mask(IMM8, 11),
|
||||
mask(IMM8, 12),
|
||||
mask(IMM8, 13),
|
||||
mask(IMM8, 14),
|
||||
mask(IMM8, 15),
|
||||
mask(IMM8, 16),
|
||||
mask(IMM8, 17),
|
||||
mask(IMM8, 18),
|
||||
mask(IMM8, 19),
|
||||
mask(IMM8, 20),
|
||||
mask(IMM8, 21),
|
||||
mask(IMM8, 22),
|
||||
mask(IMM8, 23),
|
||||
mask(IMM8, 24),
|
||||
mask(IMM8, 25),
|
||||
mask(IMM8, 26),
|
||||
mask(IMM8, 27),
|
||||
mask(IMM8, 28),
|
||||
mask(IMM8, 29),
|
||||
mask(IMM8, 30),
|
||||
mask(IMM8, 31),
|
||||
],
|
||||
);
|
||||
transmute(r)
|
||||
|
|
|
|||
|
|
@ -8873,76 +8873,84 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
|
||||
static_assert_imm8!(IMM8);
|
||||
const fn mask(shift: i32, i: u32) -> u32 {
|
||||
let shift = shift as u32 & 0xff;
|
||||
if shift > 15 || i % 16 < shift {
|
||||
0
|
||||
} else {
|
||||
64 + (i - shift)
|
||||
}
|
||||
}
|
||||
let a = a.as_i8x64();
|
||||
let zero = _mm512_setzero_si512().as_i8x64();
|
||||
let r: i8x64 = simd_shuffle64!(
|
||||
zero,
|
||||
a,
|
||||
<const IMM8: i32> [
|
||||
64 - (IMM8 as u32 & 0xff),
|
||||
65 - (IMM8 as u32 & 0xff),
|
||||
66 - (IMM8 as u32 & 0xff),
|
||||
67 - (IMM8 as u32 & 0xff),
|
||||
68 - (IMM8 as u32 & 0xff),
|
||||
69 - (IMM8 as u32 & 0xff),
|
||||
70 - (IMM8 as u32 & 0xff),
|
||||
71 - (IMM8 as u32 & 0xff),
|
||||
72 - (IMM8 as u32 & 0xff),
|
||||
73 - (IMM8 as u32 & 0xff),
|
||||
74 - (IMM8 as u32 & 0xff),
|
||||
75 - (IMM8 as u32 & 0xff),
|
||||
76 - (IMM8 as u32 & 0xff),
|
||||
77 - (IMM8 as u32 & 0xff),
|
||||
78 - (IMM8 as u32 & 0xff),
|
||||
79 - (IMM8 as u32 & 0xff),
|
||||
80 - (IMM8 as u32 & 0xff) - 16,
|
||||
81 - (IMM8 as u32 & 0xff) - 16,
|
||||
82 - (IMM8 as u32 & 0xff) - 16,
|
||||
83 - (IMM8 as u32 & 0xff) - 16,
|
||||
84 - (IMM8 as u32 & 0xff) - 16,
|
||||
85 - (IMM8 as u32 & 0xff) - 16,
|
||||
86 - (IMM8 as u32 & 0xff) - 16,
|
||||
87 - (IMM8 as u32 & 0xff) - 16,
|
||||
88 - (IMM8 as u32 & 0xff) - 16,
|
||||
89 - (IMM8 as u32 & 0xff) - 16,
|
||||
90 - (IMM8 as u32 & 0xff) - 16,
|
||||
91 - (IMM8 as u32 & 0xff) - 16,
|
||||
92 - (IMM8 as u32 & 0xff) - 16,
|
||||
93 - (IMM8 as u32 & 0xff) - 16,
|
||||
94 - (IMM8 as u32 & 0xff) - 16,
|
||||
95 - (IMM8 as u32 & 0xff) - 16,
|
||||
96 - (IMM8 as u32 & 0xff) - 32,
|
||||
97 - (IMM8 as u32 & 0xff) - 32,
|
||||
98 - (IMM8 as u32 & 0xff) - 32,
|
||||
99 - (IMM8 as u32 & 0xff) - 32,
|
||||
100 - (IMM8 as u32 & 0xff) - 32,
|
||||
101 - (IMM8 as u32 & 0xff) - 32,
|
||||
102 - (IMM8 as u32 & 0xff) - 32,
|
||||
103 - (IMM8 as u32 & 0xff) - 32,
|
||||
104 - (IMM8 as u32 & 0xff) - 32,
|
||||
105 - (IMM8 as u32 & 0xff) - 32,
|
||||
106 - (IMM8 as u32 & 0xff) - 32,
|
||||
107 - (IMM8 as u32 & 0xff) - 32,
|
||||
108 - (IMM8 as u32 & 0xff) - 32,
|
||||
109 - (IMM8 as u32 & 0xff) - 32,
|
||||
110 - (IMM8 as u32 & 0xff) - 32,
|
||||
111 - (IMM8 as u32 & 0xff) - 32,
|
||||
112 - (IMM8 as u32 & 0xff) - 48,
|
||||
113 - (IMM8 as u32 & 0xff) - 48,
|
||||
114 - (IMM8 as u32 & 0xff) - 48,
|
||||
115 - (IMM8 as u32 & 0xff) - 48,
|
||||
116 - (IMM8 as u32 & 0xff) - 48,
|
||||
117 - (IMM8 as u32 & 0xff) - 48,
|
||||
118 - (IMM8 as u32 & 0xff) - 48,
|
||||
119 - (IMM8 as u32 & 0xff) - 48,
|
||||
120 - (IMM8 as u32 & 0xff) - 48,
|
||||
121 - (IMM8 as u32 & 0xff) - 48,
|
||||
122 - (IMM8 as u32 & 0xff) - 48,
|
||||
123 - (IMM8 as u32 & 0xff) - 48,
|
||||
124 - (IMM8 as u32 & 0xff) - 48,
|
||||
125 - (IMM8 as u32 & 0xff) - 48,
|
||||
126 - (IMM8 as u32 & 0xff) - 48,
|
||||
127 - (IMM8 as u32 & 0xff) - 48,
|
||||
mask(IMM8, 0),
|
||||
mask(IMM8, 1),
|
||||
mask(IMM8, 2),
|
||||
mask(IMM8, 3),
|
||||
mask(IMM8, 4),
|
||||
mask(IMM8, 5),
|
||||
mask(IMM8, 6),
|
||||
mask(IMM8, 7),
|
||||
mask(IMM8, 8),
|
||||
mask(IMM8, 9),
|
||||
mask(IMM8, 10),
|
||||
mask(IMM8, 11),
|
||||
mask(IMM8, 12),
|
||||
mask(IMM8, 13),
|
||||
mask(IMM8, 14),
|
||||
mask(IMM8, 15),
|
||||
mask(IMM8, 16),
|
||||
mask(IMM8, 17),
|
||||
mask(IMM8, 18),
|
||||
mask(IMM8, 19),
|
||||
mask(IMM8, 20),
|
||||
mask(IMM8, 21),
|
||||
mask(IMM8, 22),
|
||||
mask(IMM8, 23),
|
||||
mask(IMM8, 24),
|
||||
mask(IMM8, 25),
|
||||
mask(IMM8, 26),
|
||||
mask(IMM8, 27),
|
||||
mask(IMM8, 28),
|
||||
mask(IMM8, 29),
|
||||
mask(IMM8, 30),
|
||||
mask(IMM8, 31),
|
||||
mask(IMM8, 32),
|
||||
mask(IMM8, 33),
|
||||
mask(IMM8, 34),
|
||||
mask(IMM8, 35),
|
||||
mask(IMM8, 36),
|
||||
mask(IMM8, 37),
|
||||
mask(IMM8, 38),
|
||||
mask(IMM8, 39),
|
||||
mask(IMM8, 40),
|
||||
mask(IMM8, 41),
|
||||
mask(IMM8, 42),
|
||||
mask(IMM8, 43),
|
||||
mask(IMM8, 44),
|
||||
mask(IMM8, 45),
|
||||
mask(IMM8, 46),
|
||||
mask(IMM8, 47),
|
||||
mask(IMM8, 48),
|
||||
mask(IMM8, 49),
|
||||
mask(IMM8, 50),
|
||||
mask(IMM8, 51),
|
||||
mask(IMM8, 52),
|
||||
mask(IMM8, 53),
|
||||
mask(IMM8, 54),
|
||||
mask(IMM8, 55),
|
||||
mask(IMM8, 56),
|
||||
mask(IMM8, 57),
|
||||
mask(IMM8, 58),
|
||||
mask(IMM8, 59),
|
||||
mask(IMM8, 60),
|
||||
mask(IMM8, 61),
|
||||
mask(IMM8, 62),
|
||||
mask(IMM8, 63),
|
||||
],
|
||||
);
|
||||
transmute(r)
|
||||
|
|
|
|||
|
|
@ -425,10 +425,11 @@ pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
|
||||
const fn mask(shift: i32, i: u32) -> u32 {
|
||||
if (shift as u32) > 15 {
|
||||
let shift = shift as u32 & 0xff;
|
||||
if shift > 15 {
|
||||
i
|
||||
} else {
|
||||
16 - (shift as u32) + i
|
||||
16 - shift + i
|
||||
}
|
||||
}
|
||||
let zero = _mm_set1_epi8(0).as_i8x16();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue