Fix avx512f build on x86-32; fix avx512gfni test fail (#1264)
This commit is contained in:
parent
72061cb5f5
commit
935d5297e3
3 changed files with 484 additions and 451 deletions
|
|
@ -7,6 +7,8 @@ use crate::{
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use super::avx512f::{vpl, vps};
|
||||
|
||||
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
|
||||
|
|
@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
|
|||
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
|
||||
let mut dst: __m512i = src;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
|
|||
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
|
||||
let mut dst: __m512i;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
|
|||
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
|
||||
let mut dst: __m512i = src;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
|
|||
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
|
||||
let mut dst: __m512i;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
|
|||
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
|
||||
let mut dst: __m256i = src;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
|
|||
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
|
||||
let mut dst: __m256i;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
|
|||
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
|
||||
let mut dst: __m256i = src;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
|
|||
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
|
||||
let mut dst: __m256i;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
|
|||
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
|
||||
let mut dst: __m128i = src;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
|
|||
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
|
||||
let mut dst: __m128i;
|
||||
asm!(
|
||||
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
|
|||
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
|
||||
let mut dst: __m128i = src;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
|
|||
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
|
||||
let mut dst: __m128i;
|
||||
asm!(
|
||||
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) k,
|
||||
out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
|
@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
|
|||
#[target_feature(enable = "avx512f,avx512bw")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
|
||||
asm!(
|
||||
"vmovdqu16 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(zmm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu16", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(zmm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
|
|||
#[target_feature(enable = "avx512f,avx512bw")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
|
||||
asm!(
|
||||
"vmovdqu8 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(zmm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu8", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(zmm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
|
|||
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
|
||||
asm!(
|
||||
"vmovdqu16 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(ymm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu16", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(ymm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
|
|||
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
|
||||
asm!(
|
||||
"vmovdqu8 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(ymm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu8", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(ymm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
|
|||
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
|
||||
asm!(
|
||||
"vmovdqu16 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(xmm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu16", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(xmm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
|
|||
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
|
||||
asm!(
|
||||
"vmovdqu8 [{0}]{{{1}}}, {2}",
|
||||
in(reg) mem_addr,
|
||||
in(kreg) mask,
|
||||
in(xmm_reg) a,
|
||||
options(nostack)
|
||||
vps!("vmovdqu8", "{{{mask}}}, {a}"),
|
||||
p = in(reg) mem_addr,
|
||||
mask = in(kreg) mask,
|
||||
a = in(xmm_reg) a,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -819,7 +819,7 @@ mod tests {
|
|||
|
||||
for i in 0..NUM_TEST_ENTRIES {
|
||||
left[i] = (i % 256) as u8;
|
||||
right[i] = left[i] * 101;
|
||||
right[i] = left[i].wrapping_mul(101);
|
||||
result[i] = mulbyte(left[i], right[i]);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue