Fix avx512f build on x86-32; fix avx512gfni test fail (#1264)

This commit is contained in:
Luo Jia 2021-12-09 20:30:22 +08:00 committed by GitHub
parent 72061cb5f5
commit 935d5297e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 484 additions and 451 deletions

View file

@ -7,6 +7,8 @@ use crate::{
#[cfg(test)]
use stdarch_test::assert_instr;
use super::avx512f::{vpl, vps};
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
let mut dst: __m512i = src;
asm!(
"vmovdqu16 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(zmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
let mut dst: __m512i;
asm!(
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(zmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
let mut dst: __m512i = src;
asm!(
"vmovdqu8 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(zmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
let mut dst: __m512i;
asm!(
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(zmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
let mut dst: __m256i = src;
asm!(
"vmovdqu16 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(ymm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
let mut dst: __m256i;
asm!(
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(ymm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
let mut dst: __m256i = src;
asm!(
"vmovdqu8 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(ymm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
let mut dst: __m256i;
asm!(
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(ymm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
let mut dst: __m128i = src;
asm!(
"vmovdqu16 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(xmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
let mut dst: __m128i;
asm!(
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(xmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
let mut dst: __m128i = src;
asm!(
"vmovdqu8 {2}{{{1}}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
inout(xmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
let mut dst: __m128i;
asm!(
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
in(reg) mem_addr,
in(kreg) k,
out(xmm_reg) dst,
options(pure, readonly, nostack)
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
options(pure, readonly, nostack)
);
dst
}
@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
asm!(
"vmovdqu16 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(zmm_reg) a,
options(nostack)
vps!("vmovdqu16", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
options(nostack)
);
}
@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
asm!(
"vmovdqu8 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(zmm_reg) a,
options(nostack)
vps!("vmovdqu8", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
options(nostack)
);
}
@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
asm!(
"vmovdqu16 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(ymm_reg) a,
options(nostack)
vps!("vmovdqu16", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
options(nostack)
);
}
@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
asm!(
"vmovdqu8 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(ymm_reg) a,
options(nostack)
vps!("vmovdqu8", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
options(nostack)
);
}
@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
asm!(
"vmovdqu16 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(xmm_reg) a,
options(nostack)
vps!("vmovdqu16", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
options(nostack)
);
}
@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
asm!(
"vmovdqu8 [{0}]{{{1}}}, {2}",
in(reg) mem_addr,
in(kreg) mask,
in(xmm_reg) a,
options(nostack)
vps!("vmovdqu8", "{{{mask}}}, {a}"),
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
options(nostack)
);
}

File diff suppressed because it is too large Load diff

View file

@ -819,7 +819,7 @@ mod tests {
for i in 0..NUM_TEST_ENTRIES {
left[i] = (i % 256) as u8;
right[i] = left[i] * 101;
right[i] = left[i].wrapping_mul(101);
result[i] = mulbyte(left[i], right[i]);
}