add vpmaddwd tests back in
This commit is contained in:
parent
821f139a29
commit
54641efc68
1 changed files with 20 additions and 0 deletions
|
|
@ -4677,6 +4677,26 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
// Codegen regression check: `_mm256_madd_epi16` with an all-ones multiplier must still be
// lowered to `vpmaddwd` (enforced by the `assert_instr` attribute below in test builds).
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpmaddwd))]
|
||||
unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
|
||||
// This is a trick used in the adler32 algorithm to get a widening addition. The
|
||||
// multiplication by 1 is trivial, but must not be optimized out because then the vpmaddwd
|
||||
// instruction is no longer selected. The assert_instr verifies that this is the case.
|
||||
// Broadcast the value 1 into every 16-bit lane, so each product equals the input lane.
let one_v = _mm256_set1_epi16(1);
|
||||
// The intrinsic call under test; its result is returned unchanged.
_mm256_madd_epi16(v, one_v)
|
||||
}
|
||||
|
||||
// Codegen regression check: `_mm256_madd_epi16` with a per-32-bit-lane constant multiplier
// must still be lowered to `vpmaddwd` (enforced by the `assert_instr` attribute below).
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpmaddwd))]
|
||||
unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
|
||||
// This is a trick used in the base64 algorithm to get a widening addition. Instead of a
|
||||
// multiplication, a vector shl is used. In LLVM 22 that breaks the pattern recognition
|
||||
// for the automatic optimization to vpmaddwd.
|
||||
// The constant is broadcast per 32-bit lane, so viewed as 16-bit lanes the multiplier
// alternates between 12 (low half) and 0 (high half).
// NOTE(review): the comment above mentions a shift, but the operand here is the plain
// constant 12 - confirm whether 12 is the intended multiplier or a shift amount (1 << 12).
let shift_value = _mm256_set1_epi32(12i32);
|
||||
// The intrinsic call under test; its result is returned unchanged.
_mm256_madd_epi16(v, shift_value)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const fn test_mm256_inserti128_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue