From 54641efc6839b75405d19dbe20150985df0fbb8d Mon Sep 17 00:00:00 2001
From: Folkert de Vries
Date: Mon, 2 Feb 2026 17:01:10 +0100
Subject: [PATCH] add `vpmaddwd` tests back in

---
 .../stdarch/crates/core_arch/src/x86/avx2.rs | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index 83aef753c9d9..04a88e461f75 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -4677,6 +4677,26 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
+        // Codegen test for a trick from the adler32 algorithm: multiplying by a vector of ones
+        // makes `_mm256_madd_epi16` a widening addition. The multiplication is trivial but must not
+        // be optimized out, or vpmaddwd is no longer selected; `assert_instr` checks for that.
+        let one_v = _mm256_set1_epi16(1);
+        _mm256_madd_epi16(v, one_v)
+    }
+
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
+        // Codegen test for a trick from the base64 algorithm to get a widening addition. Instead of
+        // a multiplication, a vector shl is used. In LLVM 22 this breaks the automatic optimization
+        // to vpmaddwd. NOTE(review): each i32 splat of 12 is the i16 pair (12, 0); confirm intent.
+        let shift_value = _mm256_set1_epi32(12i32);
+        _mm256_madd_epi16(v, shift_value)
+    }
+
     #[simd_test(enable = "avx2")]
     const fn test_mm256_inserti128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);