From 54641efc6839b75405d19dbe20150985df0fbb8d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 2 Feb 2026 17:01:10 +0100
Subject: [PATCH] add `vpmaddwd` tests back in

---
 .../stdarch/crates/core_arch/src/x86/avx2.rs  | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index 83aef753c9d9..04a88e461f75 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -4677,6 +4677,26 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
+        // Codegen check (see `assert_instr` above): the adler32 algorithm multiplies by a
+        // vector of 16-bit ones purely to obtain a widening addition. The multiply-by-1 looks
+        // trivial, yet it must survive optimization, because once it is folded away the
+        // vpmaddwd instruction is no longer selected.
+        _mm256_madd_epi16(v, _mm256_set1_epi16(1))
+    }
+
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
+        // Codegen check (see `assert_instr` above): the base64 algorithm relies on this
+        // shape to obtain a widening addition, with a vector shl standing in for the
+        // multiplication. In LLVM 22 that shl form breaks the pattern recognition behind
+        // the automatic optimization to vpmaddwd.
+        _mm256_madd_epi16(v, _mm256_set1_epi32(12i32))
+    }
+
     #[simd_test(enable = "avx2")]
     const fn test_mm256_inserti128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);