add vpmaddwd tests back in

2026-02-02 17:01:10 +01:00 · 2026-02-02 17:01:10 +01:00 · 54641efc68
commit 54641efc68
parent 821f139a29
1 changed files with 20 additions and 0 deletions
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@ -4677,6 +4677,26 @@ mod tests {
        assert_eq_m256i(r, e);
    }

+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
+        // This is a trick used in the adler32 algorithm to get a widening addition. The
+        // multiplication by 1 is trivial, but must not be optimized out because then the vpmaddwd
+        // instruction is no longer selected. The assert_instr verifies that this is the case.
+        let one_v = _mm256_set1_epi16(1);
+        _mm256_madd_epi16(v, one_v)
+    }
+
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
+        // This is a trick used in the base64 algorithm to get a widening addition. Instead of a
+        // multiplication, a vector shl is used. In LLVM 22 that breaks the pattern recognition
+        // for the automatic optimization to vpmaddwd.
+        let shift_value = _mm256_set1_epi32(12i32);
+        _mm256_madd_epi16(v, shift_value)
+    }
+
    #[simd_test(enable = "avx2")]
    const fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);