From 3671f418ed232e63604f73161e5a55f70d9c04d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= <tomasz.miasko@gmail.com>
Date: Thu, 4 Mar 2021 00:00:00 +0000
Subject: [PATCH] Convert some AVX intrinsics to const generics

* _mm256_extractf128_ps
* _mm256_extractf128_pd
* _mm256_extractf128_si256
* _mm256_insertf128_ps
* _mm256_insertf128_pd
* _mm256_insertf128_si256
---
 .../stdarch/crates/core_arch/src/x86/avx.rs   | 119 +++++++++---------
 1 file changed, 62 insertions(+), 57 deletions(-)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index 53c4a00f4296..8f040fc2f54a 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -983,15 +983,17 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vextractf128, imm8 = 1)
+    assert_instr(vextractf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(1)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
-    match imm8 & 1 {
-        0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
-        _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
-    }
+pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
+    static_assert_imm1!(IMM1);
+    simd_shuffle4(
+        a,
+        _mm256_undefined_ps(),
+        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
+    )
 }
 
 /// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
@@ -1002,15 +1004,13 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vextractf128, imm8 = 1)
+    assert_instr(vextractf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(1)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
-    match imm8 & 1 {
-        0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]),
-        _ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]),
-    }
+pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
+    static_assert_imm1!(IMM1);
+    simd_shuffle2(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
 }
 
 /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
@@ -1020,16 +1020,17 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vextractf128, imm8 = 1)
+    assert_instr(vextractf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(1)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
-    let b = _mm256_undefined_si256().as_i64x4();
-    let dst: i64x2 = match imm8 & 1 {
-        0 => simd_shuffle2(a.as_i64x4(), b, [0, 1]),
-        _ => simd_shuffle2(a.as_i64x4(), b, [2, 3]),
-    };
+pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
+    static_assert_imm1!(IMM1);
+    let dst: i64x2 = simd_shuffle2(
+        a.as_i64x4(),
+        _mm256_undefined_si256().as_i64x4(),
+        [[0, 1], [2, 3]][IMM1 as usize],
+    );
     transmute(dst)
 }
 
@@ -1410,16 +1411,17 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vinsertf128, imm8 = 1)
+    assert_instr(vinsertf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(2)]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
-    let b = _mm256_castps128_ps256(b);
-    match imm8 & 1 {
-        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
-        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
-    }
+pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
+    static_assert_imm1!(IMM1);
+    simd_shuffle8(
+        a,
+        _mm256_castps128_ps256(b),
+        [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
+    )
 }
 
 /// Copies `a` to result, then inserts 128 bits (composed of 2 packed
@@ -1431,15 +1433,17 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vinsertf128, imm8 = 1)
+    assert_instr(vinsertf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(2)]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d {
-    match imm8 & 1 {
-        0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]),
-        _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]),
-    }
+pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
+    static_assert_imm1!(IMM1);
+    simd_shuffle4(
+        a,
+        _mm256_castpd128_pd256(b),
+        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
+    )
 }
 
 /// Copies `a` to result, then inserts 128 bits from `b` into result
@@ -1450,16 +1454,17 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d
 #[target_feature(enable = "avx")]
 #[cfg_attr(
     all(test, not(target_os = "windows")),
-    assert_instr(vinsertf128, imm8 = 1)
+    assert_instr(vinsertf128, IMM1 = 1)
 )]
-#[rustc_args_required_const(2)]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
-    let b = _mm256_castsi128_si256(b).as_i64x4();
-    let dst: i64x4 = match imm8 & 1 {
-        0 => simd_shuffle4(a.as_i64x4(), b, [4, 5, 2, 3]),
-        _ => simd_shuffle4(a.as_i64x4(), b, [0, 1, 4, 5]),
-    };
+pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
+    static_assert_imm1!(IMM1);
+    let dst: i64x4 = simd_shuffle4(
+        a.as_i64x4(),
+        _mm256_castsi128_si256(b).as_i64x4(),
+        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
+    );
     transmute(dst)
 }
 
@@ -2961,7 +2966,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
     let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
-    _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
+    _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
 }
 
 /// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
@@ -2976,7 +2981,7 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
     let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
-    _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
+    _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
 }
 
 /// Loads two 128-bit values (composed of integer data) from memory, and combine
@@ -2990,7 +2995,7 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
     let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
-    _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
+    _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
 }
 
 /// Stores the high and low 128-bit halves (each composed of 4 packed
@@ -3006,7 +3011,7 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i
 pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
     let lo = _mm256_castps256_ps128(a);
     _mm_storeu_ps(loaddr, lo);
-    let hi = _mm256_extractf128_ps(a, 1);
+    let hi = _mm256_extractf128_ps::<1>(a);
     _mm_storeu_ps(hiaddr, hi);
 }
 
@@ -3023,7 +3028,7 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256)
 pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
     let lo = _mm256_castpd256_pd128(a);
     _mm_storeu_pd(loaddr, lo);
-    let hi = _mm256_extractf128_pd(a, 1);
+    let hi = _mm256_extractf128_pd::<1>(a);
     _mm_storeu_pd(hiaddr, hi);
 }
 
@@ -3039,7 +3044,7 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256
 pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
     let lo = _mm256_castsi256_si128(a);
     _mm_storeu_si128(loaddr, lo);
-    let hi = _mm256_extractf128_si256(a, 1);
+    let hi = _mm256_extractf128_si256::<1>(a);
     _mm_storeu_si128(hiaddr, hi);
 }
 
@@ -3727,7 +3732,7 @@ mod tests {
     #[simd_test(enable = "avx")]
     unsafe fn test_mm256_extractf128_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
-        let r = _mm256_extractf128_ps(a, 0);
+        let r = _mm256_extractf128_ps::<0>(a);
         let e = _mm_setr_ps(4., 3., 2., 5.);
         assert_eq_m128(r, e);
     }
@@ -3735,7 +3740,7 @@ mod tests {
     #[simd_test(enable = "avx")]
     unsafe fn test_mm256_extractf128_pd() {
         let a = _mm256_setr_pd(4., 3., 2., 5.);
-        let r = _mm256_extractf128_pd(a, 0);
+        let r = _mm256_extractf128_pd::<0>(a);
         let e = _mm_setr_pd(4., 3.);
         assert_eq_m128d(r, e);
     }
@@ -3743,7 +3748,7 @@ mod tests {
     #[simd_test(enable = "avx")]
     unsafe fn test_mm256_extractf128_si256() {
         let a = _mm256_setr_epi64x(4, 3, 2, 5);
-        let r = _mm256_extractf128_si256(a, 0);
+        let r = _mm256_extractf128_si256::<0>(a);
         let e = _mm_setr_epi64x(4, 3);
         assert_eq_m128i(r, e);
     }
@@ -3894,7 +3899,7 @@ mod tests {
     unsafe fn test_mm256_insertf128_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let b = _mm_setr_ps(4., 9., 16., 25.);
-        let r = _mm256_insertf128_ps(a, b, 0);
+        let r = _mm256_insertf128_ps::<0>(a, b);
         let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
         assert_eq_m256(r, e);
     }
@@ -3903,7 +3908,7 @@ mod tests {
     unsafe fn test_mm256_insertf128_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm_setr_pd(5., 6.);
-        let r = _mm256_insertf128_pd(a, b, 0);
+        let r = _mm256_insertf128_pd::<0>(a, b);
         let e = _mm256_setr_pd(5., 6., 3., 4.);
         assert_eq_m256d(r, e);
     }
@@ -3912,7 +3917,7 @@ mod tests {
     unsafe fn test_mm256_insertf128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm_setr_epi64x(5, 6);
-        let r = _mm256_insertf128_si256(a, b, 0);
+        let r = _mm256_insertf128_si256::<0>(a, b);
         let e = _mm256_setr_epi64x(5, 6, 3, 4);
         assert_eq_m256i(r, e);
     }