convert _mm256_extracti128_si256 to const generics

This commit is contained in:
Rémy Rakic 2021-03-06 03:52:10 +01:00 committed by Amanieu d'Antras
parent ad6678a1de
commit 8a65e1a0fb
3 changed files with 18 additions and 16 deletions

View file

@ -989,24 +989,22 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
transmute::<i64x4, _>(simd_cast(v32))
}
/// Extracts 128 bits (of integer data) from `a` selected with `imm8`.
/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf128, imm8 = 1)
assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
static_assert_imm1!(IMM1);
let a = a.as_i64x4();
let b = _mm256_undefined_si256().as_i64x4();
let dst: i64x2 = match imm8 & 0b01 {
0 => simd_shuffle2(a, b, [0, 1]),
_ => simd_shuffle2(a, b, [2, 3]),
};
let dst: i64x2 = simd_shuffle2(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
transmute(dst)
}
@ -4412,7 +4410,7 @@ mod tests {
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_extracti128_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let r = _mm256_extracti128_si256(a, 0b01);
let r = _mm256_extracti128_si256::<1>(a);
let e = _mm_setr_epi64x(3, 4);
assert_eq_m128i(r, e);
}

View file

@ -155,18 +155,18 @@ mod tests {
0x978093862CDE7187,
);
let mut a_decomp = [_mm_setzero_si128(); 2];
a_decomp[0] = _mm256_extracti128_si256(a, 0);
a_decomp[1] = _mm256_extracti128_si256(a, 1);
a_decomp[0] = _mm256_extracti128_si256::<0>(a);
a_decomp[1] = _mm256_extracti128_si256::<1>(a);
let mut k_decomp = [_mm_setzero_si128(); 2];
k_decomp[0] = _mm256_extracti128_si256(k, 0);
k_decomp[1] = _mm256_extracti128_si256(k, 1);
k_decomp[0] = _mm256_extracti128_si256::<0>(k);
k_decomp[1] = _mm256_extracti128_si256::<1>(k);
let r = vectorized(a, k);
let mut e_decomp = [_mm_setzero_si128(); 2];
for i in 0..2 {
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
}
assert_eq_m128i(_mm256_extracti128_si256(r, 0), e_decomp[0]);
assert_eq_m128i(_mm256_extracti128_si256(r, 1), e_decomp[1]);
assert_eq_m128i(_mm256_extracti128_si256::<0>(r), e_decomp[0]);
assert_eq_m128i(_mm256_extracti128_si256::<1>(r), e_decomp[1]);
}
#[target_feature(enable = "sse2")]

View file

@ -125,6 +125,10 @@ mod tests {
assert_eq_m128i($op($vec_res, 1), $lin_res[1]);
assert_eq_m128i($op($vec_res, 0), $lin_res[0]);
};
(assert_eq_m128i($op:ident::<2>($vec_res:ident),$lin_res:ident[2]);) => {
assert_eq_m128i($op::<1>($vec_res), $lin_res[1]);
assert_eq_m128i($op::<0>($vec_res), $lin_res[0]);
};
}
// this function tests one of the possible 4 instances
@ -209,7 +213,7 @@ mod tests {
for i in 0..2 {
e_decomp[i] = linear(a_decomp[i], b_decomp[i]);
}
unroll! {assert_eq_m128i(_mm256_extracti128_si256(r,2),e_decomp[2]);}
unroll! {assert_eq_m128i(_mm256_extracti128_si256::<2>(r),e_decomp[2]);}
}
#[simd_test(enable = "avx512vpclmulqdq,avx512f")]