Convert `_mm256_extracti128_si256` to const generics
This commit is contained in:
parent
ad6678a1de
commit
8a65e1a0fb
3 changed files with 18 additions and 16 deletions
|
|
@ -989,24 +989,22 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
|
|||
transmute::<i64x4, _>(simd_cast(v32))
|
||||
}
|
||||
|
||||
/// Extracts 128 bits (of integer data) from `a` selected with `imm8`.
|
||||
/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(
|
||||
all(test, not(target_os = "windows")),
|
||||
assert_instr(vextractf128, imm8 = 1)
|
||||
assert_instr(vextractf128, IMM1 = 1)
|
||||
)]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
|
||||
pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
|
||||
static_assert_imm1!(IMM1);
|
||||
let a = a.as_i64x4();
|
||||
let b = _mm256_undefined_si256().as_i64x4();
|
||||
let dst: i64x2 = match imm8 & 0b01 {
|
||||
0 => simd_shuffle2(a, b, [0, 1]),
|
||||
_ => simd_shuffle2(a, b, [2, 3]),
|
||||
};
|
||||
let dst: i64x2 = simd_shuffle2(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
|
||||
transmute(dst)
|
||||
}
|
||||
|
||||
|
|
@ -4412,7 +4410,7 @@ mod tests {
|
|||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_extracti128_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let r = _mm256_extracti128_si256(a, 0b01);
|
||||
let r = _mm256_extracti128_si256::<1>(a);
|
||||
let e = _mm_setr_epi64x(3, 4);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -155,18 +155,18 @@ mod tests {
|
|||
0x978093862CDE7187,
|
||||
);
|
||||
let mut a_decomp = [_mm_setzero_si128(); 2];
|
||||
a_decomp[0] = _mm256_extracti128_si256(a, 0);
|
||||
a_decomp[1] = _mm256_extracti128_si256(a, 1);
|
||||
a_decomp[0] = _mm256_extracti128_si256::<0>(a);
|
||||
a_decomp[1] = _mm256_extracti128_si256::<1>(a);
|
||||
let mut k_decomp = [_mm_setzero_si128(); 2];
|
||||
k_decomp[0] = _mm256_extracti128_si256(k, 0);
|
||||
k_decomp[1] = _mm256_extracti128_si256(k, 1);
|
||||
k_decomp[0] = _mm256_extracti128_si256::<0>(k);
|
||||
k_decomp[1] = _mm256_extracti128_si256::<1>(k);
|
||||
let r = vectorized(a, k);
|
||||
let mut e_decomp = [_mm_setzero_si128(); 2];
|
||||
for i in 0..2 {
|
||||
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
|
||||
}
|
||||
assert_eq_m128i(_mm256_extracti128_si256(r, 0), e_decomp[0]);
|
||||
assert_eq_m128i(_mm256_extracti128_si256(r, 1), e_decomp[1]);
|
||||
assert_eq_m128i(_mm256_extracti128_si256::<0>(r), e_decomp[0]);
|
||||
assert_eq_m128i(_mm256_extracti128_si256::<1>(r), e_decomp[1]);
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
|
|
|
|||
|
|
@ -125,6 +125,10 @@ mod tests {
|
|||
assert_eq_m128i($op($vec_res, 1), $lin_res[1]);
|
||||
assert_eq_m128i($op($vec_res, 0), $lin_res[0]);
|
||||
};
|
||||
(assert_eq_m128i($op:ident::<2>($vec_res:ident),$lin_res:ident[2]);) => {
|
||||
assert_eq_m128i($op::<1>($vec_res), $lin_res[1]);
|
||||
assert_eq_m128i($op::<0>($vec_res), $lin_res[0]);
|
||||
};
|
||||
}
|
||||
|
||||
// this function tests one of the possible 4 instances
|
||||
|
|
@ -209,7 +213,7 @@ mod tests {
|
|||
for i in 0..2 {
|
||||
e_decomp[i] = linear(a_decomp[i], b_decomp[i]);
|
||||
}
|
||||
unroll! {assert_eq_m128i(_mm256_extracti128_si256(r,2),e_decomp[2]);}
|
||||
unroll! {assert_eq_m128i(_mm256_extracti128_si256::<2>(r),e_decomp[2]);}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpclmulqdq,avx512f")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue