convert _mm_blend_epi32 to const generics
This commit is contained in:
parent
8a65e1a0fb
commit
65f6a147de
2 changed files with 26 additions and 34 deletions
|
|
@ -358,39 +358,28 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
|
|||
transmute(pavgb(a.as_u8x32(), b.as_u8x32()))
|
||||
}
|
||||
|
||||
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
|
||||
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
|
||||
static_assert_imm4!(IMM4);
|
||||
let a = a.as_i32x4();
|
||||
let b = b.as_i32x4();
|
||||
macro_rules! blend2 {
|
||||
($a:expr, $b:expr, $c:expr, $d:expr) => {
|
||||
simd_shuffle4(a, b, [$a, $b, $c, $d])
|
||||
};
|
||||
}
|
||||
macro_rules! blend1 {
|
||||
($a:expr, $b:expr) => {
|
||||
match (imm8 >> 2) & 0b11 {
|
||||
0b00 => blend2!($a, $b, 2, 3),
|
||||
0b01 => blend2!($a, $b, 6, 3),
|
||||
0b10 => blend2!($a, $b, 2, 7),
|
||||
_ => blend2!($a, $b, 6, 7),
|
||||
}
|
||||
};
|
||||
}
|
||||
let r: i32x4 = match imm8 & 0b11 {
|
||||
0b00 => blend1!(0, 1),
|
||||
0b01 => blend1!(4, 1),
|
||||
0b10 => blend1!(0, 5),
|
||||
_ => blend1!(4, 5),
|
||||
};
|
||||
let r: i32x4 = simd_shuffle4(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
[0, 4, 0, 4][IMM4 as usize & 0b11],
|
||||
[1, 1, 5, 5][IMM4 as usize & 0b11],
|
||||
[2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
|
||||
[3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
|
||||
],
|
||||
);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -4065,10 +4054,10 @@ mod tests {
|
|||
unsafe fn test_mm_blend_epi32() {
|
||||
let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
|
||||
let e = _mm_setr_epi32(9, 3, 3, 3);
|
||||
let r = _mm_blend_epi32(a, b, 0x01 as i32);
|
||||
let r = _mm_blend_epi32::<0x01>(a, b);
|
||||
assert_eq_m128i(r, e);
|
||||
|
||||
let r = _mm_blend_epi32(b, a, 0x0E as i32);
|
||||
let r = _mm_blend_epi32::<0x0E>(b, a);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1057,7 +1057,8 @@ mod tests {
|
|||
const MASK_WORDS: i32 = 0b01_10;
|
||||
let expected_result = _mm_gf2p8mul_epi8(left, right);
|
||||
let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
|
||||
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
|
||||
let expected_masked =
|
||||
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
@ -1075,7 +1076,7 @@ mod tests {
|
|||
const MASK_WORDS: i32 = 0b01_10;
|
||||
let expected_result = _mm_gf2p8mul_epi8(left, right);
|
||||
let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
|
||||
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
|
||||
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
@ -1281,7 +1282,8 @@ mod tests {
|
|||
let expected_result = _mm_gf2p8affine_epi64_epi8(vector, matrix, CONSTANT_BYTE);
|
||||
let result_masked =
|
||||
_mm_maskz_gf2p8affine_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
|
||||
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
|
||||
let expected_masked =
|
||||
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
@ -1301,7 +1303,7 @@ mod tests {
|
|||
let expected_result = _mm_gf2p8affine_epi64_epi8(left, right, CONSTANT_BYTE);
|
||||
let result_masked =
|
||||
_mm_mask_gf2p8affine_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
|
||||
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
|
||||
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
@ -1539,7 +1541,8 @@ mod tests {
|
|||
let expected_result = _mm_gf2p8affineinv_epi64_epi8(vector, matrix, CONSTANT_BYTE);
|
||||
let result_masked =
|
||||
_mm_maskz_gf2p8affineinv_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
|
||||
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
|
||||
let expected_masked =
|
||||
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
@ -1560,7 +1563,7 @@ mod tests {
|
|||
let expected_result = _mm_gf2p8affineinv_epi64_epi8(left, right, CONSTANT_BYTE);
|
||||
let result_masked =
|
||||
_mm_mask_gf2p8affineinv_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
|
||||
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
|
||||
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
|
||||
assert_eq_m128i(result_masked, expected_masked);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue