Reimplement _mm_mul_epu32 and _mm256_mul_epu32 without LLVM intrinsics
This commit is contained in:
parent
6baf6bf473
commit
507391d2c8
2 changed files with 8 additions and 6 deletions
|
|
@@ -2074,7 +2074,10 @@ pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpmuludq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(pmuludq(a.as_u32x8(), b.as_u32x8()))
|
||||
let a = a.as_u64x4();
|
||||
let b = b.as_u64x4();
|
||||
let mask = u64x4::splat(u32::MAX.into());
|
||||
transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
|
||||
}
|
||||
|
||||
/// Multiplies the packed 16-bit integers in `a` and `b`, producing
|
||||
|
|
@@ -3675,8 +3678,6 @@ extern "C" {
|
|||
fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmul.dq"]
|
||||
fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.pmulu.dq"]
|
||||
fn pmuludq(a: u32x8, b: u32x8) -> u64x4;
|
||||
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
|
||||
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.packsswb"]
|
||||
|
|
|
|||
|
|
@@ -303,7 +303,10 @@ pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pmuludq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
|
||||
let a = a.as_u64x2();
|
||||
let b = b.as_u64x2();
|
||||
let mask = u64x2::splat(u32::MAX.into());
|
||||
transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
|
||||
}
|
||||
|
||||
/// Sum the absolute differences of packed unsigned 8-bit integers.
|
||||
|
|
@@ -2839,8 +2842,6 @@ extern "C" {
|
|||
fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmulhu.w"]
|
||||
fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmulu.dq"]
|
||||
fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
|
||||
#[link_name = "llvm.x86.sse2.psad.bw"]
|
||||
fn psadbw(a: u8x16, b: u8x16) -> u64x2;
|
||||
#[link_name = "llvm.x86.sse2.psll.w"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue