Reimplement _mm_mul_epi32 and _mm256_mul_epi32 without LLVM intrinsics
This commit is contained in:
parent
387c45ae16
commit
fd694451fe
2 changed files with 6 additions and 6 deletions
|
|
@ -2066,7 +2066,9 @@ pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __
|
|||
#[cfg_attr(test, assert_instr(vpmuldq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
// Multiplies `a` and `b` lane-wise as four 64-bit lanes after reducing every
// lane to its low 32 bits, and returns the products transmuted to `__m256i`.
// NOTE(review): this span is a rendered diff hunk, so it contains both the
// pre-commit body line and the three post-commit lines that replace it.
pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
|
||||
// Pre-commit body (removed by this commit): forwards both operands as i32x8
// to the `pmuldq` extern, i.e. the LLVM-intrinsic path being retired.
transmute(pmuldq(a.as_i32x8(), b.as_i32x8()))
|
||||
// Post-commit body (added by this commit): view the operand as i64x4, narrow
// each lane to i32, then widen back to i64 so every lane holds only its low
// half as a full 64-bit value.
// NOTE(review): assumes `simd_cast` follows `as`-cast semantics (truncate on
// narrowing, sign-extend on widening) -- confirm against the intrinsic docs.
let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
|
||||
// Same narrow-then-widen treatment for the second operand.
let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
|
||||
// Lane-wise 64-bit multiply of the widened operands; result reinterpreted as
// the return type.
transmute(simd_mul(a, b))
|
||||
}
|
||||
|
||||
/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
|
||||
|
|
@ -3680,8 +3682,6 @@ extern "C" {
|
|||
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
|
||||
#[link_name = "llvm.x86.avx2.mpsadbw"]
|
||||
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmul.dq"]
|
||||
fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
|
||||
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.packsswb"]
|
||||
|
|
|
|||
|
|
@ -923,7 +923,9 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pmuldq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
// Multiplies `a` and `b` lane-wise as two 64-bit lanes after reducing every
// lane to its low 32 bits, and returns the products transmuted to `__m128i`.
// NOTE(review): this span is a rendered diff hunk, so it contains both the
// pre-commit body line and the three post-commit lines that replace it.
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
// Pre-commit body (removed by this commit): forwards both operands as i32x4
// to the `pmuldq` extern, i.e. the LLVM-intrinsic path being retired.
transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
|
||||
// Post-commit body (added by this commit): view the operand as i64x2, narrow
// each lane to i32, then widen back to i64 so every lane holds only its low
// half as a full 64-bit value.
// NOTE(review): assumes `simd_cast` follows `as`-cast semantics (truncate on
// narrowing, sign-extend on widening) -- confirm against the intrinsic docs.
let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
|
||||
// Same narrow-then-widen treatment for the second operand.
let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
|
||||
// Lane-wise 64-bit multiply of the widened operands; result reinterpreted as
// the return type.
transmute(simd_mul(a, b))
|
||||
}
|
||||
|
||||
/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate
|
||||
|
|
@ -1154,8 +1156,6 @@ extern "C" {
|
|||
fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
|
||||
#[link_name = "llvm.x86.sse41.phminposuw"]
|
||||
fn phminposuw(a: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse41.pmuldq"]
|
||||
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
|
||||
#[link_name = "llvm.x86.sse41.mpsadbw"]
|
||||
fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse41.ptestz"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue