Reimplement _mm_mul_epi32 and _mm256_mul_epi32 without LLVM intrinsics

This commit is contained in:
Eduardo Sánchez Muñoz 2023-10-03 23:38:47 +02:00 committed by Amanieu d'Antras
parent 387c45ae16
commit fd694451fe
2 changed files with 6 additions and 6 deletions

View file

@ -2066,7 +2066,9 @@ pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __
// The test-only attribute below asserts that this function compiles to `vpmuldq`.
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
// NOTE(review): diff view without +/- markers. The next line appears to be the
// pre-change body (a direct call to the `llvm.x86.avx2.pmul.dq` binding that
// this commit deletes from the `extern "C"` block); the three lines after it
// are the replacement body.
transmute(pmuldq(a.as_i32x8(), b.as_i32x8()))
// View each input as four 64-bit lanes, narrow every lane to 32 bits, then
// widen back to 64 bits. Presumably `simd_cast` follows lane-wise `as`
// semantics, so this keeps the low 32 bits of each lane and sign-extends
// them -- TODO confirm against the `simd_cast` intrinsic documentation.
let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
// Lane-wise 64-bit multiply of the widened operands; the result is
// reinterpreted as `__m256i` for the caller.
transmute(simd_mul(a, b))
}
/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
@ -3680,8 +3682,6 @@ extern "C" {
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
#[link_name = "llvm.x86.avx2.pmul.dq"]
fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.packsswb"]

View file

@ -923,7 +923,9 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
// The test-only attribute below asserts that this function compiles to `pmuldq`.
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
// NOTE(review): diff view without +/- markers. The next line appears to be the
// pre-change body (a direct call to the `llvm.x86.sse41.pmuldq` binding that
// this commit deletes from the `extern "C"` block); the three lines after it
// are the replacement body.
transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
// View each input as two 64-bit lanes, narrow every lane to 32 bits, then
// widen back to 64 bits. Presumably `simd_cast` follows lane-wise `as`
// semantics, so this keeps the low 32 bits of each lane and sign-extends
// them -- TODO confirm against the `simd_cast` intrinsic documentation.
let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
// Lane-wise 64-bit multiply of the widened operands; the result is
// reinterpreted as `__m128i` for the caller.
transmute(simd_mul(a, b))
}
/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate
@ -1154,8 +1156,6 @@ extern "C" {
fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
#[link_name = "llvm.x86.sse41.phminposuw"]
fn phminposuw(a: u16x8) -> u16x8;
#[link_name = "llvm.x86.sse41.pmuldq"]
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
#[link_name = "llvm.x86.sse41.mpsadbw"]
fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
#[link_name = "llvm.x86.sse41.ptestz"]