Reimplement _mm_mulhi_epi16, _mm_mulhi_epu16, _mm256_mulhi_epi16 and _mm256_mulhi_epu16 without LLVM intrinsics
This commit is contained in:
parent
507391d2c8
commit
e4363c287d
2 changed files with 16 additions and 12 deletions
|
|
@ -2090,7 +2090,10 @@ pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpmulhw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(pmulhw(a.as_i16x16(), b.as_i16x16()))
|
||||
let a = simd_cast::<_, i32x16>(a.as_i16x16());
|
||||
let b = simd_cast::<_, i32x16>(b.as_i16x16());
|
||||
let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
|
||||
transmute(simd_cast::<i32x16, i16x16>(r))
|
||||
}
|
||||
|
||||
/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
|
||||
|
|
@ -2103,7 +2106,10 @@ pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpmulhuw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(pmulhuw(a.as_u16x16(), b.as_u16x16()))
|
||||
let a = simd_cast::<_, u32x16>(a.as_u16x16());
|
||||
let b = simd_cast::<_, u32x16>(b.as_u16x16());
|
||||
let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
|
||||
transmute(simd_cast::<u32x16, u16x16>(r))
|
||||
}
|
||||
|
||||
/// Multiplies the packed 16-bit integers in `a` and `b`, producing
|
||||
|
|
@ -3672,10 +3678,6 @@ extern "C" {
|
|||
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
|
||||
#[link_name = "llvm.x86.avx2.mpsadbw"]
|
||||
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmulhu.w"]
|
||||
fn pmulhuw(a: u16x16, b: u16x16) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmulh.w"]
|
||||
fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmul.dq"]
|
||||
fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
|
||||
|
|
|
|||
|
|
@ -261,7 +261,10 @@ pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pmulhw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
|
||||
let a = simd_cast::<_, i32x8>(a.as_i16x8());
|
||||
let b = simd_cast::<_, i32x8>(b.as_i16x8());
|
||||
let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
|
||||
transmute(simd_cast::<i32x8, i16x8>(r))
|
||||
}
|
||||
|
||||
/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
|
||||
|
|
@ -275,7 +278,10 @@ pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(pmulhuw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
|
||||
let a = simd_cast::<_, u32x8>(a.as_u16x8());
|
||||
let b = simd_cast::<_, u32x8>(b.as_u16x8());
|
||||
let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
|
||||
transmute(simd_cast::<u32x8, u16x8>(r))
|
||||
}
|
||||
|
||||
/// Multiplies the packed 16-bit integers in `a` and `b`.
|
||||
|
|
@ -2838,10 +2844,6 @@ extern "C" {
|
|||
fn pavgw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmadd.wd"]
|
||||
fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
|
||||
#[link_name = "llvm.x86.sse2.pmulh.w"]
|
||||
fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse2.pmulhu.w"]
|
||||
fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse2.psad.bw"]
|
||||
fn psadbw(a: u8x16, b: u8x16) -> u64x2;
|
||||
#[link_name = "llvm.x86.sse2.psll.w"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue