Reimplement _mm_avg_epu8, _mm_avg_epu16, _mm256_avg_epu8 and _mm256_avg_epu16 without LLVM intrinsics

This commit is contained in:
Eduardo Sánchez Muñoz 2023-10-03 23:14:09 +02:00 committed by Amanieu d'Antras
parent e4363c287d
commit 387c45ae16
2 changed files with 16 additions and 12 deletions

View file

@ -344,7 +344,10 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
// Per-lane rounded average of the sixteen unsigned 16-bit lanes: (a + b + 1) >> 1.
//
// NOTE: this listing is a diff whose +/- markers were stripped. The line
// directly below is the pre-commit body (removed by this commit): a direct
// call to the `pavgw` LLVM intrinsic binding.
transmute(pavgw(a.as_u16x16(), b.as_u16x16()))
// Everything from here on is the post-commit body (added by this commit).
// Widen each lane to u32 first: the largest possible sum, 65535 + 65535 + 1,
// needs 17 bits and would not fit in a u16 lane.
let a = simd_cast::<_, u32x16>(a.as_u16x16());
let b = simd_cast::<_, u32x16>(b.as_u16x16());
// Adding 1 before shifting right by 1 rounds halves upward.
let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
// The shifted value is at most 65535, so it fits back into u16 lanes.
transmute(simd_cast::<_, u16x16>(r))
}
/// Averages packed unsigned 8-bit integers in `a` and `b`.
@ -355,7 +358,10 @@ pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
// Per-lane rounded average of the thirty-two unsigned 8-bit lanes: (a + b + 1) >> 1.
//
// NOTE: this listing is a diff whose +/- markers were stripped. The line
// directly below is the pre-commit body (removed by this commit): a direct
// call to the `pavgb` LLVM intrinsic binding.
transmute(pavgb(a.as_u8x32(), b.as_u8x32()))
// Everything from here on is the post-commit body (added by this commit).
// Widen each lane to u16 first: the largest possible sum, 255 + 255 + 1,
// needs 9 bits and would not fit in a u8 lane.
let a = simd_cast::<_, u16x32>(a.as_u8x32());
let b = simd_cast::<_, u16x32>(b.as_u8x32());
// Adding 1 before shifting right by 1 rounds halves upward.
let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
// The shifted value is at most 255, so it fits back into u8 lanes.
transmute(simd_cast::<_, u8x32>(r))
}
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
@ -3638,10 +3644,6 @@ extern "C" {
fn pabsw(a: i16x16) -> u16x16;
#[link_name = "llvm.x86.avx2.pabs.d"]
fn pabsd(a: i32x8) -> u32x8;
#[link_name = "llvm.x86.avx2.pavg.b"]
fn pavgb(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.pavg.w"]
fn pavgw(a: u16x16, b: u16x16) -> u16x16;
#[link_name = "llvm.x86.avx2.pblendvb"]
fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx2.phadd.w"]

View file

@ -165,7 +165,10 @@ pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
// Per-lane rounded average of the sixteen unsigned 8-bit lanes: (a + b + 1) >> 1.
//
// NOTE: this listing is a diff whose +/- markers were stripped. The line
// directly below is the pre-commit body (removed by this commit): a direct
// call to the `pavgb` LLVM intrinsic binding.
transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
// Everything from here on is the post-commit body (added by this commit).
// Widen each lane to u16 first: the largest possible sum, 255 + 255 + 1,
// needs 9 bits and would not fit in a u8 lane.
let a = simd_cast::<_, u16x16>(a.as_u8x16());
let b = simd_cast::<_, u16x16>(b.as_u8x16());
// Adding 1 before shifting right by 1 rounds halves upward.
let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
// The shifted value is at most 255, so it fits back into u8 lanes.
transmute(simd_cast::<_, u8x16>(r))
}
/// Averages packed unsigned 16-bit integers in `a` and `b`.
@ -176,7 +179,10 @@ pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
// Per-lane rounded average of the eight unsigned 16-bit lanes: (a + b + 1) >> 1.
//
// NOTE: this listing is a diff whose +/- markers were stripped. The line
// directly below is the pre-commit body (removed by this commit): a direct
// call to the `pavgw` LLVM intrinsic binding.
transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
// Everything from here on is the post-commit body (added by this commit).
// Widen each lane to u32 first: the largest possible sum, 65535 + 65535 + 1,
// needs 17 bits and would not fit in a u16 lane.
let a = simd_cast::<_, u32x8>(a.as_u16x8());
let b = simd_cast::<_, u32x8>(b.as_u16x8());
// Adding 1 before shifting right by 1 rounds halves upward.
let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
// The shifted value is at most 65535, so it fits back into u16 lanes.
transmute(simd_cast::<_, u16x8>(r))
}
/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
@ -2838,10 +2844,6 @@ extern "C" {
fn lfence();
#[link_name = "llvm.x86.sse2.mfence"]
fn mfence();
#[link_name = "llvm.x86.sse2.pavg.b"]
fn pavgb(a: u8x16, b: u8x16) -> u8x16;
#[link_name = "llvm.x86.sse2.pavg.w"]
fn pavgw(a: u16x8, b: u16x8) -> u16x8;
#[link_name = "llvm.x86.sse2.pmadd.wd"]
fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
#[link_name = "llvm.x86.sse2.psad.bw"]