correct signedness of pmadd arguments

This commit is contained in:
Folkert de Vries 2025-11-17 16:01:02 +01:00
parent 57436fe950
commit ac2d97254e
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
2 changed files with 4 additions and 4 deletions

View file

@@ -1773,7 +1773,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
-unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
+unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}
/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@@ -3702,7 +3702,7 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx2.phsub.sw"]
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
-fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
+fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]

View file

@@ -5955,7 +5955,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
-unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
+unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11688,7 +11688,7 @@ unsafe extern "C" {
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
-fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
+fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
#[link_name = "llvm.x86.avx512.packssdw.512"]
fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;