Merge pull request #1966 from sayantn/saturation

Use generic SIMD intrinsics for cvtepi intrinsics
This commit is contained in:
Folkert de Vries 2025-11-28 10:27:36 +00:00 committed by GitHub
commit 989aff20e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 103 additions and 282 deletions

View file

@ -2,6 +2,18 @@
#![allow(non_camel_case_types)]
#[inline(always)]
pub(crate) unsafe fn simd_imax<T: Copy>(a: T, b: T) -> T {
let mask: T = crate::intrinsics::simd::simd_gt(a, b);
crate::intrinsics::simd::simd_select(mask, a, b)
}
#[inline(always)]
pub(crate) unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
let mask: T = crate::intrinsics::simd::simd_lt(a, b);
crate::intrinsics::simd::simd_select(mask, a, b)
}
macro_rules! simd_ty {
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
#[repr(simd)]

View file

@ -1897,11 +1897,7 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i16x16();
let b = b.as_i16x16();
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@ -1913,11 +1909,7 @@ pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i32x8();
let b = b.as_i32x8();
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@ -1929,11 +1921,7 @@ pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i8x32();
let b = b.as_i8x32();
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@ -1945,11 +1933,7 @@ pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u16x16();
let b = b.as_u16x16();
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@ -1961,11 +1945,7 @@ pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u32x8();
let b = b.as_u32x8();
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@ -1977,11 +1957,7 @@ pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u8x32();
let b = b.as_u8x32();
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@ -1993,11 +1969,7 @@ pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i16x16();
let b = b.as_i16x16();
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@ -2009,11 +1981,7 @@ pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i32x8();
let b = b.as_i32x8();
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@ -2025,11 +1993,7 @@ pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i8x32();
let b = b.as_i8x32();
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@ -2041,11 +2005,7 @@ pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u16x16();
let b = b.as_u16x16();
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@ -2057,11 +2017,7 @@ pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u32x8();
let b = b.as_u32x8();
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@ -2073,11 +2029,7 @@ pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u8x32();
let b = b.as_u8x32();
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
/// Creates mask from the most significant bit of each 8-bit element in `a`,

View file

@ -1743,11 +1743,7 @@ pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u16x32();
let b = b.as_u16x32();
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -1842,11 +1838,7 @@ pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u8x64();
let b = b.as_u8x64();
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -1941,11 +1933,7 @@ pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i16x32();
let b = b.as_i16x32();
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2040,11 +2028,7 @@ pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i8x64();
let b = b.as_i8x64();
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2139,11 +2123,7 @@ pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u16x32();
let b = b.as_u16x32();
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2238,11 +2218,7 @@ pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminub))]
pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u8x64();
let b = b.as_u8x64();
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2337,11 +2313,7 @@ pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i16x32();
let b = b.as_i16x32();
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2436,11 +2408,7 @@ pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i8x64();
let b = b.as_i8x64();
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -10767,11 +10735,11 @@ pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
unsafe {
transmute(vpmovswb(
a.as_i16x32(),
i8x32::ZERO,
0b11111111_11111111_11111111_11111111,
simd_cast::<_, i8x32>(simd_imax(
simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
i16x32::splat(i8::MIN as _),
))
.as_m256i()
}
}
@ -10783,7 +10751,9 @@ pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
unsafe {
simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
}
}
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -10794,7 +10764,7 @@ pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
}
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@ -10805,7 +10775,13 @@ pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
unsafe {
simd_cast::<_, i8x16>(simd_imax(
simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
i16x16::splat(i8::MIN as _),
))
.as_m128i()
}
}
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -10816,7 +10792,9 @@ pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
unsafe {
simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
}
}
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -10827,7 +10805,7 @@ pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
}
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@ -10872,11 +10850,7 @@ pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
unsafe {
transmute(vpmovuswb(
a.as_u16x32(),
u8x32::ZERO,
0b11111111_11111111_11111111_11111111,
))
simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
}
}
@ -10888,7 +10862,9 @@ pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
unsafe {
simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
}
}
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -10899,7 +10875,7 @@ pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
}
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@ -10911,11 +10887,7 @@ pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
unsafe {
transmute(vpmovuswb256(
a.as_u16x16(),
u8x16::ZERO,
0b11111111_11111111,
))
simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
}
}
@ -10927,7 +10899,9 @@ pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
unsafe {
simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
}
}
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -10938,7 +10912,7 @@ pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
}
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@ -11624,7 +11598,9 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
vpmovwbmem(mem_addr, a.as_i16x32(), k);
let result = _mm512_cvtepi16_epi8(a).as_i8x32();
let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@ -11635,7 +11611,9 @@ pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32,
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
vpmovwbmem256(mem_addr, a.as_i16x16(), k);
let result = _mm256_cvtepi16_epi8(a).as_i8x16();
let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@ -11646,7 +11624,13 @@ pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16,
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovwbmem128(mem_addr, a.as_i16x8(), k);
let result: i8x8 = simd_shuffle!(
_mm_cvtepi16_epi8(a).as_i8x16(),
i8x16::ZERO,
[0, 1, 2, 3, 4, 5, 6, 7]
);
let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
}
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@ -11735,17 +11719,9 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.dbpsadbw.128"]
fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
@ -11756,13 +11732,6 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
@ -21172,7 +21141,7 @@ mod tests {
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
);
assert_eq_m128i(r, e);

View file

@ -2091,11 +2091,7 @@ pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i32x16();
let b = b.as_i32x16();
transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2190,11 +2186,7 @@ pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i64x8();
let b = b.as_i64x8();
transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2233,11 +2225,7 @@ pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i64x4();
let b = b.as_i64x4();
transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2276,11 +2264,7 @@ pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i64x2();
let b = b.as_i64x2();
transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2515,11 +2499,7 @@ pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u32x16();
let b = b.as_u32x16();
transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
}
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2614,11 +2594,7 @@ pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u64x8();
let b = b.as_u64x8();
transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2657,11 +2633,7 @@ pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u64x4();
let b = b.as_u64x4();
transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2700,11 +2672,7 @@ pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u64x2();
let b = b.as_u64x2();
transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2743,11 +2711,7 @@ pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i32x16();
let b = b.as_i32x16();
transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2842,11 +2806,7 @@ pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_i64x8();
let b = b.as_i64x8();
transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2885,11 +2845,7 @@ pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_i64x4();
let b = b.as_i64x4();
transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -2928,11 +2884,7 @@ pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i64x2();
let b = b.as_i64x2();
transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
}
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -3167,11 +3119,7 @@ pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u32x16();
let b = b.as_u32x16();
transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
}
/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -3266,11 +3214,7 @@ pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
unsafe {
let a = a.as_u64x8();
let b = b.as_u64x8();
transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -3309,11 +3253,7 @@ pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u64x4();
let b = b.as_u64x4();
transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -3352,11 +3292,7 @@ pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u64x2();
let b = b.as_u64x2();
transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).

View file

@ -218,11 +218,7 @@ pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i16x8();
let b = b.as_i16x8();
transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@ -234,11 +230,7 @@ pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u8x16();
let b = b.as_u8x16();
transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@ -250,11 +242,7 @@ pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i16x8();
let b = b.as_i16x8();
transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@ -266,11 +254,7 @@ pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u8x16();
let b = b.as_u8x16();
transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
}
/// Multiplies the packed 16-bit integers in `a` and `b`.

View file

@ -312,11 +312,7 @@ pub fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i8x16();
let b = b.as_i8x16();
transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i8x16(), b.as_i8x16()).as_m128i() }
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@ -328,11 +324,7 @@ pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u16x8();
let b = b.as_u16x8();
transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u16x8(), b.as_u16x8()).as_m128i() }
}
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
@ -344,11 +336,7 @@ pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i32x4();
let b = b.as_i32x4();
transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_i32x4(), b.as_i32x4()).as_m128i() }
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@ -360,11 +348,7 @@ pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u32x4();
let b = b.as_u32x4();
transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
unsafe { simd_imax(a.as_u32x4(), b.as_u32x4()).as_m128i() }
}
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
@ -376,11 +360,7 @@ pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i8x16();
let b = b.as_i8x16();
transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i8x16(), b.as_i8x16()).as_m128i() }
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@ -392,11 +372,7 @@ pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u16x8();
let b = b.as_u16x8();
transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u16x8(), b.as_u16x8()).as_m128i() }
}
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
@ -408,11 +384,7 @@ pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_i32x4();
let b = b.as_i32x4();
transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_i32x4(), b.as_i32x4()).as_m128i() }
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@ -424,11 +396,7 @@ pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
unsafe {
let a = a.as_u32x4();
let b = b.as_u32x4();
transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
}
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers