Merge pull request #1955 from sayantn/vector-shifts
Use SIMD intrinsics for vector shifts
This commit is contained in:
commit
d84c69548d
3 changed files with 162 additions and 87 deletions
|
|
@ -2786,7 +2786,12 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x4::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` left by the amount
|
||||
|
|
@ -2799,7 +2804,12 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` left by the amount
|
||||
|
|
@ -2812,7 +2822,12 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x2::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` left by the amount
|
||||
|
|
@ -2825,7 +2840,12 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x4::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 16-bit integers in `a` right by `count` while
|
||||
|
|
@ -2889,7 +2909,12 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
|
||||
simd_shr(a.as_i32x4(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` right by the amount specified by the
|
||||
|
|
@ -2901,7 +2926,12 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
|
||||
simd_shr(a.as_i32x8(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
|
||||
|
|
@ -3084,7 +3114,12 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x4::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3096,7 +3131,12 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3108,7 +3148,12 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x2::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3120,7 +3165,12 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x4::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
|
||||
|
|
@ -3679,36 +3729,16 @@ unsafe extern "C" {
|
|||
fn pslld(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psll.q"]
|
||||
fn psllq(a: i64x4, count: i64x2) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psllv.d"]
|
||||
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psllv.d.256"]
|
||||
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psllv.q"]
|
||||
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx2.psllv.q.256"]
|
||||
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psra.w"]
|
||||
fn psraw(a: i16x16, count: i16x8) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.psra.d"]
|
||||
fn psrad(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrav.d"]
|
||||
fn psravd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psrav.d.256"]
|
||||
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrl.w"]
|
||||
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.psrl.d"]
|
||||
fn psrld(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrl.q"]
|
||||
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.d"]
|
||||
fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.d.256"]
|
||||
fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.q"]
|
||||
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
|
||||
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.pshuf.b"]
|
||||
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
|
||||
#[link_name = "llvm.x86.avx2.permd"]
|
||||
|
|
|
|||
|
|
@ -6864,7 +6864,12 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x32::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -6903,7 +6908,12 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x16::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -6942,7 +6952,12 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7200,7 +7215,12 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x32::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7239,7 +7259,12 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x16::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7278,7 +7303,12 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7523,7 +7553,12 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
|
||||
simd_shr(a.as_i16x32(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7562,7 +7597,12 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
|
||||
simd_shr(a.as_i16x16(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7601,7 +7641,12 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
|
||||
simd_shr(a.as_i16x8(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -11657,33 +11702,12 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.psll.w.512"]
|
||||
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.512"]
|
||||
fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.256"]
|
||||
fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.128"]
|
||||
fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrl.w.512"]
|
||||
fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.512"]
|
||||
fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.256"]
|
||||
fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.128"]
|
||||
fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psra.w.512"]
|
||||
fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.512"]
|
||||
fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.256"]
|
||||
fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.128"]
|
||||
fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
|
||||
fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
|
||||
|
|
|
|||
|
|
@ -20940,7 +20940,12 @@ pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
|
||||
simd_shr(a.as_i32x16(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21035,7 +21040,12 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
|
||||
simd_shr(a.as_i64x8(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21074,7 +21084,12 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
|
||||
simd_shr(a.as_i64x4(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21113,7 +21128,12 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
|
||||
simd_shr(a.as_i64x2(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21692,7 +21712,12 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x16::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21787,7 +21812,12 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x16::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21882,7 +21912,12 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21977,7 +22012,12 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -42881,15 +42921,6 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
|
||||
fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psllv.d.512"]
|
||||
fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.d.512"]
|
||||
fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psllv.q.512"]
|
||||
fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.q.512"]
|
||||
fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psll.d.512"]
|
||||
fn vpslld(a: i32x16, count: i32x4) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psrl.d.512"]
|
||||
|
|
@ -42909,16 +42940,6 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.psra.q.128"]
|
||||
fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.d.512"]
|
||||
fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.512"]
|
||||
fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.256"]
|
||||
fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.128"]
|
||||
fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
|
||||
fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
|
||||
#[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue