Add #[rustc_args_required_const] annotations (#319)
Support isn't quite in nightly yet to make this work, but using a local build this gets everything passing again. This commit also implements native verification that the attribute is placed on the correct argument.
This commit is contained in:
parent
354e96ba1b
commit
d097221faf
16 changed files with 245 additions and 34 deletions
|
|
@ -14,8 +14,8 @@
|
|||
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
|
||||
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
||||
integer_atomics, stmt_expr_attributes, core_intrinsics,
|
||||
crate_in_paths)]
|
||||
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
|
||||
crate_in_paths, attr_literals, rustc_attrs)]
|
||||
#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(inline_always, too_many_arguments, cast_sign_loss,
|
||||
cast_lossless, cast_possible_wrap,
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
|
|
@ -138,6 +139,7 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
|
|
@ -330,6 +332,7 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vroundpd, b = 0x3))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { roundpd256(a, $imm8) }
|
||||
|
|
@ -369,6 +372,7 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
@ -419,6 +423,7 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! blend4 {
|
||||
|
|
@ -461,6 +466,7 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! blend4 {
|
||||
|
|
@ -531,6 +537,7 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vdpps(a, b, $imm8) }
|
||||
|
|
@ -678,6 +685,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse2")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmppd(a, b, $imm8) }
|
||||
|
|
@ -691,6 +699,7 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmppd256(a, b, $imm8) }
|
||||
|
|
@ -704,6 +713,7 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmpps(a, b, $imm8) }
|
||||
|
|
@ -717,6 +727,7 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmpps256(a, b, $imm8) }
|
||||
|
|
@ -732,6 +743,7 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse2")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmpsd(a, b, $imm8) }
|
||||
|
|
@ -747,6 +759,7 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse")]
|
||||
#[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { vcmpss(a, b, $imm8) }
|
||||
|
|
@ -830,7 +843,8 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
|
|||
/// floating-point elements) from `a`, selected with `imm8`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vextractf128))]
|
||||
#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
|
||||
match imm8 & 1 {
|
||||
0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
|
||||
|
|
@ -842,7 +856,8 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 {
|
|||
/// floating-point elements) from `a`, selected with `imm8`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vextractf128))]
|
||||
#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
|
||||
match imm8 & 1 {
|
||||
0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]),
|
||||
|
|
@ -853,7 +868,8 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d {
|
|||
/// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vextractf128))]
|
||||
#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
|
||||
let b = _mm256_undefined_si256().as_i64x4();
|
||||
let dst: i64x2 = match imm8 & 1 {
|
||||
|
|
@ -903,6 +919,7 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
|
|
@ -955,6 +972,7 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse")]
|
||||
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
|
|
@ -1025,6 +1043,7 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
|
|
@ -1067,6 +1086,7 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx,sse2")]
|
||||
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle2 {
|
||||
|
|
@ -1093,6 +1113,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_permute2f128_ps(
|
||||
a: __m256, b: __m256, imm8: i32
|
||||
) -> __m256 {
|
||||
|
|
@ -1107,6 +1128,7 @@ pub unsafe fn _mm256_permute2f128_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_permute2f128_pd(
|
||||
a: __m256d, b: __m256d, imm8: i32
|
||||
) -> __m256d {
|
||||
|
|
@ -1121,6 +1143,7 @@ pub unsafe fn _mm256_permute2f128_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_permute2f128_si256(
|
||||
a: __m256i, b: __m256i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1184,6 +1207,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
|
||||
let b = _mm256_castps128_ps256(b);
|
||||
match imm8 & 1 {
|
||||
|
|
@ -1198,6 +1222,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insertf128_pd(
|
||||
a: __m256d, b: __m128d, imm8: i32
|
||||
) -> __m256d {
|
||||
|
|
@ -1212,6 +1237,7 @@ pub unsafe fn _mm256_insertf128_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insertf128_si256(
|
||||
a: __m256i, b: __m128i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1228,6 +1254,7 @@ pub unsafe fn _mm256_insertf128_si256(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
|
||||
mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i))
|
||||
}
|
||||
|
|
@ -1237,6 +1264,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
|
||||
mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i))
|
||||
}
|
||||
|
|
@ -1246,6 +1274,7 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i {
|
||||
mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -123,6 +123,7 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
|
||||
let n = n as u32;
|
||||
// If palignr is shifting the pair of vectors more than the size of two
|
||||
|
|
@ -227,6 +228,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let a = a.as_i32x4();
|
||||
|
|
@ -259,6 +261,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_blend_epi32(
|
||||
a: __m256i, b: __m256i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -313,6 +316,7 @@ pub unsafe fn _mm256_blend_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_blend_epi16(
|
||||
a: __m256i, b: __m256i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -697,6 +701,7 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
|
||||
let a = a.as_i64x4();
|
||||
let b = _mm256_undefined_si256().as_i64x4();
|
||||
|
|
@ -763,6 +768,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i32gather_epi32(
|
||||
slice: *const i32, offsets: __m128i, scale: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -784,6 +790,7 @@ pub unsafe fn _mm_i32gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i32gather_epi32(
|
||||
src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i,
|
||||
scale: i32,
|
||||
|
|
@ -805,6 +812,7 @@ pub unsafe fn _mm_mask_i32gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i32gather_epi32(
|
||||
slice: *const i32, offsets: __m256i, scale: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -826,6 +834,7 @@ pub unsafe fn _mm256_i32gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i32gather_epi32(
|
||||
src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i,
|
||||
scale: i32,
|
||||
|
|
@ -847,6 +856,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i32gather_ps(
|
||||
slice: *const f32, offsets: __m128i, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -867,6 +877,7 @@ pub unsafe fn _mm_i32gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i32gather_ps(
|
||||
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -884,6 +895,7 @@ pub unsafe fn _mm_mask_i32gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i32gather_ps(
|
||||
slice: *const f32, offsets: __m256i, scale: i32
|
||||
) -> __m256 {
|
||||
|
|
@ -904,6 +916,7 @@ pub unsafe fn _mm256_i32gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i32gather_ps(
|
||||
src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32
|
||||
) -> __m256 {
|
||||
|
|
@ -921,6 +934,7 @@ pub unsafe fn _mm256_mask_i32gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i32gather_epi64(
|
||||
slice: *const i64, offsets: __m128i, scale: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -942,6 +956,7 @@ pub unsafe fn _mm_i32gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i32gather_epi64(
|
||||
src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i,
|
||||
scale: i32,
|
||||
|
|
@ -963,6 +978,7 @@ pub unsafe fn _mm_mask_i32gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i32gather_epi64(
|
||||
slice: *const i64, offsets: __m128i, scale: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -984,6 +1000,7 @@ pub unsafe fn _mm256_i32gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i32gather_epi64(
|
||||
src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i,
|
||||
scale: i32,
|
||||
|
|
@ -1005,6 +1022,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i32gather_pd(
|
||||
slice: *const f64, offsets: __m128i, scale: i32
|
||||
) -> __m128d {
|
||||
|
|
@ -1025,6 +1043,7 @@ pub unsafe fn _mm_i32gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i32gather_pd(
|
||||
src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d,
|
||||
scale: i32,
|
||||
|
|
@ -1043,6 +1062,7 @@ pub unsafe fn _mm_mask_i32gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i32gather_pd(
|
||||
slice: *const f64, offsets: __m128i, scale: i32
|
||||
) -> __m256d {
|
||||
|
|
@ -1063,6 +1083,7 @@ pub unsafe fn _mm256_i32gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i32gather_pd(
|
||||
src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d,
|
||||
scale: i32,
|
||||
|
|
@ -1081,6 +1102,7 @@ pub unsafe fn _mm256_mask_i32gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i64gather_epi32(
|
||||
slice: *const i32, offsets: __m128i, scale: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -1102,6 +1124,7 @@ pub unsafe fn _mm_i64gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i64gather_epi32(
|
||||
src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i,
|
||||
scale: i32,
|
||||
|
|
@ -1123,6 +1146,7 @@ pub unsafe fn _mm_mask_i64gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i64gather_epi32(
|
||||
slice: *const i32, offsets: __m256i, scale: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -1144,6 +1168,7 @@ pub unsafe fn _mm256_i64gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i64gather_epi32(
|
||||
src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i,
|
||||
scale: i32,
|
||||
|
|
@ -1165,6 +1190,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i64gather_ps(
|
||||
slice: *const f32, offsets: __m128i, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -1185,6 +1211,7 @@ pub unsafe fn _mm_i64gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i64gather_ps(
|
||||
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -1202,6 +1229,7 @@ pub unsafe fn _mm_mask_i64gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i64gather_ps(
|
||||
slice: *const f32, offsets: __m256i, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -1222,6 +1250,7 @@ pub unsafe fn _mm256_i64gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i64gather_ps(
|
||||
src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32
|
||||
) -> __m128 {
|
||||
|
|
@ -1239,6 +1268,7 @@ pub unsafe fn _mm256_mask_i64gather_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i64gather_epi64(
|
||||
slice: *const i64, offsets: __m128i, scale: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -1260,6 +1290,7 @@ pub unsafe fn _mm_i64gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i64gather_epi64(
|
||||
src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i,
|
||||
scale: i32,
|
||||
|
|
@ -1281,6 +1312,7 @@ pub unsafe fn _mm_mask_i64gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i64gather_epi64(
|
||||
slice: *const i64, offsets: __m256i, scale: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1302,6 +1334,7 @@ pub unsafe fn _mm256_i64gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i64gather_epi64(
|
||||
src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i,
|
||||
scale: i32,
|
||||
|
|
@ -1323,6 +1356,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_i64gather_pd(
|
||||
slice: *const f64, offsets: __m128i, scale: i32
|
||||
) -> __m128d {
|
||||
|
|
@ -1343,6 +1377,7 @@ pub unsafe fn _mm_i64gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_mask_i64gather_pd(
|
||||
src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d,
|
||||
scale: i32,
|
||||
|
|
@ -1361,6 +1396,7 @@ pub unsafe fn _mm_mask_i64gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_i64gather_pd(
|
||||
slice: *const f64, offsets: __m256i, scale: i32
|
||||
) -> __m256d {
|
||||
|
|
@ -1381,6 +1417,7 @@ pub unsafe fn _mm256_i64gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm256_mask_i64gather_pd(
|
||||
src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d,
|
||||
scale: i32,
|
||||
|
|
@ -1398,6 +1435,7 @@ pub unsafe fn _mm256_mask_i64gather_pd(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_inserti128_si256(
|
||||
a: __m256i, b: __m128i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1654,6 +1692,7 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_mpsadbw_epu8(
|
||||
a: __m256i, b: __m256i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1799,6 +1838,7 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let zero = _mm256_setzero_si256().as_i64x4();
|
||||
|
|
@ -1851,6 +1891,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_permute2x128_si256(
|
||||
a: __m256i, b: __m256i, imm8: i32
|
||||
) -> __m256i {
|
||||
|
|
@ -1869,6 +1910,7 @@ pub unsafe fn _mm256_permute2x128_si256(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
|
||||
use x86::i586::avx::_mm256_undefined_pd;
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
|
|
@ -1989,11 +2031,8 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
///
|
||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
///
|
||||
/// let shuffle1 = 0b00_11_10_01;
|
||||
/// let shuffle2 = 0b01_00_10_11;
|
||||
///
|
||||
/// let c1 = _mm256_shuffle_epi32(a, shuffle1);
|
||||
/// let c2 = _mm256_shuffle_epi32(a, shuffle2);
|
||||
/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
|
||||
/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
|
||||
///
|
||||
/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
|
||||
/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
|
||||
|
|
@ -2008,6 +2047,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
|
||||
// simd_shuffleX requires that its selector parameter be made up of
|
||||
// constant values, but we can't enforce that here. In spirit, we need
|
||||
|
|
@ -2069,6 +2109,7 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let a = a.as_i16x16();
|
||||
|
|
@ -2126,6 +2167,7 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let a = a.as_i16x16();
|
||||
|
|
@ -2265,6 +2307,7 @@ pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
|
||||
let a = a.as_i64x4();
|
||||
macro_rules! call {
|
||||
|
|
@ -2279,8 +2322,15 @@ pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
|
||||
_mm256_slli_si256(a, imm8)
|
||||
let a = a.as_i64x4();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpslldq(a, $imm8)
|
||||
}
|
||||
}
|
||||
mem::transmute(constify_imm8!(imm8 * 8, call))
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in `a` left by the amount
|
||||
|
|
@ -2381,6 +2431,7 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
|
||||
let a = a.as_i64x4();
|
||||
macro_rules! call {
|
||||
|
|
@ -2395,8 +2446,15 @@ pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
|
||||
_mm256_srli_si256(a, imm8)
|
||||
let a = a.as_i64x4();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsrldq(a, $imm8)
|
||||
}
|
||||
}
|
||||
mem::transmute(constify_imm8!(imm8 * 8, call))
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in `a` right by `count` while shifting in
|
||||
|
|
@ -2897,6 +2955,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 {
|
||||
let imm8 = (imm8 & 31) as u32;
|
||||
simd_extract(a.as_i8x32(), imm8)
|
||||
|
|
@ -2909,6 +2968,7 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 {
|
||||
let imm8 = (imm8 & 15) as u32;
|
||||
simd_extract(a.as_i16x16(), imm8)
|
||||
|
|
@ -2918,6 +2978,7 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 {
|
||||
let imm8 = (imm8 & 7) as u32;
|
||||
simd_extract(a.as_i32x8(), imm8)
|
||||
|
|
|
|||
|
|
@ -764,6 +764,7 @@ pub unsafe fn _mm_setzero_ps() -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(shufps, mask = 3))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 {
|
||||
let mask = (mask & 0xFF) as u8;
|
||||
|
||||
|
|
@ -1548,6 +1549,7 @@ pub const _MM_HINT_NTA: i32 = 0;
|
|||
#[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
|
||||
#[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
|
||||
#[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
|
||||
// The `strategy` must be a compile-time constant, so we use a short form
|
||||
// of `constify_imm8!` for now.
|
||||
|
|
@ -2739,8 +2741,7 @@ mod tests {
|
|||
unsafe fn test_mm_shuffle_ps() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
let mask = 0b00_01_01_11;
|
||||
let r = _mm_shuffle_ps(a, b, mask);
|
||||
let r = _mm_shuffle_ps(a, b, 0b00_01_01_11);
|
||||
assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -314,7 +314,14 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
_mm_slli_si128_impl(a, imm8)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
|
||||
let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! shuffle {
|
||||
|
|
@ -357,22 +364,25 @@ pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
_mm_slli_si128(a, imm8)
|
||||
_mm_slli_si128_impl(a, imm8)
|
||||
}
|
||||
|
||||
/// Shift `a` right by `imm8` bytes while shifting in zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
_mm_srli_si128(a, imm8)
|
||||
_mm_srli_si128_impl(a, imm8)
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psllw))]
|
||||
#[cfg_attr(test, assert_instr(psllw, imm8 = 7))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(pslliw(a.as_i16x8(), imm8))
|
||||
}
|
||||
|
|
@ -389,7 +399,8 @@ pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pslld))]
|
||||
#[cfg_attr(test, assert_instr(pslld, imm8 = 7))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psllid(a.as_i32x4(), imm8))
|
||||
}
|
||||
|
|
@ -406,7 +417,8 @@ pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psllq))]
|
||||
#[cfg_attr(test, assert_instr(psllq, imm8 = 7))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(pslliq(a.as_i64x2(), imm8))
|
||||
}
|
||||
|
|
@ -424,7 +436,8 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psraw))]
|
||||
#[cfg_attr(test, assert_instr(psraw, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psraiw(a.as_i16x8(), imm8))
|
||||
}
|
||||
|
|
@ -442,7 +455,8 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrad))]
|
||||
#[cfg_attr(test, assert_instr(psrad, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psraid(a.as_i32x4(), imm8))
|
||||
}
|
||||
|
|
@ -460,7 +474,14 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
_mm_srli_si128_impl(a, imm8)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
|
||||
let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! shuffle {
|
||||
|
|
@ -503,7 +524,8 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
|
|||
/// zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrlw))]
|
||||
#[cfg_attr(test, assert_instr(psrlw, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psrliw(a.as_i16x8(), imm8))
|
||||
}
|
||||
|
|
@ -521,7 +543,8 @@ pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrld))]
|
||||
#[cfg_attr(test, assert_instr(psrld, imm8 = 8))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psrlid(a.as_i32x4(), imm8))
|
||||
}
|
||||
|
|
@ -539,7 +562,8 @@ pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
/// zeros.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(psrlq))]
|
||||
#[cfg_attr(test, assert_instr(psrlq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i {
|
||||
mem::transmute(psrliq(a.as_i64x2(), imm8))
|
||||
}
|
||||
|
|
@ -985,6 +1009,7 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
|
||||
simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32
|
||||
}
|
||||
|
|
@ -993,6 +1018,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16))
|
||||
}
|
||||
|
|
@ -1009,6 +1035,7 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
|
||||
// simd_shuffleX requires that its selector parameter be made up of
|
||||
// constant values, but we can't enforce that here. In spirit, we need
|
||||
|
|
@ -1072,6 +1099,7 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
|
||||
// See _mm_shuffle_epi32.
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
|
|
@ -1130,6 +1158,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
|
||||
// See _mm_shuffle_epi32.
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
|
|
@ -2078,6 +2107,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
match imm8 & 0b11 {
|
||||
0b00 => simd_shuffle2(a, b, [0, 2]),
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ pub unsafe fn _mm_blendv_epi8(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let b = b.as_i16x8();
|
||||
|
|
@ -96,6 +97,7 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { blendpd(a, b, $imm2) }
|
||||
|
|
@ -108,6 +110,7 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { blendps(a, b, $imm4) }
|
||||
|
|
@ -121,6 +124,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
|
|||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
|
||||
mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
|
||||
}
|
||||
|
|
@ -132,6 +136,7 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
|
||||
let imm8 = (imm8 & 15) as u32;
|
||||
simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32
|
||||
|
|
@ -142,6 +147,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
|
|||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
|
||||
let imm8 = (imm8 & 3) as u32;
|
||||
simd_extract::<_, i32>(a.as_i32x4(), imm8)
|
||||
|
|
@ -172,6 +178,7 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { insertps(a, b, $imm8) }
|
||||
|
|
@ -184,6 +191,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8))
|
||||
}
|
||||
|
|
@ -193,6 +201,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i))
|
||||
}
|
||||
|
|
@ -420,6 +429,7 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { dppd(a, b, $imm8) }
|
||||
|
|
@ -437,6 +447,7 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { dpps(a, b, $imm8) }
|
||||
|
|
@ -554,6 +565,7 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { roundpd(a, $imm4) }
|
||||
|
|
@ -583,6 +595,7 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps, rounding = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { roundps(a, $imm4) }
|
||||
|
|
@ -614,6 +627,7 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { roundsd(a, b, $imm4) }
|
||||
|
|
@ -645,6 +659,7 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { roundss(a, b, $imm4) }
|
||||
|
|
@ -734,6 +749,7 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let b = b.as_u8x16();
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -261,6 +262,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -276,6 +278,7 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -291,6 +294,7 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -306,6 +310,7 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -320,6 +325,7 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -335,6 +341,7 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -349,6 +356,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestrm(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -442,6 +450,7 @@ pub unsafe fn _mm_cmpestrm(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestri(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -459,6 +468,7 @@ pub unsafe fn _mm_cmpestri(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestrz(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -476,6 +486,7 @@ pub unsafe fn _mm_cmpestrz(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestrc(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -493,6 +504,7 @@ pub unsafe fn _mm_cmpestrc(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestrs(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -510,6 +522,7 @@ pub unsafe fn _mm_cmpestrs(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestro(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -528,6 +541,7 @@ pub unsafe fn _mm_cmpestro(
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
pub unsafe fn _mm_cmpestra(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
|
||||
let n = n as u32;
|
||||
// If palignr is shifting the pair of vectors more than the size of two
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => (aeskeygenassist(a, $imm8))
|
||||
|
|
|
|||
|
|
@ -312,6 +312,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { pextrw(a, $imm2) as i32 }
|
||||
|
|
@ -324,8 +325,12 @@ pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
|
||||
_mm_extract_pi16(a, imm2)
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { pextrw(a, $imm2) as i32 }
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
|
||||
|
|
@ -334,6 +339,7 @@ pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { pinsrw(a, d, $imm2) }
|
||||
|
|
@ -347,8 +353,12 @@ pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
_mm_insert_pi16(a, d, imm2)
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { pinsrw(a, d, $imm2) }
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Takes the most significant bit from each 8-bit element in a 64-bit
|
||||
|
|
@ -376,6 +386,7 @@ pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { pshufw(a, $imm8) }
|
||||
|
|
@ -388,8 +399,12 @@ pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
|
||||
_mm_shuffle_pi16(a, imm8)
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { pshufw(a, $imm8) }
|
||||
}
|
||||
constify_imm8!(imm8, call)
|
||||
}
|
||||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ use x86::*;
|
|||
/// Copy `a` to result, and insert the 64-bit integer `i` into result
|
||||
/// at the location specified by `index`.
|
||||
#[inline]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[target_feature(enable = "avx")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ use x86::*;
|
|||
/// Extract a 64-bit integer from `a`, selected with `imm8`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[rustc_args_required_const(1)]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
|
||||
let imm8 = (imm8 & 3) as u32;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use stdsimd_test::assert_instr;
|
|||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
|
||||
let imm8 = (imm8 & 1) as u32;
|
||||
simd_extract(a.as_i64x2(), imm8)
|
||||
|
|
@ -23,6 +24,7 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ extern crate proc_macro;
|
|||
extern crate proc_macro2;
|
||||
#[macro_use]
|
||||
extern crate quote;
|
||||
#[macro_use]
|
||||
extern crate syn;
|
||||
|
||||
use std::path::Path;
|
||||
|
|
@ -77,6 +78,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
|
|||
Some(i) => my_quote! { Some(#i) },
|
||||
None => my_quote! { None },
|
||||
};
|
||||
let required_const = find_required_const(&f.attrs);
|
||||
my_quote! {
|
||||
Function {
|
||||
name: stringify!(#name),
|
||||
|
|
@ -85,6 +87,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
|
|||
target_feature: #target_feature,
|
||||
instrs: &[#(stringify!(#instrs)),*],
|
||||
file: stringify!(#path),
|
||||
required_const: &[#(#required_const),*],
|
||||
}
|
||||
}
|
||||
})
|
||||
|
|
@ -236,3 +239,29 @@ fn find_target_feature(attrs: &[syn::Attribute]) -> Option<syn::Lit> {
|
|||
})
|
||||
.next()
|
||||
}
|
||||
|
||||
fn find_required_const(attrs: &[syn::Attribute]) -> Vec<usize> {
|
||||
attrs.iter()
|
||||
.filter(|a| a.path.segments[0].ident == "rustc_args_required_const")
|
||||
.map(|a| a.tts.clone())
|
||||
.map(|a| syn::parse::<RustcArgsRequiredConst>(a.into()).unwrap())
|
||||
.flat_map(|a| a.args)
|
||||
.collect()
|
||||
}
|
||||
|
||||
struct RustcArgsRequiredConst {
|
||||
args: Vec<usize>,
|
||||
}
|
||||
|
||||
impl syn::synom::Synom for RustcArgsRequiredConst {
|
||||
named!(parse -> Self, do_parse!(
|
||||
items: parens!(
|
||||
call!(syn::punctuated::Punctuated::<syn::LitInt, syn::token::Comma>::parse_terminated)
|
||||
) >>
|
||||
(RustcArgsRequiredConst {
|
||||
args: items.1.into_iter()
|
||||
.map(|a| a.value() as usize)
|
||||
.collect(),
|
||||
})
|
||||
));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@ struct Function {
|
|||
target_feature: Option<&'static str>,
|
||||
instrs: &'static [&'static str],
|
||||
file: &'static str,
|
||||
required_const: &'static [usize],
|
||||
}
|
||||
|
||||
static BOOL: Type = Type::Bool;
|
||||
static F32: Type = Type::PrimFloat(32);
|
||||
static F64: Type = Type::PrimFloat(64);
|
||||
static I16: Type = Type::PrimSigned(16);
|
||||
|
|
@ -63,7 +63,6 @@ enum Type {
|
|||
M256,
|
||||
M256D,
|
||||
M256I,
|
||||
Bool,
|
||||
Tuple,
|
||||
CpuidResult,
|
||||
}
|
||||
|
|
@ -301,7 +300,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
|
|||
|
||||
// Make sure we've got the right return type.
|
||||
if let Some(t) = rust.ret {
|
||||
equate(t, &intel.rettype, rust.name)?;
|
||||
equate(t, &intel.rettype, rust.name, false)?;
|
||||
} else if intel.rettype != "" && intel.rettype != "void" {
|
||||
bail!(
|
||||
"{} returns `{}` with intel, void in rust",
|
||||
|
|
@ -321,8 +320,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
|
|||
if rust.arguments.len() != intel.parameters.len() {
|
||||
bail!("wrong number of arguments on {}", rust.name)
|
||||
}
|
||||
for (a, b) in intel.parameters.iter().zip(rust.arguments) {
|
||||
equate(b, &a.type_, &intel.name)?;
|
||||
for (i, (a, b)) in intel.parameters.iter().zip(rust.arguments).enumerate() {
|
||||
let is_const = rust.required_const.contains(&i);
|
||||
equate(b, &a.type_, &intel.name, is_const)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -361,16 +361,25 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn equate(t: &Type, intel: &str, intrinsic: &str) -> Result<(), String> {
|
||||
fn equate(t: &Type,
|
||||
intel: &str,
|
||||
intrinsic: &str,
|
||||
is_const: bool) -> Result<(), String> {
|
||||
let intel = intel.replace(" *", "*");
|
||||
let intel = intel.replace(" const*", "*");
|
||||
let require_const = || {
|
||||
if is_const {
|
||||
return Ok(())
|
||||
}
|
||||
Err(format!("argument required to be const but isn't"))
|
||||
};
|
||||
match (t, &intel[..]) {
|
||||
(&Type::PrimFloat(32), "float") => {}
|
||||
(&Type::PrimFloat(64), "double") => {}
|
||||
(&Type::PrimSigned(16), "__int16") => {}
|
||||
(&Type::PrimSigned(16), "short") => {}
|
||||
(&Type::PrimSigned(32), "__int32") => {}
|
||||
(&Type::PrimSigned(32), "const int") => {}
|
||||
(&Type::PrimSigned(32), "const int") => require_const()?,
|
||||
(&Type::PrimSigned(32), "int") => {}
|
||||
(&Type::PrimSigned(64), "__int64") => {}
|
||||
(&Type::PrimSigned(64), "long long") => {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue