Remove unneeded transmutes

(or replace them with safe versions)
Authored by Eduardo Sánchez Muñoz, 2023-10-10 20:22:26 +02:00; committed by Amanieu d'Antras
parent 7808ffa5af
commit b2c5bc9696
8 changed files with 237 additions and 378 deletions
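
The changes below follow two recurring patterns: where a transmute's source and destination types are already identical, the call is simply dropped; where a genuine bit reinterpretation is intended, a safe conversion such as `f32::from_bits`/`f32::to_bits` (or the `f64` equivalents) is used instead. A minimal standalone sketch of those safe conversions, not taken from the commit itself:

```rust
// Standalone sketch (not part of this commit): the safe bit-level conversions
// that replace float <-> integer transmutes in the diffs below.
fn main() {
    // Raw IEEE-754 bits of a float, formerly transmute::<f32, u32>(x):
    assert_eq!(1.0f32.to_bits(), 0x3f80_0000);
    // Rebuilding a float from its bits, formerly transmute::<u32, f32>(x):
    assert_eq!(f32::from_bits(0x4000_0000), 2.0);
    // The f64 versions round-trip the same way:
    assert_eq!(f64::from_bits(2.0f64.to_bits()), 2.0);
}
```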


@ -311,7 +311,7 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
#[target_feature(enable = "avx512bitalg")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
transmute(bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0))
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -326,7 +326,7 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
#[target_feature(enable = "avx512bitalg")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
transmute(bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k))
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -338,7 +338,7 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
transmute(bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0))
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -353,7 +353,7 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
transmute(bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k))
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -365,7 +365,7 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
transmute(bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0))
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -380,7 +380,7 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
transmute(bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k))
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
}
#[cfg(test)]


@ -3703,8 +3703,7 @@ pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3722,8 +3721,7 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
let r = vpcmpuw(a, b, IMM8, k1);
transmute(r)
vpcmpuw(a, b, IMM8, k1)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3737,8 +3735,7 @@ pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpuw256(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3756,8 +3753,7 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
let r = vpcmpuw256(a, b, IMM8, k1);
transmute(r)
vpcmpuw256(a, b, IMM8, k1)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3771,8 +3767,7 @@ pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
let r = vpcmpuw128(a, b, IMM8, 0b11111111);
transmute(r)
vpcmpuw128(a, b, IMM8, 0b11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3790,8 +3785,7 @@ pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
let r = vpcmpuw128(a, b, IMM8, k1);
transmute(r)
vpcmpuw128(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3805,13 +3799,12 @@ pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
let r = vpcmpub(
vpcmpub(
a,
b,
IMM8,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
);
transmute(r)
)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3829,8 +3822,7 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
let r = vpcmpub(a, b, IMM8, k1);
transmute(r)
vpcmpub(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3844,8 +3836,7 @@ pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3863,8 +3854,7 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
let r = vpcmpub256(a, b, IMM8, k1);
transmute(r)
vpcmpub256(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3878,8 +3868,7 @@ pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpub128(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3897,8 +3886,7 @@ pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
let r = vpcmpub128(a, b, IMM8, k1);
transmute(r)
vpcmpub128(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3912,8 +3900,7 @@ pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3931,8 +3918,7 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
let r = vpcmpw(a, b, IMM8, k1);
transmute(r)
vpcmpw(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3946,8 +3932,7 @@ pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpw256(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3965,8 +3950,7 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
let r = vpcmpw256(a, b, IMM8, k1);
transmute(r)
vpcmpw256(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3980,8 +3964,7 @@ pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
let r = vpcmpw128(a, b, IMM8, 0b11111111);
transmute(r)
vpcmpw128(a, b, IMM8, 0b11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3999,8 +3982,7 @@ pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
let r = vpcmpw128(a, b, IMM8, k1);
transmute(r)
vpcmpw128(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4014,13 +3996,12 @@ pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
let r = vpcmpb(
vpcmpb(
a,
b,
IMM8,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
);
transmute(r)
)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4038,8 +4019,7 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
let r = vpcmpb(a, b, IMM8, k1);
transmute(r)
vpcmpb(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4053,8 +4033,7 @@ pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4072,8 +4051,7 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
let r = vpcmpb256(a, b, IMM8, k1);
transmute(r)
vpcmpb256(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4087,8 +4065,7 @@ pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpb128(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4106,8 +4083,7 @@ pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
let r = vpcmpb128(a, b, IMM8, k1);
transmute(r)
vpcmpb128(a, b, IMM8, k1)
}
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
@ -8566,7 +8542,7 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a + b)
a + b
}
/// Add 64-bit masks in a and b, and store the result in k.
@ -8575,7 +8551,7 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a + b)
a + b
}
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
@ -8584,7 +8560,7 @@ pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a & b)
a & b
}
/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
@ -8593,7 +8569,7 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a & b)
a & b
}
/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
@ -8602,7 +8578,7 @@ pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
transmute(a ^ 0b11111111_11111111_11111111_11111111)
a ^ 0b11111111_11111111_11111111_11111111
}
/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
@ -8611,7 +8587,7 @@ pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
transmute(a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111)
a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
}
/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
@ -8620,7 +8596,7 @@ pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(_knot_mask32(a) & b)
_knot_mask32(a) & b
}
/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
@ -8629,7 +8605,7 @@ pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(_knot_mask64(a) & b)
_knot_mask64(a) & b
}
/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
@ -8638,7 +8614,7 @@ pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a | b)
a | b
}
/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
@ -8647,7 +8623,7 @@ pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a | b)
a | b
}
/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
@ -8656,7 +8632,7 @@ pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a ^ b)
a ^ b
}
/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
@ -8665,7 +8641,7 @@ pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a ^ b)
a ^ b
}
/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
@ -8674,7 +8650,7 @@ pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(_knot_mask32(a ^ b))
_knot_mask32(a ^ b)
}
/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
@ -8683,7 +8659,7 @@ pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(_knot_mask64(a ^ b))
_knot_mask64(a ^ b)
}
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.

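The mask intrinsics above (`_kadd_mask32`, `_kand_mask64`, and so on) operate on plain integer aliases: in stdarch, `__mmask32` is `u32` and `__mmask64` is `u64`, so transmuting the result of an ordinary integer operation back to the same type was an identity conversion. A minimal sketch of why the `transmute` can simply be removed:

```rust
// Sketch only (plain u32 stands in for __mmask32, which is an alias for u32):
// with identical source and destination types, transmute is the identity, so
// the expression can be returned directly.
fn kand_mask32(a: u32, b: u32) -> u32 {
    // previously written as: unsafe { core::mem::transmute::<u32, u32>(a & b) }
    a & b
}
```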
File diff suppressed because it is too large.


@ -2176,12 +2176,12 @@ mod tests {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
let e: u32x4 = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0));
let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
assert_eq!(r, e);
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
assert_eq!(r2, e2);
}
@ -2197,15 +2197,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2221,15 +2221,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2245,15 +2245,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2269,15 +2269,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2293,15 +2293,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) != d.extract(0)
let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2322,15 +2322,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2351,15 +2351,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2380,15 +2380,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2409,15 +2409,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2433,15 +2433,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) ord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2457,15 +2457,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) unord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}


@ -4255,7 +4255,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpeq_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4263,7 +4263,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmplt_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4271,7 +4271,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmple_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4279,7 +4279,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpgt_sd() {
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4287,7 +4287,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpge_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4295,7 +4295,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpord_sd() {
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4303,7 +4303,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpunord_sd() {
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4311,7 +4311,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpneq_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4319,7 +4319,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnlt_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4327,7 +4327,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnle_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4335,7 +4335,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpngt_sd() {
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4343,7 +4343,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnge_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
assert_eq_m128i(r, e);
}


@ -201,7 +201,7 @@ pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
static_assert_uimm_bits!(IMM8, 2);
transmute(simd_extract::<_, f32>(a, IMM8 as u32))
simd_extract::<_, f32>(a, IMM8 as u32).to_bits() as i32
}
/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
@ -1259,9 +1259,9 @@ mod tests {
#[simd_test(enable = "sse4.1")]
unsafe fn test_mm_extract_ps() {
let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
let r: f32 = transmute(_mm_extract_ps::<1>(a));
let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
assert_eq!(r, 1.0);
let r: f32 = transmute(_mm_extract_ps::<3>(a));
let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
assert_eq!(r, 3.0);
}
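
As a usage note, the test above relies on the bit-preserving contract that the change to `_mm_extract_ps` makes explicit: the intrinsic returns the selected lane's IEEE-754 bit pattern as an `i32`, and `f32::from_bits` recovers the float. A scalar sketch of the same round trip, with an illustrative value:

```rust
fn main() {
    // Scalar sketch, no SIMD (value chosen for illustration):
    let lane: f32 = 1.0;
    let extracted: i32 = lane.to_bits() as i32; // what _mm_extract_ps::<IMM8> now returns
    let recovered: f32 = f32::from_bits(extracted as u32); // how the test reads it back
    assert_eq!(recovered, 1.0);
}
```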


@ -33,7 +33,7 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
@ -43,7 +43,7 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -54,8 +54,7 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -66,8 +65,7 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -78,8 +76,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -90,8 +87,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -101,7 +97,7 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -111,7 +107,7 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -121,7 +117,7 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -131,7 +127,7 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -270,8 +266,7 @@ pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m
pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, ROUNDING);
transmute(r)
vcvtsd2si64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -290,8 +285,7 @@ pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, ROUNDING);
transmute(r)
vcvtsd2si64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -310,8 +304,7 @@ pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2usi64(a, ROUNDING);
transmute(r)
vcvtsd2usi64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -330,8 +323,7 @@ pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2si64(a, ROUNDING);
transmute(r)
vcvtss2si64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -350,8 +342,7 @@ pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2si64(a, ROUNDING);
transmute(r)
vcvtss2si64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -370,8 +361,7 @@ pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2usi64(a, ROUNDING);
transmute(r)
vcvtss2usi64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -385,8 +375,7 @@ pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, SAE);
transmute(r)
vcvtsd2si64(a, SAE)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -400,8 +389,7 @@ pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, SAE);
transmute(r)
vcvtsd2si64(a, SAE)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -415,8 +403,7 @@ pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2usi64(a, SAE);
transmute(r)
vcvtsd2usi64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -430,8 +417,7 @@ pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2si64(a, SAE);
transmute(r)
vcvtss2si64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -445,8 +431,7 @@ pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2si64(a, SAE);
transmute(r)
vcvtss2si64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -460,8 +445,7 @@ pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2usi64(a, SAE);
transmute(r)
vcvtss2usi64(a, SAE)
}
#[allow(improper_ctypes)]


@ -49,11 +49,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
ecx,
edx,
} = __cpuid(0);
let vendor_id: [[u8; 4]; 3] = [
mem::transmute(ebx),
mem::transmute(edx),
mem::transmute(ecx),
];
let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()];
let vendor_id: [u8; 12] = mem::transmute(vendor_id);
(max_basic_leaf, vendor_id)
};
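
A short hedged sketch of the byte-splitting used above (`split_vendor_regs` is a hypothetical helper, not part of the source): `u32::to_ne_bytes` yields the same native-endian `[u8; 4]` that `mem::transmute::<u32, [u8; 4]>` produced, without `unsafe`.

```rust
// Hypothetical helper, for illustration only: the safe replacement for the three
// per-register transmutes. to_ne_bytes preserves the in-memory (native-endian)
// byte order that the transmute exposed.
fn split_vendor_regs(ebx: u32, edx: u32, ecx: u32) -> [[u8; 4]; 3] {
    [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]
}
```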