Remove unneeded transmutes

(or replace them with safe versions)
Authored by Eduardo Sánchez Muñoz, 2023-10-10 20:22:26 +02:00; committed by Amanieu d'Antras
parent 7808ffa5af
commit b2c5bc9696
8 changed files with 237 additions and 378 deletions
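
The changes below follow two recurring patterns: where a transmute's source and destination types are already identical, the call is simply dropped; where a genuine bit reinterpretation is intended, a safe conversion such as `f32::from_bits`/`f32::to_bits` (or the `f64` equivalents) is used instead. A minimal standalone sketch of those safe conversions, not taken from the commit itself:

```rust
// Standalone sketch (not part of this commit): the safe bit-level conversions
// that replace float <-> integer transmutes in the diffs below.
fn main() {
    // Raw IEEE-754 bits of a float, formerly transmute::<f32, u32>(x):
    assert_eq!(1.0f32.to_bits(), 0x3f80_0000);
    // Rebuilding a float from its bits, formerly transmute::<u32, f32>(x):
    assert_eq!(f32::from_bits(0x4000_0000), 2.0);
    // The f64 versions round-trip the same way:
    assert_eq!(f64::from_bits(2.0f64.to_bits()), 2.0);
}
```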


@ -311,7 +311,7 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
#[target_feature(enable = "avx512bitalg")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
transmute(bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0))
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -326,7 +326,7 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
#[target_feature(enable = "avx512bitalg")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
transmute(bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k))
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -338,7 +338,7 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
transmute(bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0))
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -353,7 +353,7 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
transmute(bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k))
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -365,7 +365,7 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
transmute(bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0))
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -380,7 +380,7 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
transmute(bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k))
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
}
#[cfg(test)]


@ -3703,8 +3703,7 @@ pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3722,8 +3721,7 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x32();
let b = b.as_u16x32();
let r = vpcmpuw(a, b, IMM8, k1);
transmute(r)
vpcmpuw(a, b, IMM8, k1)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3737,8 +3735,7 @@ pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpuw256(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3756,8 +3753,7 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x16();
let b = b.as_u16x16();
let r = vpcmpuw256(a, b, IMM8, k1);
transmute(r)
vpcmpuw256(a, b, IMM8, k1)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3771,8 +3767,7 @@ pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
let r = vpcmpuw128(a, b, IMM8, 0b11111111);
transmute(r)
vpcmpuw128(a, b, IMM8, 0b11111111)
}
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3790,8 +3785,7 @@ pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u16x8();
let b = b.as_u16x8();
let r = vpcmpuw128(a, b, IMM8, k1);
transmute(r)
vpcmpuw128(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3805,13 +3799,12 @@ pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
let r = vpcmpub(
vpcmpub(
a,
b,
IMM8,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
);
transmute(r)
)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3829,8 +3822,7 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x64();
let b = b.as_u8x64();
let r = vpcmpub(a, b, IMM8, k1);
transmute(r)
vpcmpub(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3844,8 +3836,7 @@ pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3863,8 +3854,7 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x32();
let b = b.as_u8x32();
let r = vpcmpub256(a, b, IMM8, k1);
transmute(r)
vpcmpub256(a, b, IMM8, k1)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3878,8 +3868,7 @@ pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpub128(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3897,8 +3886,7 @@ pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_u8x16();
let b = b.as_u8x16();
let r = vpcmpub128(a, b, IMM8, k1);
transmute(r)
vpcmpub128(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3912,8 +3900,7 @@ pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3931,8 +3918,7 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x32();
let b = b.as_i16x32();
let r = vpcmpw(a, b, IMM8, k1);
transmute(r)
vpcmpw(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3946,8 +3932,7 @@ pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) ->
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpw256(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3965,8 +3950,7 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x16();
let b = b.as_i16x16();
let r = vpcmpw256(a, b, IMM8, k1);
transmute(r)
vpcmpw256(a, b, IMM8, k1)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -3980,8 +3964,7 @@ pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __m
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
let r = vpcmpw128(a, b, IMM8, 0b11111111);
transmute(r)
vpcmpw128(a, b, IMM8, 0b11111111)
}
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -3999,8 +3982,7 @@ pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i16x8();
let b = b.as_i16x8();
let r = vpcmpw128(a, b, IMM8, k1);
transmute(r)
vpcmpw128(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4014,13 +3996,12 @@ pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
let r = vpcmpb(
vpcmpb(
a,
b,
IMM8,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
);
transmute(r)
)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4038,8 +4019,7 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x64();
let b = b.as_i8x64();
let r = vpcmpb(a, b, IMM8, k1);
transmute(r)
vpcmpb(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4053,8 +4033,7 @@ pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> _
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
transmute(r)
vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4072,8 +4051,7 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x32();
let b = b.as_i8x32();
let r = vpcmpb256(a, b, IMM8, k1);
transmute(r)
vpcmpb256(a, b, IMM8, k1)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@ -4087,8 +4065,7 @@ pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mm
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111);
transmute(r)
vpcmpb128(a, b, IMM8, 0b11111111_11111111)
}
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@ -4106,8 +4083,7 @@ pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
static_assert_uimm_bits!(IMM8, 3);
let a = a.as_i8x16();
let b = b.as_i8x16();
let r = vpcmpb128(a, b, IMM8, k1);
transmute(r)
vpcmpb128(a, b, IMM8, k1)
}
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
@ -8566,7 +8542,7 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a + b)
a + b
}
/// Add 64-bit masks in a and b, and store the result in k.
@ -8575,7 +8551,7 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a + b)
a + b
}
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
@ -8584,7 +8560,7 @@ pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a & b)
a & b
}
/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
@ -8593,7 +8569,7 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a & b)
a & b
}
/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
@ -8602,7 +8578,7 @@ pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
transmute(a ^ 0b11111111_11111111_11111111_11111111)
a ^ 0b11111111_11111111_11111111_11111111
}
/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
@ -8611,7 +8587,7 @@ pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
transmute(a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111)
a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
}
/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
@ -8620,7 +8596,7 @@ pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(_knot_mask32(a) & b)
_knot_mask32(a) & b
}
/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
@ -8629,7 +8605,7 @@ pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(_knot_mask64(a) & b)
_knot_mask64(a) & b
}
/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
@ -8638,7 +8614,7 @@ pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a | b)
a | b
}
/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
@ -8647,7 +8623,7 @@ pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a | b)
a | b
}
/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
@ -8656,7 +8632,7 @@ pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a ^ b)
a ^ b
}
/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
@ -8665,7 +8641,7 @@ pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a ^ b)
a ^ b
}
/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
@ -8674,7 +8650,7 @@ pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(_knot_mask32(a ^ b))
_knot_mask32(a ^ b)
}
/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
@ -8683,7 +8659,7 @@ pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(_knot_mask64(a ^ b))
_knot_mask64(a ^ b)
}
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.

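The mask intrinsics above (`_kadd_mask32`, `_kand_mask64`, and so on) operate on plain integer aliases: in stdarch, `__mmask32` is `u32` and `__mmask64` is `u64`, so transmuting the result of an ordinary integer operation back to the same type was an identity conversion. A minimal sketch of why the `transmute` can simply be removed:

```rust
// Sketch only (plain u32 stands in for __mmask32, which is an alias for u32):
// with identical source and destination types, transmute is the identity, so
// the expression can be returned directly.
fn kand_mask32(a: u32, b: u32) -> u32 {
    // previously written as: unsafe { core::mem::transmute::<u32, u32>(a & b) }
    a & b
}
```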
File diff suppressed because it is too large.


@ -2176,12 +2176,12 @@ mod tests {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
let e: u32x4 = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0));
let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
assert_eq!(r, e);
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
assert_eq!(r2, e2);
}
@ -2197,15 +2197,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2221,15 +2221,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2245,15 +2245,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2269,15 +2269,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2293,15 +2293,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) != d.extract(0)
let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2322,15 +2322,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2351,15 +2351,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2380,15 +2380,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2409,15 +2409,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2433,15 +2433,15 @@ mod tests {
let d1 = !0u32; // a.extract(0) ord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}
@ -2457,15 +2457,15 @@ mod tests {
let d1 = 0u32; // a.extract(0) unord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
assert_eq!(rd, ed);
}


@ -4255,7 +4255,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpeq_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4263,7 +4263,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmplt_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4271,7 +4271,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmple_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4279,7 +4279,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpgt_sd() {
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4287,7 +4287,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpge_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4295,7 +4295,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpord_sd() {
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4303,7 +4303,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpunord_sd() {
let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4311,7 +4311,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpneq_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(!0, transmute(2.0f64));
let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4319,7 +4319,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnlt_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4327,7 +4327,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnle_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4335,7 +4335,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpngt_sd() {
let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
assert_eq_m128i(r, e);
}
@ -4343,7 +4343,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpnge_sd() {
let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
let e = _mm_setr_epi64x(0, transmute(2.0f64));
let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
assert_eq_m128i(r, e);
}


@ -201,7 +201,7 @@ pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
static_assert_uimm_bits!(IMM8, 2);
transmute(simd_extract::<_, f32>(a, IMM8 as u32))
simd_extract::<_, f32>(a, IMM8 as u32).to_bits() as i32
}
/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
@ -1259,9 +1259,9 @@ mod tests {
#[simd_test(enable = "sse4.1")]
unsafe fn test_mm_extract_ps() {
let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
let r: f32 = transmute(_mm_extract_ps::<1>(a));
let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
assert_eq!(r, 1.0);
let r: f32 = transmute(_mm_extract_ps::<3>(a));
let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
assert_eq!(r, 3.0);
}
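
As a usage note, the test above relies on the bit-preserving contract that the change to `_mm_extract_ps` makes explicit: the intrinsic returns the selected lane's IEEE-754 bit pattern as an `i32`, and `f32::from_bits` recovers the float. A scalar sketch of the same round trip, with an illustrative value:

```rust
fn main() {
    // Scalar sketch, no SIMD (value chosen for illustration):
    let lane: f32 = 1.0;
    let extracted: i32 = lane.to_bits() as i32; // what _mm_extract_ps::<IMM8> now returns
    let recovered: f32 = f32::from_bits(extracted as u32); // how the test reads it back
    assert_eq!(recovered, 1.0);
}
```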


@ -33,7 +33,7 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
@ -43,7 +43,7 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -54,8 +54,7 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -66,8 +65,7 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -78,8 +76,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -90,8 +87,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
simd_insert(a, 0, b)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -101,7 +97,7 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -111,7 +107,7 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -121,7 +117,7 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -131,7 +127,7 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -270,8 +266,7 @@ pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m
pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, ROUNDING);
transmute(r)
vcvtsd2si64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -290,8 +285,7 @@ pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, ROUNDING);
transmute(r)
vcvtsd2si64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -310,8 +304,7 @@ pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsd2usi64(a, ROUNDING);
transmute(r)
vcvtsd2usi64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -330,8 +323,7 @@ pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2si64(a, ROUNDING);
transmute(r)
vcvtss2si64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -350,8 +342,7 @@ pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2si64(a, ROUNDING);
transmute(r)
vcvtss2si64(a, ROUNDING)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -370,8 +361,7 @@ pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtss2usi64(a, ROUNDING);
transmute(r)
vcvtss2usi64(a, ROUNDING)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -385,8 +375,7 @@ pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, SAE);
transmute(r)
vcvtsd2si64(a, SAE)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -400,8 +389,7 @@ pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2si64(a, SAE);
transmute(r)
vcvtsd2si64(a, SAE)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -415,8 +403,7 @@ pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
let r = vcvtsd2usi64(a, SAE);
transmute(r)
vcvtsd2usi64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -430,8 +417,7 @@ pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2si64(a, SAE);
transmute(r)
vcvtss2si64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -445,8 +431,7 @@ pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2si64(a, SAE);
transmute(r)
vcvtss2si64(a, SAE)
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -460,8 +445,7 @@ pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
let r = vcvtss2usi64(a, SAE);
transmute(r)
vcvtss2usi64(a, SAE)
}
#[allow(improper_ctypes)]


@ -49,11 +49,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
ecx,
edx,
} = __cpuid(0);
let vendor_id: [[u8; 4]; 3] = [
mem::transmute(ebx),
mem::transmute(edx),
mem::transmute(ecx),
];
let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()];
let vendor_id: [u8; 12] = mem::transmute(vendor_id);
(max_basic_leaf, vendor_id)
};
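
A short hedged sketch of the byte-splitting used above (`split_vendor_regs` is a hypothetical helper, not part of the source): `u32::to_ne_bytes` yields the same native-endian `[u8; 4]` that `mem::transmute::<u32, [u8; 4]>` produced, without `unsafe`.

```rust
// Hypothetical helper, for illustration only: the safe replacement for the three
// per-register transmutes. to_ne_bytes preserves the in-memory (native-endian)
// byte order that the transmute exposed.
fn split_vendor_regs(ebx: u32, edx: u32, ecx: u32) -> [[u8; 4]; 3] {
    [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]
}
```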