Rollup merge of #151711 - folkertdev:stdarch-sync-2026-01-26, r=folkertdev

stdarch subtree update Subtree update of `stdarch` to 9ba0a3f392. Created using https://github.com/rust-lang/josh-sync. r? @ghost
2026-01-27 17:00:54 +01:00 · 2026-01-27 17:00:54 +01:00 · 7d11720fd3
commit 7d11720fd3
parent 84bb764741 fdad66a382
29 changed files with 7214 additions and 6731 deletions
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@ -182,6 +182,7 @@
 #![feature(staged_api)]
 #![feature(stmt_expr_attributes)]
 #![feature(strict_provenance_lints)]
+#![feature(target_feature_inline_always)]
 #![feature(trait_alias)]
 #![feature(transparent_unions)]
 #![feature(try_blocks)]
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@ -309,6 +309,7 @@
 #![feature(staged_api)]
 #![feature(stmt_expr_attributes)]
 #![feature(strict_provenance_lints)]
+#![feature(target_feature_inline_always)]
 #![feature(thread_local)]
 #![feature(try_blocks)]
 #![feature(try_trait_v2)]
--- a/library/stdarch/ci/run.sh
+++ b/library/stdarch/ci/run.sh
@ -40,7 +40,7 @@ case ${TARGET} in
 	export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
 	;;
    armv7-*eabihf | thumbv7-*eabihf)
-        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon,+fp16"
        ;;
    amdgcn-*)
        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-cpu=gfx1200"
--- a/library/stdarch/crates/core_arch/rustfmt.toml
+++ b/library/stdarch/crates/core_arch/rustfmt.toml
@ -1,3 +0,0 @@
-ignore = [
-    "src/simd.rs",
-]
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs
@ -13,194 +13,195 @@ use crate::core_arch::aarch64::*;
 use crate::core_arch::simd::*;
 use std::mem;
 use stdarch_test::simd_test;
+
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s8() {
+fn test_vld1_s8() {
    let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
+    let r = unsafe { i8x8::from(vld1_s8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s8() {
+fn test_vld1q_s8() {
    let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
+    let r = unsafe { i8x16::from(vld1q_s8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s16() {
+fn test_vld1_s16() {
    let a: [i16; 5] = [0, 1, 2, 3, 4];
    let e = i16x4::new(1, 2, 3, 4);
-    let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
+    let r = unsafe { i16x4::from(vld1_s16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s16() {
+fn test_vld1q_s16() {
    let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
+    let r = unsafe { i16x8::from(vld1q_s16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s32() {
+fn test_vld1_s32() {
    let a: [i32; 3] = [0, 1, 2];
    let e = i32x2::new(1, 2);
-    let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
+    let r = unsafe { i32x2::from(vld1_s32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s32() {
+fn test_vld1q_s32() {
    let a: [i32; 5] = [0, 1, 2, 3, 4];
    let e = i32x4::new(1, 2, 3, 4);
-    let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
+    let r = unsafe { i32x4::from(vld1q_s32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s64() {
+fn test_vld1_s64() {
    let a: [i64; 2] = [0, 1];
    let e = i64x1::new(1);
-    let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
+    let r = unsafe { i64x1::from(vld1_s64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s64() {
+fn test_vld1q_s64() {
    let a: [i64; 3] = [0, 1, 2];
    let e = i64x2::new(1, 2);
-    let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
+    let r = unsafe { i64x2::from(vld1q_s64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u8() {
+fn test_vld1_u8() {
    let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
+    let r = unsafe { u8x8::from(vld1_u8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u8() {
+fn test_vld1q_u8() {
    let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
+    let r = unsafe { u8x16::from(vld1q_u8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u16() {
+fn test_vld1_u16() {
    let a: [u16; 5] = [0, 1, 2, 3, 4];
    let e = u16x4::new(1, 2, 3, 4);
-    let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
+    let r = unsafe { u16x4::from(vld1_u16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u16() {
+fn test_vld1q_u16() {
    let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
+    let r = unsafe { u16x8::from(vld1q_u16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u32() {
+fn test_vld1_u32() {
    let a: [u32; 3] = [0, 1, 2];
    let e = u32x2::new(1, 2);
-    let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
+    let r = unsafe { u32x2::from(vld1_u32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u32() {
+fn test_vld1q_u32() {
    let a: [u32; 5] = [0, 1, 2, 3, 4];
    let e = u32x4::new(1, 2, 3, 4);
-    let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
+    let r = unsafe { u32x4::from(vld1q_u32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u64() {
+fn test_vld1_u64() {
    let a: [u64; 2] = [0, 1];
    let e = u64x1::new(1);
-    let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
+    let r = unsafe { u64x1::from(vld1_u64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u64() {
+fn test_vld1q_u64() {
    let a: [u64; 3] = [0, 1, 2];
    let e = u64x2::new(1, 2);
-    let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
+    let r = unsafe { u64x2::from(vld1q_u64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_p8() {
+fn test_vld1_p8() {
    let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
+    let r = unsafe { u8x8::from(vld1_p8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_p8() {
+fn test_vld1q_p8() {
    let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
+    let r = unsafe { u8x16::from(vld1q_p8(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_p16() {
+fn test_vld1_p16() {
    let a: [p16; 5] = [0, 1, 2, 3, 4];
    let e = u16x4::new(1, 2, 3, 4);
-    let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
+    let r = unsafe { u16x4::from(vld1_p16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_p16() {
+fn test_vld1q_p16() {
    let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
    let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
+    let r = unsafe { u16x8::from(vld1q_p16(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vld1_p64() {
+fn test_vld1_p64() {
    let a: [p64; 2] = [0, 1];
    let e = u64x1::new(1);
-    let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr()));
+    let r = unsafe { u64x1::from(vld1_p64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vld1q_p64() {
+fn test_vld1q_p64() {
    let a: [p64; 3] = [0, 1, 2];
    let e = u64x2::new(1, 2);
-    let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr()));
+    let r = unsafe { u64x2::from(vld1q_p64(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_f32() {
+fn test_vld1_f32() {
    let a: [f32; 3] = [0., 1., 2.];
    let e = f32x2::new(1., 2.);
-    let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
+    let r = unsafe { f32x2::from(vld1_f32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_f32() {
+fn test_vld1q_f32() {
    let a: [f32; 5] = [0., 1., 2., 3., 4.];
    let e = f32x4::new(1., 2., 3., 4.);
-    let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
+    let r = unsafe { f32x4::from(vld1q_f32(a[1..].as_ptr())) };
    assert_eq!(r, e)
 }
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs
@ -14,11 +14,13 @@ use crate::core_arch::simd::*;
 use stdarch_test::simd_test;

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s8() {
+fn test_vst1_s8() {
    let mut vals = [0_i8; 9];
    let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -32,11 +34,13 @@ unsafe fn test_vst1_s8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s8() {
+fn test_vst1q_s8() {
    let mut vals = [0_i8; 17];
    let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);

-    vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -58,11 +62,13 @@ unsafe fn test_vst1q_s8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s16() {
+fn test_vst1_s16() {
    let mut vals = [0_i16; 5];
    let a = i16x4::new(1, 2, 3, 4);

-    vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -72,11 +78,13 @@ unsafe fn test_vst1_s16() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s16() {
+fn test_vst1q_s16() {
    let mut vals = [0_i16; 9];
    let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -90,11 +98,13 @@ unsafe fn test_vst1q_s16() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s32() {
+fn test_vst1_s32() {
    let mut vals = [0_i32; 3];
    let a = i32x2::new(1, 2);

-    vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -102,11 +112,13 @@ unsafe fn test_vst1_s32() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s32() {
+fn test_vst1q_s32() {
    let mut vals = [0_i32; 5];
    let a = i32x4::new(1, 2, 3, 4);

-    vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -116,22 +128,26 @@ unsafe fn test_vst1q_s32() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s64() {
+fn test_vst1_s64() {
    let mut vals = [0_i64; 2];
    let a = i64x1::new(1);

-    vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s64() {
+fn test_vst1q_s64() {
    let mut vals = [0_i64; 3];
    let a = i64x2::new(1, 2);

-    vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -139,11 +155,13 @@ unsafe fn test_vst1q_s64() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u8() {
+fn test_vst1_u8() {
    let mut vals = [0_u8; 9];
    let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -157,11 +175,13 @@ unsafe fn test_vst1_u8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u8() {
+fn test_vst1q_u8() {
    let mut vals = [0_u8; 17];
    let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);

-    vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -183,11 +203,13 @@ unsafe fn test_vst1q_u8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u16() {
+fn test_vst1_u16() {
    let mut vals = [0_u16; 5];
    let a = u16x4::new(1, 2, 3, 4);

-    vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -197,11 +219,13 @@ unsafe fn test_vst1_u16() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u16() {
+fn test_vst1q_u16() {
    let mut vals = [0_u16; 9];
    let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -215,11 +239,13 @@ unsafe fn test_vst1q_u16() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u32() {
+fn test_vst1_u32() {
    let mut vals = [0_u32; 3];
    let a = u32x2::new(1, 2);

-    vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -227,11 +253,13 @@ unsafe fn test_vst1_u32() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u32() {
+fn test_vst1q_u32() {
    let mut vals = [0_u32; 5];
    let a = u32x4::new(1, 2, 3, 4);

-    vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -241,22 +269,26 @@ unsafe fn test_vst1q_u32() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u64() {
+fn test_vst1_u64() {
    let mut vals = [0_u64; 2];
    let a = u64x1::new(1);

-    vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u64() {
+fn test_vst1q_u64() {
    let mut vals = [0_u64; 3];
    let a = u64x2::new(1, 2);

-    vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -264,11 +296,13 @@ unsafe fn test_vst1q_u64() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_p8() {
+fn test_vst1_p8() {
    let mut vals = [0_u8; 9];
    let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -282,11 +316,13 @@ unsafe fn test_vst1_p8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_p8() {
+fn test_vst1q_p8() {
    let mut vals = [0_u8; 17];
    let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);

-    vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p8(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -308,11 +344,13 @@ unsafe fn test_vst1q_p8() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_p16() {
+fn test_vst1_p16() {
    let mut vals = [0_u16; 5];
    let a = u16x4::new(1, 2, 3, 4);

-    vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -322,11 +360,13 @@ unsafe fn test_vst1_p16() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_p16() {
+fn test_vst1q_p16() {
    let mut vals = [0_u16; 9];
    let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);

-    vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p16(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -340,22 +380,26 @@ unsafe fn test_vst1q_p16() {
 }

 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vst1_p64() {
+fn test_vst1_p64() {
    let mut vals = [0_u64; 2];
    let a = u64x1::new(1);

-    vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
 }

 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vst1q_p64() {
+fn test_vst1q_p64() {
    let mut vals = [0_u64; 3];
    let a = u64x2::new(1, 2);

-    vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p64(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0);
    assert_eq!(vals[1], 1);
@ -363,11 +407,13 @@ unsafe fn test_vst1q_p64() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_f32() {
+fn test_vst1_f32() {
    let mut vals = [0_f32; 3];
    let a = f32x2::new(1., 2.);

-    vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_f32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0.);
    assert_eq!(vals[1], 1.);
@ -375,11 +421,13 @@ unsafe fn test_vst1_f32() {
 }

 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_f32() {
+fn test_vst1q_f32() {
    let mut vals = [0_f32; 5];
    let a = f32x4::new(1., 2., 3., 4.);

-    vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_f32(vals[1..].as_mut_ptr(), a.into());
+    }

    assert_eq!(vals[0], 0.);
    assert_eq!(vals[1], 1.);
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
@ -21,19 +21,19 @@ macro_rules! test_vtbl {
    ) => {
        #[cfg(target_endian = "little")]
        #[simd_test(enable = "neon")]
-        unsafe fn $test_name() {
+        fn $test_name() {
            // create table as array, and transmute it to
            // arm's table type
-            let table: $table_t = mem::transmute([$($table_v),*]);
+            let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };

            // For each control vector, perform a table lookup and
            // verify the result:
            $(
                {
-                    let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
-                    let result = $fn_id(table, mem::transmute(ctrl));
-                    let result: $ctrl_t = mem::transmute(result);
-                    let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
+                    let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
+                    let result = $fn_id(table, unsafe { mem::transmute(ctrl) });
+                    let result: $ctrl_t = unsafe { mem::transmute(result) };
+                    let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
                    assert_eq!(result, expected);
                }
            )*
@ -171,20 +171,19 @@ macro_rules! test_vtbx {
    ) => {
        #[cfg(target_endian = "little")]
        #[simd_test(enable = "neon")]
-        unsafe fn $test_name() {
+        fn $test_name() {
            // create table as array, and transmute it to
            // arm's table type
-            let table: $table_t = mem::transmute([$($table_v),*]);
-            let ext: $ext_t = mem::transmute([$($ext_v),*]);
-
+            let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
+            let ext: $ext_t = unsafe { mem::transmute([$($ext_v),*]) };
            // For each control vector, perform a table lookup and
            // verify the result:
            $(
                {
-                    let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
-                    let result = $fn_id(ext, table, mem::transmute(ctrl));
-                    let result: $ctrl_t = mem::transmute(result);
-                    let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
+                    let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
+                    let result = $fn_id(ext, table, unsafe { mem::transmute(ctrl) });
+                    let result: $ctrl_t = unsafe { mem::transmute(result) };
+                    let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
                    assert_eq!(result, expected);
                }
            )*
--- a/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/test_support.rs
@ -111,13 +111,13 @@ macro_rules! V_f32 {

 macro_rules! to64 {
    ($t : ident) => {
-        |v: $t| -> u64 { transmute(v) }
+        |v: $t| -> u64 { unsafe { transmute(v) } }
    };
 }

 macro_rules! to128 {
    ($t : ident) => {
-        |v: $t| -> u128 { transmute(v) }
+        |v: $t| -> u128 { unsafe { transmute(v) } }
    };
 }

@ -158,9 +158,7 @@ pub(crate) fn test<T, U, V, W, X>(
 macro_rules! gen_test_fn {
    ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
        pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
-            unsafe {
-                test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
-            };
+            test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun);
        }
    };
 }
--- a/library/stdarch/crates/core_arch/src/lib.rs
+++ b/library/stdarch/crates/core_arch/src/lib.rs
@ -32,12 +32,14 @@
    x86_amx_intrinsics,
    f16,
    aarch64_unstable_target_feature,
+    target_feature_inline_always,
    bigint_helper_methods,
    funnel_shifts,
    avx10_target_feature,
    const_trait_impl,
    const_cmp,
-    const_eval_select
+    const_eval_select,
+    maybe_uninit_as_bytes
 )]
 #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
 #![deny(clippy::missing_inline_in_public_items)]
@ -87,4 +89,4 @@ pub mod arch {
 }

 #[allow(unused_imports)]
-use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
+use core::{array, cmp, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
--- a/library/stdarch/crates/core_arch/src/macros.rs
+++ b/library/stdarch/crates/core_arch/src/macros.rs
@ -90,17 +90,10 @@ macro_rules! types {
        pub struct $name($v [$elem_type; $len]);

        impl $name {
-            /// Using `my_simd([x; N])` seemingly fails tests,
-            /// so use this internal helper for it instead.
+            /// Put the same value in every lane.
            #[inline(always)]
            $v fn splat(value: $elem_type) -> $name {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([value]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
+                unsafe { $crate::intrinsics::simd::simd_splat(value) }
            }

            /// Returns an array reference containing the entire SIMD vector.
@ -135,6 +128,22 @@ macro_rules! types {
                crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
            }
        }
+
+        $(#[$stability])+
+        impl crate::convert::From<crate::core_arch::simd::Simd<$elem_type, $len>> for $name {
+            #[inline(always)]
+            fn from(simd: crate::core_arch::simd::Simd<$elem_type, $len>) -> Self {
+                unsafe { crate::mem::transmute(simd) }
+            }
+        }
+
+        $(#[$stability])+
+        impl crate::convert::From<$name> for crate::core_arch::simd::Simd<$elem_type, $len> {
+            #[inline(always)]
+            fn from(simd: $name) -> Self {
+                unsafe { crate::mem::transmute(simd) }
+            }
+        }
    )*);
 }

--- a/library/stdarch/crates/core_arch/src/nvptx/mod.rs
+++ b/library/stdarch/crates/core_arch/src/nvptx/mod.rs
@ -23,29 +23,29 @@ unsafe extern "C" {
    #[link_name = "llvm.nvvm.barrier0"]
    fn syncthreads() -> ();
    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
-    fn block_dim_x() -> i32;
+    fn block_dim_x() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
-    fn block_dim_y() -> i32;
+    fn block_dim_y() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
-    fn block_dim_z() -> i32;
+    fn block_dim_z() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
-    fn block_idx_x() -> i32;
+    fn block_idx_x() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
-    fn block_idx_y() -> i32;
+    fn block_idx_y() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
-    fn block_idx_z() -> i32;
+    fn block_idx_z() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
-    fn grid_dim_x() -> i32;
+    fn grid_dim_x() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
-    fn grid_dim_y() -> i32;
+    fn grid_dim_y() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
-    fn grid_dim_z() -> i32;
+    fn grid_dim_z() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
-    fn thread_idx_x() -> i32;
+    fn thread_idx_x() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
-    fn thread_idx_y() -> i32;
+    fn thread_idx_y() -> u32;
    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
-    fn thread_idx_z() -> i32;
+    fn thread_idx_z() -> u32;
 }

 /// Synchronizes all threads in the block.
@ -58,84 +58,84 @@ pub unsafe fn _syncthreads() -> () {
 /// x-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_x() -> i32 {
+pub unsafe fn _block_dim_x() -> u32 {
    block_dim_x()
 }

 /// y-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_y() -> i32 {
+pub unsafe fn _block_dim_y() -> u32 {
    block_dim_y()
 }

 /// z-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_z() -> i32 {
+pub unsafe fn _block_dim_z() -> u32 {
    block_dim_z()
 }

 /// x-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_x() -> i32 {
+pub unsafe fn _block_idx_x() -> u32 {
    block_idx_x()
 }

 /// y-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_y() -> i32 {
+pub unsafe fn _block_idx_y() -> u32 {
    block_idx_y()
 }

 /// z-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_z() -> i32 {
+pub unsafe fn _block_idx_z() -> u32 {
    block_idx_z()
 }

 /// x-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_x() -> i32 {
+pub unsafe fn _grid_dim_x() -> u32 {
    grid_dim_x()
 }

 /// y-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_y() -> i32 {
+pub unsafe fn _grid_dim_y() -> u32 {
    grid_dim_y()
 }

 /// z-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_z() -> i32 {
+pub unsafe fn _grid_dim_z() -> u32 {
    grid_dim_z()
 }

 /// x-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_x() -> i32 {
+pub unsafe fn _thread_idx_x() -> u32 {
    thread_idx_x()
 }

 /// y-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_y() -> i32 {
+pub unsafe fn _thread_idx_y() -> u32 {
    thread_idx_y()
 }

 /// z-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_z() -> i32 {
+pub unsafe fn _thread_idx_z() -> u32 {
    thread_idx_z()
 }

--- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
@ -364,17 +364,46 @@ unsafe extern "C" {
    fn vrfin(a: vector_float) -> vector_float;
 }

-impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, f32x4 }
-
-impl_neg! { i8x16 : 0 }
-impl_neg! { i16x8 : 0 }
-impl_neg! { i32x4 : 0 }
-impl_neg! { f32x4 : 0f32 }
-
 #[macro_use]
 mod sealed {
    use super::*;

+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorNeg {
+        unsafe fn vec_neg(self) -> Self;
+    }
+
+    macro_rules! impl_neg {
+        ($($v:ty)*) => {
+            $(
+                #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+                impl VectorNeg for $v {
+                    #[inline]
+                    #[target_feature(enable = "altivec")]
+                    unsafe fn vec_neg(self) -> Self {
+                        simd_neg(self)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_neg! {
+        vector_signed_char
+        vector_unsigned_char
+        vector_bool_char
+
+        vector_signed_short
+        vector_unsigned_short
+        vector_bool_short
+
+        vector_signed_int
+        vector_unsigned_int
+        vector_bool_int
+
+        vector_float
+    }
+
    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
    pub trait VectorInsert {
        type Scalar;
@ -1380,7 +1409,7 @@ mod sealed {
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
-                v.vec_max(-v)
+                v.vec_max(simd_neg(v))
            }

            impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@ -1428,7 +1457,7 @@ mod sealed {
    #[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))]
    unsafe fn vspltb<const IMM4: u32>(a: vector_signed_char) -> vector_signed_char {
        static_assert_uimm_bits!(IMM4, 4);
-        simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) })
+        simd_shuffle(a, a, const { u32x16::splat(IMM4) })
    }

    #[inline]
@ -1436,7 +1465,7 @@ mod sealed {
    #[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))]
    unsafe fn vsplth<const IMM3: u32>(a: vector_signed_short) -> vector_signed_short {
        static_assert_uimm_bits!(IMM3, 3);
-        simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) })
+        simd_shuffle(a, a, const { u32x8::splat(IMM3) })
    }

    #[inline]
@ -1445,7 +1474,7 @@ mod sealed {
    #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))]
    unsafe fn vspltw<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int {
        static_assert_uimm_bits!(IMM2, 2);
-        simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) })
+        simd_shuffle(a, a, const { u32x4::splat(IMM2) })
    }

    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
@ -4032,6 +4061,14 @@ pub unsafe fn vec_mfvscr() -> vector_unsigned_short {
    mfvscr()
 }

+/// Vector Negate
+#[inline]
+#[target_feature(enable = "altivec")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
+    a.vec_neg()
+}
+
 /// Vector add.
 #[inline]
 #[target_feature(enable = "altivec")]
@ -4703,7 +4740,7 @@ mod tests {
        for off in 0..16 {
            let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off)));
            for i in 0..16 {
-                let v = val.extract(i);
+                let v = val.extract_dyn(i);
                assert_eq!(off as usize + i, v as usize);
            }
        }
@ -4758,7 +4795,7 @@ mod tests {
        )];
        for off in 0..16 {
            let v: u8x16 = transmute(vec_lde(off, pat.as_ptr() as *const u8));
-            assert_eq!(off as u8, v.extract(off as _));
+            assert_eq!(off as u8, v.extract_dyn(off as _));
        }
    }

@ -4767,7 +4804,7 @@ mod tests {
        let pat = [u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)];
        for off in 0..8 {
            let v: u16x8 = transmute(vec_lde(off * 2, pat.as_ptr() as *const u16));
-            assert_eq!(off as u16, v.extract(off as _));
+            assert_eq!(off as u16, v.extract_dyn(off as _));
        }
    }

@ -4776,7 +4813,7 @@ mod tests {
        let pat = [u32x4::new(0, 1, 2, 3)];
        for off in 0..4 {
            let v: u32x4 = transmute(vec_lde(off * 4, pat.as_ptr() as *const u32));
-            assert_eq!(off as u32, v.extract(off as _));
+            assert_eq!(off as u32, v.extract_dyn(off as _));
        }
    }

--- a/library/stdarch/crates/core_arch/src/powerpc/macros.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/macros.rs
@ -274,40 +274,6 @@ macro_rules! t_b {
    };
 }

-macro_rules! impl_from {
-    ($s: ident) => {
-        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
-        impl From<$s> for s_t_l!($s) {
-            #[inline]
-            fn from (v: $s) -> Self {
-                unsafe {
-                    transmute(v)
-                }
-            }
-        }
-    };
-    ($($s: ident),*) => {
-        $(
-            impl_from! { $s }
-        )*
-    };
-}
-
-macro_rules! impl_neg {
-    ($s: ident : $zero: expr) => {
-        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
-        impl crate::ops::Neg for s_t_l!($s) {
-            type Output = s_t_l!($s);
-            #[inline]
-            fn neg(self) -> Self::Output {
-                unsafe { simd_neg(self) }
-            }
-        }
-    };
-}
-
-pub(crate) use impl_from;
-pub(crate) use impl_neg;
 pub(crate) use impl_vec_trait;
 pub(crate) use s_t_l;
 pub(crate) use t_b;
--- a/library/stdarch/crates/core_arch/src/s390x/macros.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/macros.rs
@ -431,40 +431,6 @@ macro_rules! t_b {
    };
 }

-macro_rules! impl_from {
-    ($s: ident) => {
-        #[unstable(feature = "stdarch_s390x", issue = "135681")]
-        impl From<$s> for s_t_l!($s) {
-            #[inline]
-            fn from (v: $s) -> Self {
-                unsafe {
-                    transmute(v)
-                }
-            }
-        }
-    };
-    ($($s: ident),*) => {
-        $(
-            impl_from! { $s }
-        )*
-    };
-}
-
-macro_rules! impl_neg {
-    ($s: ident : $zero: expr) => {
-        #[unstable(feature = "stdarch_s390x", issue = "135681")]
-        impl crate::ops::Neg for s_t_l!($s) {
-            type Output = s_t_l!($s);
-            #[inline]
-            fn neg(self) -> Self::Output {
-                unsafe { simd_neg(self) }
-            }
-        }
-    };
-}
-
-pub(crate) use impl_from;
-pub(crate) use impl_neg;
 pub(crate) use impl_vec_trait;
 pub(crate) use l_t_t;
 pub(crate) use s_t_l;
--- a/library/stdarch/crates/core_arch/src/s390x/vector.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs
@ -281,17 +281,14 @@ unsafe extern "unadjusted" {
    #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
    #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
    #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+
+    #[link_name = "llvm.s390.vclfnhs"] fn vclfnhs(a: vector_signed_short, immarg: i32) -> vector_float;
+    #[link_name = "llvm.s390.vclfnls"] fn vclfnls(a: vector_signed_short, immarg: i32) -> vector_float;
+    #[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
+    #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
+    #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
 }

-impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
-
-impl_neg! { i8x16 : 0 }
-impl_neg! { i16x8 : 0 }
-impl_neg! { i32x4 : 0 }
-impl_neg! { i64x2 : 0 }
-impl_neg! { f32x4 : 0f32 }
-impl_neg! { f64x2 : 0f64 }
-
 #[repr(simd)]
 struct ShuffleMask<const N: usize>([u32; N]);

@ -439,6 +436,43 @@ enum FindImm {
 mod sealed {
    use super::*;

+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorNeg {
+        unsafe fn vec_neg(self) -> Self;
+    }
+
+    macro_rules! impl_neg {
+        ($($v:ty)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorNeg for $v {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_neg(self) -> Self {
+                        simd_neg(self)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_neg! {
+        vector_signed_char
+        vector_unsigned_char
+
+        vector_signed_short
+        vector_unsigned_short
+
+        vector_signed_int
+        vector_unsigned_int
+
+        vector_signed_long_long
+        vector_unsigned_long_long
+
+        vector_float
+        vector_double
+    }
+
    #[unstable(feature = "stdarch_s390x", issue = "135681")]
    pub trait VectorAdd<Other> {
        type Result;
@ -761,7 +795,7 @@ mod sealed {
            #[inline]
            #[target_feature(enable = "vector")]
            unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
-                v.vec_max(-v)
+                v.vec_max(simd_neg(v))
            }

            impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@ -4055,6 +4089,14 @@ unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
    lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
 }

+/// Vector Negate
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
+    a.vec_neg()
+}
+
 /// Vector Add
 #[inline]
 #[target_feature(enable = "vector")]
@ -5875,6 +5917,74 @@ pub unsafe fn vec_promote<T: sealed::VectorPromote>(a: T::ElementType, b: i32) -
    T::vec_promote(a, b)
 }

+/// Converts the left-most half of `a` to a vector of single-precision numbers.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vclfnh, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_extend_to_fp32_hi<const B: i32>(a: vector_signed_short) -> vector_float {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vclfnhs(a, B)
+}
+
+/// Converts the right-most half of `a` to a vector of single-precision numbers.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vclfnl, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_extend_to_fp32_lo<const B: i32>(a: vector_signed_short) -> vector_float {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vclfnls(a, B)
+}
+
+/// Converts the elements of vector `a` to the 16-bit IEEE floating point format.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vcfn, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_convert_to_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vcfn(a, B)
+}
+
+/// Converts the elements of vector `a` to an internal floating point format.
+/// The format of the target vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vcnf, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_convert_from_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vcnf(a, B)
+}
+
+/// Converts the elements of single-precision vectors `a` and `b` to an internal floating point
+/// format with 16-bit sized elements. The format of the target vector elements is specified by `C`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vcrnf, C = 0))]
+pub unsafe fn vec_round_from_fp32<const C: i32>(
+    a: vector_float,
+    b: vector_float,
+) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(C, 4);
+
+    vcrnfs(a, b, C)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/library/stdarch/crates/core_arch/src/simd.rs
+++ b/library/stdarch/crates/core_arch/src/simd.rs
@ -16,129 +16,216 @@ pub(crate) const unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
    crate::intrinsics::simd::simd_select(mask, a, b)
 }

+/// SAFETY: All bits patterns must be valid
+pub(crate) unsafe trait SimdElement:
+    Copy + const PartialEq + crate::fmt::Debug
+{
+}
+
+unsafe impl SimdElement for u8 {}
+unsafe impl SimdElement for u16 {}
+unsafe impl SimdElement for u32 {}
+unsafe impl SimdElement for u64 {}
+
+unsafe impl SimdElement for i8 {}
+unsafe impl SimdElement for i16 {}
+unsafe impl SimdElement for i32 {}
+unsafe impl SimdElement for i64 {}
+
+unsafe impl SimdElement for f16 {}
+unsafe impl SimdElement for f32 {}
+unsafe impl SimdElement for f64 {}
+
+#[repr(simd)]
+#[derive(Copy)]
+pub(crate) struct Simd<T: SimdElement, const N: usize>([T; N]);
+
+impl<T: SimdElement, const N: usize> Simd<T, N> {
+    /// A value of this type where all elements are zeroed out.
+    // SAFETY: `T` implements `SimdElement`, so it is zeroable.
+    pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
+
+    #[inline(always)]
+    pub(crate) const fn from_array(elements: [T; N]) -> Self {
+        Self(elements)
+    }
+
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn splat(value: T) -> Self {
+        unsafe { crate::intrinsics::simd::simd_splat(value) }
+    }
+
+    /// Extract the element at position `index`. Note that `index` is not a constant so this
+    /// operation is not efficient on most platforms. Use for testing only.
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn extract_dyn(&self, index: usize) -> T {
+        assert!(index < N);
+        // SAFETY: self is a vector, T its element type.
+        unsafe { crate::intrinsics::simd::simd_extract_dyn(*self, index as u32) }
+    }
+
+    #[inline]
+    pub(crate) const fn as_array(&self) -> &[T; N] {
+        let simd_ptr: *const Self = self;
+        let array_ptr: *const [T; N] = simd_ptr.cast();
+        // SAFETY: We can always read the prefix of a simd type as an array.
+        // There might be more padding afterwards for some widths, but
+        // that's not a problem for reading less than that.
+        unsafe { &*array_ptr }
+    }
+}
+
+// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::Simd is banned by MCP#838"
+impl<T: SimdElement, const N: usize> Clone for Simd<T, N> {
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for Simd<T, N> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.as_array() == other.as_array()
+    }
+}
+
+impl<T: SimdElement, const N: usize> crate::fmt::Debug for Simd<T, N> {
+    #[inline]
+    fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+        debug_simd_finish(f, "Simd", self.as_array())
+    }
+}
+
+impl<const N: usize> Simd<f16, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u16, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
+
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u16, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
+impl<const N: usize> Simd<f32, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u32, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
+
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u32, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
+impl<const N: usize> Simd<f64, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u64, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
+
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u64, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
 macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
-        #[repr(simd)]
-        #[derive(Copy, Clone)]
-        pub(crate) struct $id([$elem_type; $len]);
+        pub(crate) type $id = Simd<$elem_type, $len>;

-        #[allow(clippy::use_self)]
        impl $id {
-            /// A value of this type where all elements are zeroed out.
-            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
-
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
-                $id([$($param_name),*])
-            }
-            #[inline(always)]
-            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
-                $id(elements)
-            }
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
-            pub(crate) const fn splat(value: $elem_type) -> Self {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([value]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
-            }
-
-            /// Extract the element at position `index`.
-            /// `index` is not a constant so this is not efficient!
-            /// Use for testing only.
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            pub(crate) const fn extract(&self, index: usize) -> $elem_type {
-                self.as_array()[index]
-            }
-
-            #[inline]
-            pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
-                let simd_ptr: *const Self = self;
-                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
-                // SAFETY: We can always read the prefix of a simd type as an array.
-                // There might be more padding afterwards for some widths, but
-                // that's not a problem for reading less than that.
-                unsafe { &*array_ptr }
-            }
-        }
-
-        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
-        const impl core::cmp::PartialEq for $id {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                self.as_array() == other.as_array()
-            }
-        }
-
-        impl core::fmt::Debug for $id {
-            #[inline]
-            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-                debug_simd_finish(f, stringify!($id), self.as_array())
+                Self([$($param_name),*])
            }
        }
    }
 }

+#[repr(simd)]
+#[derive(Copy)]
+pub(crate) struct SimdM<T: SimdElement, const N: usize>([T; N]);
+
+impl<T: SimdElement, const N: usize> SimdM<T, N> {
+    #[inline(always)]
+    const fn bool_to_internal(x: bool) -> T {
+        // SAFETY: `T` implements `SimdElement`, so all bit patterns are valid.
+        let zeros = const { unsafe { crate::mem::zeroed::<T>() } };
+        let ones = const {
+            // Ideally, this would be `transmute([0xFFu8; size_of::<T>()])`, but
+            // `size_of::<T>()` is not allowed to use a generic parameter there.
+            let mut r = crate::mem::MaybeUninit::<T>::uninit();
+            let mut i = 0;
+            while i < crate::mem::size_of::<T>() {
+                r.as_bytes_mut()[i] = crate::mem::MaybeUninit::new(0xFF);
+                i += 1;
+            }
+            unsafe { r.assume_init() }
+        };
+        [zeros, ones][x as usize]
+    }
+
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn splat(value: bool) -> Self {
+        unsafe { crate::intrinsics::simd::simd_splat(value) }
+    }
+
+    #[inline]
+    pub(crate) const fn as_array(&self) -> &[T; N] {
+        let simd_ptr: *const Self = self;
+        let array_ptr: *const [T; N] = simd_ptr.cast();
+        // SAFETY: We can always read the prefix of a simd type as an array.
+        // There might be more padding afterwards for some widths, but
+        // that's not a problem for reading less than that.
+        unsafe { &*array_ptr }
+    }
+}
+
+// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::SimdM is banned by MCP#838"
+impl<T: SimdElement, const N: usize> Clone for SimdM<T, N> {
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for SimdM<T, N> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.as_array() == other.as_array()
+    }
+}
+
+impl<T: SimdElement, const N: usize> crate::fmt::Debug for SimdM<T, N> {
+    #[inline]
+    fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+        debug_simd_finish(f, "SimdM", self.as_array())
+    }
+}
+
 macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
-        #[repr(simd)]
-        #[derive(Copy, Clone)]
-        pub(crate) struct $id([$elem_type; $len]);
+        pub(crate) type $id  = SimdM<$elem_type, $len>;

-        #[allow(clippy::use_self)]
        impl $id {
-            #[inline(always)]
-            const fn bool_to_internal(x: bool) -> $elem_type {
-                [0 as $elem_type, !(0 as $elem_type)][x as usize]
-            }
-
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
-                $id([$(Self::bool_to_internal($param_name)),*])
-            }
-
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
-            pub(crate) const fn splat(value: bool) -> Self {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([Self::bool_to_internal(value)]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
-            }
-
-            #[inline]
-            pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
-                let simd_ptr: *const Self = self;
-                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
-                // SAFETY: We can always read the prefix of a simd type as an array.
-                // There might be more padding afterwards for some widths, but
-                // that's not a problem for reading less than that.
-                unsafe { &*array_ptr }
-            }
-        }
-
-        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
-        const impl core::cmp::PartialEq for $id {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                self.as_array() == other.as_array()
-            }
-        }
-
-        impl core::fmt::Debug for $id {
-            #[inline]
-            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-                debug_simd_finish(f, stringify!($id), self.as_array())
+                Self([$(Self::bool_to_internal($param_name)),*])
            }
        }
    }
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@ -2746,7 +2746,7 @@ pub const fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_pd(a: f64) -> __m256d {
-    _mm256_setr_pd(a, a, a, a)
+    f64x4::splat(a).as_m256d()
 }

 /// Broadcasts single-precision (32-bit) floating-point value `a` to all
@ -2759,7 +2759,7 @@ pub const fn _mm256_set1_pd(a: f64) -> __m256d {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_ps(a: f32) -> __m256 {
-    _mm256_setr_ps(a, a, a, a, a, a, a, a)
+    f32x8::splat(a).as_m256()
 }

 /// Broadcasts 8-bit integer `a` to all elements of returned vector.
@ -2772,13 +2772,7 @@ pub const fn _mm256_set1_ps(a: f32) -> __m256 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
-    #[rustfmt::skip]
-    _mm256_setr_epi8(
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-    )
+    i8x32::splat(a).as_m256i()
 }

 /// Broadcasts 16-bit integer `a` to all elements of returned vector.
@ -2793,7 +2787,7 @@ pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
-    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
+    i16x16::splat(a).as_m256i()
 }

 /// Broadcasts 32-bit integer `a` to all elements of returned vector.
@ -2806,7 +2800,7 @@ pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
-    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
+    i32x8::splat(a).as_m256i()
 }

 /// Broadcasts 64-bit integer `a` to all elements of returned vector.
@ -2821,7 +2815,7 @@ pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_set1_epi64x(a: i64) -> __m256i {
-    _mm256_setr_epi64x(a, a, a, a)
+    i64x4::splat(a).as_m256i()
 }

 /// Cast vector of type __m256d to type __m256.
--- a/library/stdarch/crates/core_arch/src/x86/sse.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse.rs
@ -932,7 +932,7 @@ pub const fn _mm_set_ss(a: f32) -> __m128 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm_set1_ps(a: f32) -> __m128 {
-    __m128([a, a, a, a])
+    f32x4::splat(a).as_m128()
 }

 /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
@ -2079,7 +2079,7 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
 #[cfg(test)]
 mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
-    use crate::{hint::black_box, mem::transmute, ptr};
+    use crate::{hint::black_box, ptr};
    use std::boxed;
    use stdarch_test::simd_test;

@ -2221,7 +2221,7 @@ mod tests {
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_min_ps() {
+    fn test_mm_min_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_min_ps(a, b);
@ -2234,10 +2234,10 @@ mod tests {
        // `r1` to `a` and `r2` to `b`.
        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
-        let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
+        let r1 = _mm_min_ps(a, b).as_f32x4().to_bits();
+        let r2 = _mm_min_ps(b, a).as_f32x4().to_bits();
+        let a = a.as_f32x4().to_bits();
+        let b = b.as_f32x4().to_bits();
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
@ -2252,7 +2252,7 @@ mod tests {
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_max_ps() {
+    fn test_mm_max_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_max_ps(a, b);
@ -2261,67 +2261,67 @@ mod tests {
        // Check SSE-specific semantics for -0.0 handling.
        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
-        let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
+        let r1 = _mm_max_ps(a, b).as_f32x4().to_bits();
+        let r2 = _mm_max_ps(b, a).as_f32x4().to_bits();
+        let a = a.as_f32x4().to_bits();
+        let b = b.as_f32x4().to_bits();
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse")]
-    const unsafe fn test_mm_and_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_and_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
        let r = _mm_and_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0001));
+        let e = f32x4::from_bits(u32x4::splat(0b0001)).as_m128();
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse")]
-    const unsafe fn test_mm_andnot_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_andnot_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
        let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0100));
+        let e = f32x4::from_bits(u32x4::splat(0b0100)).as_m128();
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse")]
-    const unsafe fn test_mm_or_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_or_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
        let r = _mm_or_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0111));
+        let e = f32x4::from_bits(u32x4::splat(0b0111)).as_m128();
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse")]
-    const unsafe fn test_mm_xor_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_xor_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
        let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0110));
+        let e = f32x4::from_bits(u32x4::splat(0b0110)).as_m128();
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpeq_ss() {
+    fn test_mm_cmpeq_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
-        let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
-        let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
+        let r = _mm_cmpeq_ss(a, b).as_f32x4().to_bits();
+        let e = f32x4::new(f32::from_bits(0), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(r, e);

        let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-        let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
-        let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
+        let r2 = _mm_cmpeq_ss(a, b2).as_f32x4().to_bits();
+        let e2 = f32x4::new(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(r2, e2);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmplt_ss() {
+    fn test_mm_cmplt_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2331,21 +2331,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) < c.extract(0)
        let d1 = !0u32; // a.extract(0) < d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmplt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmplt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmplt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmple_ss() {
+    fn test_mm_cmple_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2355,21 +2355,21 @@ mod tests {
        let c1 = !0u32; // a.extract(0) <= c.extract(0)
        let d1 = !0u32; // a.extract(0) <= d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmple_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmple_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmple_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpgt_ss() {
+    fn test_mm_cmpgt_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2379,21 +2379,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) > c.extract(0)
        let d1 = 0u32; // a.extract(0) > d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpgt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpgt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpgt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpge_ss() {
+    fn test_mm_cmpge_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2403,21 +2403,21 @@ mod tests {
        let c1 = !0u32; // a.extract(0) >= c.extract(0)
        let d1 = 0u32; // a.extract(0) >= d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpge_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpge_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpge_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpneq_ss() {
+    fn test_mm_cmpneq_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2427,21 +2427,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) != c.extract(0)
        let d1 = !0u32; // a.extract(0) != d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpneq_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpneq_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpneq_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnlt_ss() {
+    fn test_mm_cmpnlt_ss() {
        // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there
        // must be a difference. It may have to do with behavior in the
        // presence of NaNs (signaling or quiet). If so, we should add tests
@ -2456,21 +2456,21 @@ mod tests {
        let c1 = !0u32; // a.extract(0) >= c.extract(0)
        let d1 = 0u32; // a.extract(0) >= d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnlt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnlt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnlt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnle_ss() {
+    fn test_mm_cmpnle_ss() {
        // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there
        // must be a difference. It may have to do with behavior in the
        // presence
@ -2485,21 +2485,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) > c.extract(0)
        let d1 = 0u32; // a.extract(0) > d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnle_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnle_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnle_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpngt_ss() {
+    fn test_mm_cmpngt_ss() {
        // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there
        // must be a difference. It may have to do with behavior in the
        // presence of NaNs (signaling or quiet). If so, we should add tests
@ -2514,21 +2514,21 @@ mod tests {
        let c1 = !0u32; // a.extract(0) <= c.extract(0)
        let d1 = !0u32; // a.extract(0) <= d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpngt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpngt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpngt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnge_ss() {
+    fn test_mm_cmpnge_ss() {
        // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there
        // must be a difference. It may have to do with behavior in the
        // presence of NaNs (signaling or quiet). If so, we should add tests
@ -2543,21 +2543,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) < c.extract(0)
        let d1 = !0u32; // a.extract(0) < d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnge_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnge_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnge_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpord_ss() {
+    fn test_mm_cmpord_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@ -2567,21 +2567,21 @@ mod tests {
        let c1 = 0u32; // a.extract(0) ord c.extract(0)
        let d1 = !0u32; // a.extract(0) ord d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpord_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpord_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpord_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpunord_ss() {
+    fn test_mm_cmpunord_ss() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@ -2591,160 +2591,160 @@ mod tests {
        let c1 = !0u32; // a.extract(0) unord c.extract(0)
        let d1 = 0u32; // a.extract(0) unord d.extract(0)

-        let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpunord_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rb, eb);

-        let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpunord_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rc, ec);

-        let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpunord_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
        assert_eq!(rd, ed);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpeq_ps() {
+    fn test_mm_cmpeq_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, fls, tru, fls);
-        let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
+        let r = _mm_cmpeq_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmplt_ps() {
+    fn test_mm_cmplt_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, fls, fls, fls);
-        let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
+        let r = _mm_cmplt_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmple_ps() {
+    fn test_mm_cmple_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, fls, tru, fls);
-        let r: u32x4 = transmute(_mm_cmple_ps(a, b));
+        let r = _mm_cmple_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpgt_ps() {
+    fn test_mm_cmpgt_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, tru, fls, fls);
-        let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
+        let r = _mm_cmpgt_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpge_ps() {
+    fn test_mm_cmpge_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, tru, tru, fls);
-        let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
+        let r = _mm_cmpge_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpneq_ps() {
+    fn test_mm_cmpneq_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, tru, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
+        let r = _mm_cmpneq_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnlt_ps() {
+    fn test_mm_cmpnlt_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, tru, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
+        let r = _mm_cmpnlt_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnle_ps() {
+    fn test_mm_cmpnle_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, tru, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
+        let r = _mm_cmpnle_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpngt_ps() {
+    fn test_mm_cmpngt_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, fls, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
+        let r = _mm_cmpngt_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnge_ps() {
+    fn test_mm_cmpnge_ps() {
        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, fls, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
+        let r = _mm_cmpnge_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpord_ps() {
+    fn test_mm_cmpord_ps() {
        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(tru, fls, fls, fls);
-        let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
+        let r = _mm_cmpord_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpunord_ps() {
+    fn test_mm_cmpunord_ps() {
        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
        let tru = !0u32;
        let fls = 0u32;

        let e = u32x4::new(fls, tru, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
+        let r = _mm_cmpunord_ps(a, b).as_f32x4().to_bits();
        assert_eq!(r, e);
    }

--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@ -1176,7 +1176,7 @@ pub const fn _mm_set_epi8(
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
-    _mm_set_epi64x(a, a)
+    i64x2::splat(a).as_m128i()
 }

 /// Broadcasts 32-bit integer `a` to all elements.
@ -1188,7 +1188,7 @@ pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm_set1_epi32(a: i32) -> __m128i {
-    _mm_set_epi32(a, a, a, a)
+    i32x4::splat(a).as_m128i()
 }

 /// Broadcasts 16-bit integer `a` to all elements.
@ -1200,7 +1200,7 @@ pub const fn _mm_set1_epi32(a: i32) -> __m128i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm_set1_epi16(a: i16) -> __m128i {
-    _mm_set_epi16(a, a, a, a, a, a, a, a)
+    i16x8::splat(a).as_m128i()
 }

 /// Broadcasts 8-bit integer `a` to all elements.
@ -1212,7 +1212,7 @@ pub const fn _mm_set1_epi16(a: i16) -> __m128i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm_set1_epi8(a: i8) -> __m128i {
-    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
+    i8x16::splat(a).as_m128i()
 }

 /// Sets packed 32-bit integers with the supplied values in reverse order.
@ -3280,11 +3280,7 @@ mod tests {
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
-    use std::{
-        boxed, f32, f64,
-        mem::{self, transmute},
-        ptr,
-    };
+    use std::{boxed, f32, f64, mem, ptr};
    use stdarch_test::simd_test;

    const NAN: f64 = f64::NAN;
@ -4593,38 +4589,38 @@ mod tests {
    }

    #[simd_test(enable = "sse2")]
-    const unsafe fn test_mm_and_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_and_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
        let r = _mm_and_pd(a, b);
-        let e = transmute(u64x2::splat(1));
+        let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
-    const unsafe fn test_mm_andnot_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_andnot_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
        let r = _mm_andnot_pd(a, b);
-        let e = transmute(u64x2::splat(2));
+        let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
-    const unsafe fn test_mm_or_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_or_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
        let r = _mm_or_pd(a, b);
-        let e = transmute(u64x2::splat(7));
+        let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
-    const unsafe fn test_mm_xor_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_xor_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
        let r = _mm_xor_pd(a, b);
-        let e = transmute(u64x2::splat(6));
+        let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
        assert_eq_m128d(r, e);
    }

--- a/library/stdarch/crates/core_arch/src/x86/test.rs
+++ b/library/stdarch/crates/core_arch/src/x86/test.rs
@ -78,38 +78,45 @@ pub(crate) const fn assert_eq_m512h(a: __m512h, b: __m512h) {
 }

 #[target_feature(enable = "sse2")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m128d(a: __m128d, idx: usize) -> f64 {
-    a.as_f64x2().extract(idx)
+    a.as_f64x2().extract_dyn(idx)
 }

 #[target_feature(enable = "sse")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m128(a: __m128, idx: usize) -> f32 {
-    a.as_f32x4().extract(idx)
+    a.as_f32x4().extract_dyn(idx)
 }

 #[target_feature(enable = "avx")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m256d(a: __m256d, idx: usize) -> f64 {
-    a.as_f64x4().extract(idx)
+    a.as_f64x4().extract_dyn(idx)
 }

 #[target_feature(enable = "avx")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m256(a: __m256, idx: usize) -> f32 {
-    a.as_f32x8().extract(idx)
+    a.as_f32x8().extract_dyn(idx)
 }

 #[target_feature(enable = "avx512f")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m512(a: __m512, idx: usize) -> f32 {
-    a.as_f32x16().extract(idx)
+    a.as_f32x16().extract_dyn(idx)
 }

 #[target_feature(enable = "avx512f")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m512d(a: __m512d, idx: usize) -> f64 {
-    a.as_f64x8().extract(idx)
+    a.as_f64x8().extract_dyn(idx)
 }

 #[target_feature(enable = "avx512f")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const fn get_m512i(a: __m512i, idx: usize) -> i64 {
-    a.as_i64x8().extract(idx)
+    a.as_i64x8().extract_dyn(idx)
 }

 // not actually an intrinsic but useful in various tests as we ported from
--- a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
@ -59,6 +59,3 @@ vluti4q_laneq_u8

 # Broken in Clang
 vcvth_s16_f16
-# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line
-vmulh_lane_f16
-vmulh_laneq_f16
--- a/library/stdarch/crates/intrinsic-test/missing_aarch64_be.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_be.txt
@ -100,6 +100,3 @@ vluti4q_laneq_u8

 # Broken in Clang
 vcvth_s16_f16
-# FIXME: Broken output due to missing f16 printing support in Rust
-vmulh_lane_f16
-vmulh_laneq_f16
--- a/library/stdarch/crates/simd-test-macro/src/lib.rs
+++ b/library/stdarch/crates/simd-test-macro/src/lib.rs
@ -17,6 +17,15 @@ pub fn simd_test(
    item: proc_macro::TokenStream,
 ) -> proc_macro::TokenStream {
    let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
+
+    let target = env::var("TARGET").expect(
+        "TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
+    );
+    let target_arch = target
+        .split('-')
+        .next()
+        .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"));
+
    let (target_features, target_feature_attr) = match &tokens[..] {
        [] => (Vec::new(), TokenStream::new()),
        [
@ -24,13 +33,20 @@ pub fn simd_test(
            TokenTree::Punct(equals),
            TokenTree::Literal(literal),
        ] if enable == "enable" && equals.as_char() == '=' => {
-            let enable_feature = literal.to_string();
-            let enable_feature = enable_feature.trim_start_matches('"').trim_end_matches('"');
+            let mut enable_feature = literal
+                .to_string()
+                .trim_start_matches('"')
+                .trim_end_matches('"')
+                .to_string();
            let target_features: Vec<_> = enable_feature
                .replace('+', "")
                .split(',')
                .map(String::from)
                .collect();
+            // Allows using `#[simd_test(enable = "neon")]` on aarch64/armv7 shared tests.
+            if target_arch == "armv7" && target_features.iter().any(|feat| feat == "neon") {
+                enable_feature.push_str(",v7");
+            }

            (
                target_features,
@ -46,14 +62,7 @@ pub fn simd_test(
    let item_attrs = std::mem::take(&mut item.attrs);
    let name = &item.sig.ident;

-    let target = env::var("TARGET").expect(
-        "TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
-    );
-    let macro_test = match target
-        .split('-')
-        .next()
-        .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
-    {
+    let macro_test = match target_arch {
        "i686" | "x86_64" | "i586" => "is_x86_feature_detected",
        "arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
        "aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
@ -85,10 +94,20 @@ pub fn simd_test(

    let mut detect_missing_features = TokenStream::new();
    for feature in target_features {
-        let q = quote_spanned! {
-            proc_macro2::Span::call_site() =>
-            if !::std::arch::#macro_test!(#feature) {
-                missing_features.push(#feature);
+        let q = if target_arch == "armv7" && feature == "fp16" {
+            // "fp16" cannot be checked at runtime
+            quote_spanned! {
+                proc_macro2::Span::call_site() =>
+                if !cfg!(target_feature = #feature) {
+                    missing_features.push(#feature);
+                }
+            }
+        } else {
+            quote_spanned! {
+                proc_macro2::Span::call_site() =>
+                if !::std::arch::#macro_test!(#feature) {
+                    missing_features.push(#feature);
+                }
            }
        };
        q.to_tokens(&mut detect_missing_features);
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@ -63,8 +63,8 @@ neon-unstable-f16: &neon-unstable-f16
 neon-unstable-feat-lut: &neon-unstable-feat-lut
  FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]

-aarch64-unstable-jscvt: &aarch64-unstable-jscvt
-  FnCall: [unstable, ['feature = "stdarch_aarch64_jscvt"', 'issue = "147555"']]
+aarch64-stable-jscvt: &aarch64-stable-jscvt
+  FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "CURRENT_RUSTC_VERSION"']]

 # #[cfg(target_endian = "little")]
 little-endian: &little-endian
@ -14275,7 +14275,7 @@ intrinsics:
    attr:
      - FnCall: [target_feature, ['enable = "jsconv"']]
      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["fjcvtzs"]] }]]
-      - *aarch64-unstable-jscvt
+      - *aarch64-stable-jscvt
    safety: safe
    types:
      - f64
--- a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs
@ -1736,7 +1736,7 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok
        );
    }

-    tokens.append_all(quote! { #[inline] });
+    tokens.append_all(quote! { #[inline(always)] });

    match endianness {
        Endianness::Little => tokens.append_all(quote! { #[cfg(target_endian = "little")] }),
--- a/library/stdarch/rust-version
+++ b/library/stdarch/rust-version
@ -1 +1 @@
-48622726c4a91c87bf6cd4dbe1000c95df59906e
+873d4682c7d285540b8f28bfe637006cef8918a6