Rollup merge of #143642 - Kobzol:stdarch-push, r=Amanieu

stdarch subtree update Subtree update of `stdarch` to b262a9af85. Created using https://github.com/rust-lang/josh-sync. r? ```@ghost```
2025-07-10 20:20:38 -04:00 · 2025-07-10 20:20:38 -04:00 · a5aff96d4d
commit a5aff96d4d
parent 6debe03634 a310bdd0d8
31 changed files with 243 additions and 455 deletions
--- a/library/stdarch/Cargo.lock
+++ b/library/stdarch/Cargo.lock
@ -147,7 +147,6 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 name = "core_arch"
 version = "0.1.5"
 dependencies = [
- "std_detect",
 "stdarch-test",
 "syscalls",
 ]
--- a/library/stdarch/README.md
+++ b/library/stdarch/README.md
@ -16,3 +16,9 @@ This repository contains two main crates:

 The `std::simd` component now lives in the
 [`packed_simd_2`](https://github.com/rust-lang/packed_simd) crate.
+
+## Synchronizing josh subtree with rustc
+
+This repository is linked to `rust-lang/rust` as a [josh](https://josh-project.github.io/josh/intro.html) subtree. You can use the [rustc-josh-sync](https://github.com/rust-lang/josh-sync) tool to perform synchronization.
+
+You can find a guide on how to perform the synchronization [here](https://rustc-dev-guide.rust-lang.org/external-repos.html#synchronizing-a-josh-subtree).
--- a/library/stdarch/crates/core_arch/Cargo.toml
+++ b/library/stdarch/crates/core_arch/Cargo.toml
@ -22,7 +22,6 @@ maintenance = { status = "experimental" }

 [dev-dependencies]
 stdarch-test = { version = "0.*", path = "../stdarch-test" }
-std_detect = { version = "0.*", path = "../std_detect" }

 [target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dev-dependencies]
 syscalls = { version = "0.6.18", default-features = false }
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@ -7925,7 +7925,7 @@ pub fn vcvth_f16_u64(a: u64) -> f16 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 pub fn vcvth_n_f16_s16<const N: i32>(a: i16) -> f16 {
    static_assert!(N >= 1 && N <= 16);
-    vcvth_n_f16_s32::<N>(a as i32) as f16
+    vcvth_n_f16_s32::<N>(a as i32)
 }
 #[doc = "Fixed-point convert to floating-point"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s32)"]
@ -7972,7 +7972,7 @@ pub fn vcvth_n_f16_s64<const N: i32>(a: i64) -> f16 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 pub fn vcvth_n_f16_u16<const N: i32>(a: u16) -> f16 {
    static_assert!(N >= 1 && N <= 16);
-    vcvth_n_f16_u32::<N>(a as u32) as f16
+    vcvth_n_f16_u32::<N>(a as u32)
 }
 #[doc = "Fixed-point convert to floating-point"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u32)"]
@ -17158,7 +17158,7 @@ pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
    let x: i64 = vqaddd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+    x
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
@ -17324,7 +17324,7 @@ pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
    let x: i64 = vqsubd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+    x
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s16)"]
@ -19495,10 +19495,7 @@ pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let x = transmute(vqtbl1(transmute(a), b));
-        x
-    }
+    unsafe { transmute(vqtbl1(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"]
@ -19507,10 +19504,7 @@ pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let x = transmute(vqtbl1q(transmute(a), b));
-        x
-    }
+    unsafe { transmute(vqtbl1q(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"]
@ -19519,10 +19513,7 @@ pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        let x = transmute(vqtbl1(transmute(a), b));
-        x
-    }
+    unsafe { transmute(vqtbl1(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"]
@ -19531,10 +19522,7 @@ pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
-    unsafe {
-        let x = transmute(vqtbl1q(transmute(a), b));
-        x
-    }
+    unsafe { transmute(vqtbl1q(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"]
@ -20397,10 +20385,7 @@ pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let x = transmute(vqtbx1(transmute(a), transmute(b), c));
-        x
-    }
+    unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"]
@ -20409,10 +20394,7 @@ pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let x = transmute(vqtbx1q(transmute(a), transmute(b), c));
-        x
-    }
+    unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"]
@ -20421,10 +20403,7 @@ pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        let x = transmute(vqtbx1(transmute(a), transmute(b), c));
-        x
-    }
+    unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"]
@ -20433,10 +20412,7 @@ pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t {
-    unsafe {
-        let x = transmute(vqtbx1q(transmute(a), transmute(b), c));
-        x
-    }
+    unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"]
@ -23785,14 +23761,7 @@ pub fn vrndph_f16(a: f16) -> f16 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v4f16"
-        )]
-        fn _vrndx_f16(a: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrndx_f16(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
@ -23801,14 +23770,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v8f16"
-        )]
-        fn _vrndxq_f16(a: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrndxq_f16(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
@ -23817,14 +23779,7 @@ pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f32"
-        )]
-        fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrndx_f32(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
@ -23833,14 +23788,7 @@ pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v4f32"
-        )]
-        fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrndxq_f32(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
@ -23849,14 +23797,7 @@ pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v1f64"
-        )]
-        fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
-    }
-    unsafe { _vrndx_f64(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral exact, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
@ -23865,14 +23806,7 @@ pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f64"
-        )]
-        fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    unsafe { _vrndxq_f64(a) }
+    unsafe { simd_round_ties_even(a) }
 }
 #[doc = "Floating-point round to integral, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"]
@ -24082,7 +24016,6 @@ pub fn vrsqrtes_f32(a: f32) -> f32 {
 #[doc = "Reciprocal square-root estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
--- a/library/stdarch/crates/core_arch/src/arm_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs
@ -20,10 +20,10 @@
 //! Section 10.1 of ACLE says:
 //!
 //! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
-//! its predecessor instruction set."
+//!   its predecessor's instruction set."
 //!
 //! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
-//! its predecessor instruction set."
+//!   its predecessor's instruction set."
 //!
 //! From that info and from looking at how LLVM features work (using custom targets) we can identify
 //! features that are subsets of others:
@ -38,7 +38,7 @@
 //! *NOTE*: Section 5.4.7 of ACLE says:
 //!
 //! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
-//! intrinsics defined in Saturating intrinsics are available."
+//!   intrinsics defined in Saturating intrinsics are available."
 //!
 //! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
 //! targets so we have to work around this difference.
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@ -40758,16 +40758,7 @@ pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
    }
-    unsafe {
-        _vqshlu_n_s8(
-            a,
-            const {
-                int8x8_t([
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
@ -40783,17 +40774,7 @@ pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
    }
-    unsafe {
-        _vqshluq_n_s8(
-            a,
-            const {
-                int8x16_t([
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
@ -40809,12 +40790,7 @@ pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
    }
-    unsafe {
-        _vqshlu_n_s16(
-            a,
-            const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
-        )
-    }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
@ -40830,16 +40806,7 @@ pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
    }
-    unsafe {
-        _vqshluq_n_s16(
-            a,
-            const {
-                int16x8_t([
-                    N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
@ -40855,7 +40822,7 @@ pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
@ -40871,12 +40838,7 @@ pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
    }
-    unsafe {
-        _vqshluq_n_s32(
-            a,
-            const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
-        )
-    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
@ -40908,7 +40870,7 @@ pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
@ -40927,16 +40889,7 @@ pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
        )]
        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
    }
-    unsafe {
-        _vqshlu_n_s8(
-            a,
-            const {
-                int8x8_t([
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
@ -40955,17 +40908,7 @@ pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
        )]
        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
    }
-    unsafe {
-        _vqshluq_n_s8(
-            a,
-            const {
-                int8x16_t([
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                    N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
@ -40984,12 +40927,7 @@ pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
        )]
        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
    }
-    unsafe {
-        _vqshlu_n_s16(
-            a,
-            const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
-        )
-    }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
@ -41008,16 +40946,7 @@ pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
        )]
        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
    }
-    unsafe {
-        _vqshluq_n_s16(
-            a,
-            const {
-                int16x8_t([
-                    N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
@ -41036,7 +40965,7 @@ pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
        )]
        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
@ -41055,12 +40984,7 @@ pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
        )]
        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
    }
-    unsafe {
-        _vqshluq_n_s32(
-            a,
-            const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
-        )
-    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
@ -41098,7 +41022,7 @@ pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
        )]
        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
@ -777,8 +777,8 @@ pub struct float16x8x2_t(pub float16x8_t, pub float16x8_t);
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-
 pub struct float16x8x3_t(pub float16x8_t, pub float16x8_t, pub float16x8_t);
+
 /// Arm-specific type containing four `float16x8_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
--- a/library/stdarch/crates/core_arch/src/lib.rs
+++ b/library/stdarch/crates/core_arch/src/lib.rs
@ -75,9 +75,7 @@
 #[cfg(test)]
 #[macro_use]
 extern crate std;
-#[cfg(test)]
-#[macro_use]
-extern crate std_detect;
+
 #[path = "mod.rs"]
 mod core_arch;

--- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs
@ -360,25 +360,6 @@ unsafe extern "C" {
    #[link_name = "llvm.ppc.altivec.vsrv"]
    fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;

-    #[link_name = "llvm.fshl.v16i8"]
-    fn fshlb(
-        a: vector_unsigned_char,
-        b: vector_unsigned_char,
-        c: vector_unsigned_char,
-    ) -> vector_unsigned_char;
-    #[link_name = "llvm.fshl.v8i16"]
-    fn fshlh(
-        a: vector_unsigned_short,
-        b: vector_unsigned_short,
-        c: vector_unsigned_short,
-    ) -> vector_unsigned_short;
-    #[link_name = "llvm.fshl.v4i32"]
-    fn fshlw(
-        a: vector_unsigned_int,
-        b: vector_unsigned_int,
-        c: vector_unsigned_int,
-    ) -> vector_unsigned_int;
-
    #[link_name = "llvm.nearbyint.v4f32"]
    fn vrfin(a: vector_float) -> vector_float;
 }
@ -3193,19 +3174,19 @@ mod sealed {
    impl_vec_cntlz! { vec_vcntlzw(vector_unsigned_int) }

    macro_rules! impl_vrl {
-        ($fun:ident $intr:ident $ty:ident) => {
+        ($fun:ident $ty:ident) => {
            #[inline]
            #[target_feature(enable = "altivec")]
            #[cfg_attr(test, assert_instr($fun))]
            unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
-                transmute($intr(transmute(a), transmute(a), transmute(b)))
+                simd_funnel_shl(a, a, b)
            }
        };
    }

-    impl_vrl! { vrlb fshlb u8 }
-    impl_vrl! { vrlh fshlh u16 }
-    impl_vrl! { vrlw fshlw u32 }
+    impl_vrl! { vrlb u8 }
+    impl_vrl! { vrlh u16 }
+    impl_vrl! { vrlw u32 }

    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
    pub trait VectorRl {
--- a/library/stdarch/crates/core_arch/src/powerpc/macros.rs
+++ b/library/stdarch/crates/core_arch/src/powerpc/macros.rs
@ -278,6 +278,7 @@ macro_rules! impl_from {
    ($s: ident) => {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl From<$s> for s_t_l!($s) {
+            #[inline]
            fn from (v: $s) -> Self {
                unsafe {
                    transmute(v)
@ -297,6 +298,7 @@ macro_rules! impl_neg {
        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
        impl crate::ops::Neg for s_t_l!($s) {
            type Output = s_t_l!($s);
+            #[inline]
            fn neg(self) -> Self::Output {
                unsafe { simd_neg(self) }
            }
--- a/library/stdarch/crates/core_arch/src/s390x/macros.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/macros.rs
@ -435,6 +435,7 @@ macro_rules! impl_from {
    ($s: ident) => {
        #[unstable(feature = "stdarch_s390x", issue = "135681")]
        impl From<$s> for s_t_l!($s) {
+            #[inline]
            fn from (v: $s) -> Self {
                unsafe {
                    transmute(v)
@ -454,6 +455,7 @@ macro_rules! impl_neg {
        #[unstable(feature = "stdarch_s390x", issue = "135681")]
        impl crate::ops::Neg for s_t_l!($s) {
            type Output = s_t_l!($s);
+            #[inline]
            fn neg(self) -> Self::Output {
                unsafe { simd_neg(self) }
            }
--- a/library/stdarch/crates/core_arch/src/s390x/vector.rs
+++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs
@ -51,7 +51,7 @@ types! {
    pub struct vector_double(2 x f64);
 }

-#[repr(packed)]
+#[repr(C, packed)]
 struct PackedTuple<T, U> {
    x: T,
    y: U,
@ -83,9 +83,6 @@ unsafe extern "unadjusted" {
    #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float;
    #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double;

-    #[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float;
-    #[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> vector_double;
-
    #[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float;
    #[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double;

@ -101,11 +98,6 @@ unsafe extern "unadjusted" {
    #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
    #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;

-    #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
-    #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
-    #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
-    #[link_name = "llvm.fshl.v2i64"] fn fshlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
-
    #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char;
    #[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short;
    #[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int;
@ -1197,8 +1189,8 @@ mod sealed {
    test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
    test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }

-    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] }
-    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] }
+    test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] }
+    test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] }

    #[unstable(feature = "stdarch_s390x", issue = "135681")]
    pub trait VectorRoundc {
@ -1221,8 +1213,8 @@ mod sealed {
    impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) }
    impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) }

-    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) }
-    impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) }
+    impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_float) }
+    impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_double) }

    #[unstable(feature = "stdarch_s390x", issue = "135681")]
    pub trait VectorTrunc {
@ -1411,43 +1403,42 @@ mod sealed {
    }

    macro_rules! impl_rot {
-        ($fun:ident $intr:ident $ty:ident) => {
+        ($fun:ident $ty:ident) => {
            #[inline]
            #[target_feature(enable = "vector")]
            #[cfg_attr(test, assert_instr($fun))]
            unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) {
-                transmute($intr(transmute(a), transmute(a), transmute(b)))
+                simd_funnel_shl(a, a, b)
            }
        };
    }

-    impl_rot! { verllvb fshlb u8 }
-    impl_rot! { verllvh fshlh u16 }
-    impl_rot! { verllvf fshlf u32 }
-    impl_rot! { verllvg fshlg u64 }
+    impl_rot! { verllvb u8 }
+    impl_rot! { verllvh u16 }
+    impl_rot! { verllvf u32 }
+    impl_rot! { verllvg u64 }

    impl_vec_shift! { [VectorRl vec_rl] (verllvb, verllvh, verllvf, verllvg) }

    macro_rules! test_rot_imm {
-        ($fun:ident $instr:ident $intr:ident $ty:ident) => {
+        ($fun:ident $instr:ident $ty:ident) => {
            #[inline]
            #[target_feature(enable = "vector")]
            #[cfg_attr(test, assert_instr($instr))]
            unsafe fn $fun(a: t_t_l!($ty), bits: core::ffi::c_ulong) -> t_t_l!($ty) {
                // mod by the number of bits in a's element type to prevent UB
                let bits = (bits % $ty::BITS as core::ffi::c_ulong) as $ty;
-                let a = transmute(a);
                let b = <t_t_s!($ty)>::splat(bits);

-                transmute($intr(a, a, transmute(b)))
+                simd_funnel_shl(a, a, transmute(b))
            }
        };
    }

-    test_rot_imm! { verllvb_imm verllb fshlb u8 }
-    test_rot_imm! { verllvh_imm verllh fshlh u16 }
-    test_rot_imm! { verllvf_imm verllf fshlf u32 }
-    test_rot_imm! { verllvg_imm verllg fshlg u64 }
+    test_rot_imm! { verllvb_imm verllb u8 }
+    test_rot_imm! { verllvh_imm verllh u16 }
+    test_rot_imm! { verllvf_imm verllf u32 }
+    test_rot_imm! { verllvg_imm verllg u64 }

    #[unstable(feature = "stdarch_s390x", issue = "135681")]
    pub trait VectorRli {
@ -4787,7 +4778,7 @@ pub unsafe fn vec_splat_s8<const IMM: i8>() -> vector_signed_char {
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 #[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
 pub unsafe fn vec_splat_s16<const IMM: i16>() -> vector_signed_short {
-    vector_signed_short([IMM as i16; 8])
+    vector_signed_short([IMM; 8])
 }

 /// Vector Splat Signed Word
--- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs
+++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs
@ -141,7 +141,7 @@ unsafe extern "unadjusted" {
    fn llvm_f64x2_max(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2;
 }

-#[repr(packed)]
+#[repr(C, packed)]
 #[derive(Copy)]
 struct Unaligned<T>(T);

--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@ -1258,7 +1258,7 @@ pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
+pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
 }

@ -1271,7 +1271,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
+pub fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
 }

@ -1284,7 +1284,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
+pub fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
 }

@ -1296,8 +1296,8 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
-    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
+pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
+    unsafe { simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) }
 }

 /// Broadcasts 128 bits from memory (composed of 2 packed double-precision
@ -1308,8 +1308,8 @@ pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
-    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
+pub fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
+    unsafe { simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) }
 }

 /// Copies `a` to result, then inserts 128 bits (composed of 4 packed
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@ -33248,7 +33248,7 @@ pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
        );
        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
-        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
+        simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
    }
 }

@ -33275,7 +33275,7 @@ pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
-        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
+        simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
    }
 }

@ -33356,7 +33356,7 @@ pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
        );
        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
-        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
+        simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
    }
 }

@ -33383,7 +33383,7 @@ pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
-        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
+        simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
    }
 }

--- a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
@ -11032,7 +11032,7 @@ pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
        let a = _mm_add_ph(a, b);
        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a = _mm_add_ph(a, b);
-        simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
+        simd_extract!(a, 0, f16) + simd_extract!(a, 1, f16)
    }
 }

@ -11085,7 +11085,7 @@ pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
        let a = _mm_mul_ph(a, b);
        let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a = _mm_mul_ph(a, b);
-        simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
+        simd_extract!(a, 0, f16) * simd_extract!(a, 1, f16)
    }
 }

--- a/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs
@ -500,7 +500,7 @@ pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
 }

 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -539,7 +539,7 @@ pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
 }

 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -578,7 +578,7 @@ pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
 pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
 }

 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -617,7 +617,7 @@ pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
 }

 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -656,7 +656,7 @@ pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
 }

 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -695,7 +695,7 @@ pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
 pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
 }

 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -734,7 +734,7 @@ pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
 }

 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -773,7 +773,7 @@ pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
 }

 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -812,7 +812,7 @@ pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
 pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
+    unsafe { transmute(simd_funnel_shl(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
 }

 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -851,7 +851,7 @@ pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
 }

 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -890,7 +890,7 @@ pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
 }

 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -929,7 +929,7 @@ pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
 pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
 }

 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -968,7 +968,7 @@ pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
 }

 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -1007,7 +1007,7 @@ pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
 }

 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -1046,7 +1046,7 @@ pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
 pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
 }

 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -1085,7 +1085,7 @@ pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
-    unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
 }

 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -1124,7 +1124,7 @@ pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
-    unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
 }

 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -1163,7 +1163,7 @@ pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
 pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
-    unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
+    unsafe { transmute(simd_funnel_shr(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
 }

 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -2138,44 +2138,6 @@ unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;

-    #[link_name = "llvm.fshl.v8i64"]
-    fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
-    #[link_name = "llvm.fshl.v4i64"]
-    fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
-    #[link_name = "llvm.fshl.v2i64"]
-    fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
-    #[link_name = "llvm.fshl.v16i32"]
-    fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
-    #[link_name = "llvm.fshl.v8i32"]
-    fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
-    #[link_name = "llvm.fshl.v4i32"]
-    fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
-    #[link_name = "llvm.fshl.v32i16"]
-    fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
-    #[link_name = "llvm.fshl.v16i16"]
-    fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
-    #[link_name = "llvm.fshl.v8i16"]
-    fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
-
-    #[link_name = "llvm.fshr.v8i64"]
-    fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
-    #[link_name = "llvm.fshr.v4i64"]
-    fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
-    #[link_name = "llvm.fshr.v2i64"]
-    fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
-    #[link_name = "llvm.fshr.v16i32"]
-    fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
-    #[link_name = "llvm.fshr.v8i32"]
-    fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
-    #[link_name = "llvm.fshr.v4i32"]
-    fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
-    #[link_name = "llvm.fshr.v32i16"]
-    fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
-    #[link_name = "llvm.fshr.v16i16"]
-    fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
-    #[link_name = "llvm.fshr.v8i16"]
-    fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
-
    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
--- a/library/stdarch/crates/core_arch/src/x86/kl.rs
+++ b/library/stdarch/crates/core_arch/src/x86/kl.rs
@ -127,7 +127,7 @@ unsafe extern "unadjusted" {
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadiwkey)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(loadiwkey))]
 pub unsafe fn _mm_loadiwkey(
    control: u32,
@ -153,7 +153,7 @@ pub unsafe fn _mm_loadiwkey(
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey128_u32)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(encodekey128))]
 pub unsafe fn _mm_encodekey128_u32(key_params: u32, key: __m128i, handle: *mut u8) -> u32 {
    let EncodeKey128Output(control, key0, key1, key2, _, _, _) = encodekey128(key_params, key);
@ -176,7 +176,7 @@ pub unsafe fn _mm_encodekey128_u32(key_params: u32, key: __m128i, handle: *mut u
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey256_u32)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(encodekey256))]
 pub unsafe fn _mm_encodekey256_u32(
    key_params: u32,
@ -198,7 +198,7 @@ pub unsafe fn _mm_encodekey256_u32(
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc128kl_u8)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesenc128kl))]
 pub unsafe fn _mm_aesenc128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 {
    let AesOutput(status, result) = aesenc128kl(input, handle);
@ -214,7 +214,7 @@ pub unsafe fn _mm_aesenc128kl_u8(output: *mut __m128i, input: __m128i, handle: *
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec128kl_u8)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesdec128kl))]
 pub unsafe fn _mm_aesdec128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 {
    let AesOutput(status, result) = aesdec128kl(input, handle);
@ -230,7 +230,7 @@ pub unsafe fn _mm_aesdec128kl_u8(output: *mut __m128i, input: __m128i, handle: *
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc256kl_u8)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesenc256kl))]
 pub unsafe fn _mm_aesenc256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 {
    let AesOutput(status, result) = aesenc256kl(input, handle);
@ -246,7 +246,7 @@ pub unsafe fn _mm_aesenc256kl_u8(output: *mut __m128i, input: __m128i, handle: *
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec256kl_u8)
 #[inline]
 #[target_feature(enable = "kl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesdec256kl))]
 pub unsafe fn _mm_aesdec256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 {
    let AesOutput(status, result) = aesdec256kl(input, handle);
@ -262,7 +262,7 @@ pub unsafe fn _mm_aesdec256kl_u8(output: *mut __m128i, input: __m128i, handle: *
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide128kl_u8)
 #[inline]
 #[target_feature(enable = "widekl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesencwide128kl))]
 pub unsafe fn _mm_aesencwide128kl_u8(
    output: *mut __m128i,
@ -285,7 +285,7 @@ pub unsafe fn _mm_aesencwide128kl_u8(
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide128kl_u8)
 #[inline]
 #[target_feature(enable = "widekl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesdecwide128kl))]
 pub unsafe fn _mm_aesdecwide128kl_u8(
    output: *mut __m128i,
@ -308,7 +308,7 @@ pub unsafe fn _mm_aesdecwide128kl_u8(
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide256kl_u8)
 #[inline]
 #[target_feature(enable = "widekl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesencwide256kl))]
 pub unsafe fn _mm_aesencwide256kl_u8(
    output: *mut __m128i,
@ -331,7 +331,7 @@ pub unsafe fn _mm_aesencwide256kl_u8(
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide256kl_u8)
 #[inline]
 #[target_feature(enable = "widekl")]
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 #[cfg_attr(test, assert_instr(aesdecwide256kl))]
 pub unsafe fn _mm_aesdecwide256kl_u8(
    output: *mut __m128i,
--- a/library/stdarch/crates/core_arch/src/x86/mod.rs
+++ b/library/stdarch/crates/core_arch/src/x86/mod.rs
@ -772,5 +772,5 @@ mod avx512fp16;
 pub use self::avx512fp16::*;

 mod kl;
-#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "keylocker_x86", since = "1.89.0")]
 pub use self::kl::*;
--- a/library/stdarch/crates/core_arch/src/x86/sha.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sha.rs
@ -146,7 +146,7 @@ pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sha512,avx")]
 #[cfg_attr(test, assert_instr(vsha512msg1))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
    unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
 }
@ -159,7 +159,7 @@ pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "sha512,avx")]
 #[cfg_attr(test, assert_instr(vsha512msg2))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
 }
@ -175,7 +175,7 @@ pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "sha512,avx")]
 #[cfg_attr(test, assert_instr(vsha512rnds2))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
    unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
 }
@ -188,7 +188,7 @@ pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "sm3,avx")]
 #[cfg_attr(test, assert_instr(vsm3msg1))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
 }
@ -201,7 +201,7 @@ pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sm3,avx")]
 #[cfg_attr(test, assert_instr(vsm3msg2))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
 }
@ -219,7 +219,7 @@ pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "sm3,avx")]
 #[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    static_assert!(
        IMM8 == (IMM8 & 0x3e),
@ -235,7 +235,7 @@ pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -
 #[inline]
 #[target_feature(enable = "sm4,avx")]
 #[cfg_attr(test, assert_instr(vsm4key4))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
 }
@ -247,7 +247,7 @@ pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sm4,avx")]
 #[cfg_attr(test, assert_instr(vsm4key4))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
 }
@ -259,7 +259,7 @@ pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[inline]
 #[target_feature(enable = "sm4,avx")]
 #[cfg_attr(test, assert_instr(vsm4rnds4))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
 }
@ -271,7 +271,7 @@ pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "sm4,avx")]
 #[cfg_attr(test, assert_instr(vsm4rnds4))]
-#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "sha512_sm_x86", since = "1.89.0")]
 pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
 }
--- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
@ -48,7 +48,7 @@ pub unsafe fn cmpxchg16b(
    success: Ordering,
    failure: Ordering,
 ) -> u128 {
-    debug_assert!(dst as usize % 16 == 0);
+    debug_assert!(dst.addr().is_multiple_of(16));

    let res = crate::sync::atomic::atomic_compare_exchange(dst, old, new, success, failure);
    res.unwrap_or_else(|x| x)
--- a/library/stdarch/crates/intrinsic-test/src/common/compare.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/compare.rs
@ -48,7 +48,7 @@ pub fn compare_outputs(
                return Some(FailureReason::RunRust(intrinsic_name.clone()));
            }

-            info!("Comparing intrinsic: {}", intrinsic_name);
+            info!("Comparing intrinsic: {intrinsic_name}");

            let c = std::str::from_utf8(&c.stdout)
                .unwrap()
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@ -79,7 +79,7 @@ pub fn compile_c_programs(compiler_commands: &[String]) -> bool {
                    false
                }
            } else {
-                error!("Command failed: {:#?}", output);
+                error!("Command failed: {output:#?}");
                false
            }
        })
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@ -120,7 +120,7 @@ path = "{binary}/main.rs""#,
            false
        }
    } else {
-        error!("Command failed: {:#?}", output);
+        error!("Command failed: {output:#?}");
        false
    }
 }
--- a/library/stdarch/crates/simd-test-macro/src/lib.rs
+++ b/library/stdarch/crates/simd-test-macro/src/lib.rs
@ -89,7 +89,7 @@ pub fn simd_test(
    for feature in target_features {
        let q = quote_spanned! {
            proc_macro2::Span::call_site() =>
-            if !#macro_test!(#feature) {
+            if !::std::arch::#macro_test!(#feature) {
                missing_features.push(#feature);
            }
        };
--- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs
+++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs
@ -157,11 +157,11 @@ features! {
    /// AVX (Advanced Vector Extensions)
    @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2";
    /// AVX2 (Advanced Vector Extensions 2)
-    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sha512: "sha512";
+    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sha512: "sha512";
    /// SHA512
-    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm3: "sm3";
+    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm3: "sm3";
    /// SM3
-    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm4: "sm4";
+    @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm4: "sm4";
    /// SM4
    @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ;
    /// AVX-512 F (Foundation)
@ -259,9 +259,9 @@ features! {
    /// XSAVEC (Save Processor Extended States Compacted)
    @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b";
    /// CMPXCH16B (16-byte compare-and-swap instruction)
-    @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] kl: "kl";
+    @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] kl: "kl";
    /// Intel Key Locker
-    @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] widekl: "widekl";
+    @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] widekl: "widekl";
    /// Intel Key Locker Wide
    @FEATURE: #[stable(feature = "simd_x86_adx", since = "1.33.0")] adx: "adx";
    /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
--- a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs
+++ b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs
@ -25,6 +25,13 @@ struct riscv_hwprobe {
    value: u64,
 }

+impl riscv_hwprobe {
+    // key is overwritten to -1 if not supported by riscv_hwprobe syscall.
+    pub fn get(&self) -> Option<u64> {
+        (self.key != -1).then_some(self.value)
+    }
+}
+
 #[allow(non_upper_case_globals)]
 const __NR_riscv_hwprobe: libc::c_long = 258;

@ -124,8 +131,7 @@ fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool {
        }
    }

-    let len = out.len();
-    unsafe { __riscv_hwprobe(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 }
+    unsafe { __riscv_hwprobe(out.as_mut_ptr(), out.len(), 0, ptr::null_mut(), 0) == 0 }
 }

 /// Read list of supported features from (1) the auxiliary vector
@ -156,49 +162,45 @@ pub(crate) fn detect_features() -> cache::Initializer {
    // Use riscv_hwprobe syscall to query more extensions and
    // performance-related capabilities.
    'hwprobe: {
-        let mut out = [
-            riscv_hwprobe {
-                key: RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
-                value: 0,
-            },
-            riscv_hwprobe {
-                key: RISCV_HWPROBE_KEY_IMA_EXT_0,
-                value: 0,
-            },
-            riscv_hwprobe {
-                key: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
-                value: 0,
-            },
-            riscv_hwprobe {
-                key: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
-                value: 0,
-            },
-            riscv_hwprobe {
-                key: RISCV_HWPROBE_KEY_CPUPERF_0,
-                value: 0,
-            },
-        ];
-        if !_riscv_hwprobe(&mut out) {
+        macro_rules! init {
+            { $($name: ident : $key: expr),* $(,)? } => {
+                #[repr(usize)]
+                enum Indices { $($name),* }
+                let mut t = [$(riscv_hwprobe { key: $key, value: 0 }),*];
+                macro_rules! data_mut { () => { &mut t } }
+                macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } }
+            }
+        }
+        init! {
+            BaseBehavior: RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+            Extensions:   RISCV_HWPROBE_KEY_IMA_EXT_0,
+            MisalignedScalarPerf: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
+            MisalignedVectorPerf: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
+            MisalignedScalarPerfFallback: RISCV_HWPROBE_KEY_CPUPERF_0,
+        };
+        if !_riscv_hwprobe(data_mut!()) {
            break 'hwprobe;
        }

-        // Query scalar/vector misaligned behavior.
-        if out[2].key != -1 {
+        // Query scalar misaligned behavior.
+        if let Some(value) = query![MisalignedScalarPerf] {
            enable_feature(
                Feature::unaligned_scalar_mem,
-                out[2].value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST,
+                value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST,
            );
-        } else if out[4].key != -1 {
+        } else if let Some(value) = query![MisalignedScalarPerfFallback] {
            // Deprecated method for fallback
            enable_feature(
                Feature::unaligned_scalar_mem,
-                out[4].value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST,
+                value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST,
            );
        }
-        if out[3].key != -1 {
+
+        // Query vector misaligned behavior.
+        if let Some(value) = query![MisalignedVectorPerf] {
            enable_feature(
                Feature::unaligned_vector_mem,
-                out[3].value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST,
+                value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST,
            );
        }

@ -208,22 +210,20 @@ pub(crate) fn detect_features() -> cache::Initializer {
        // 20240411).
        // This is a current requirement of
        // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests.
-        let has_ima = (out[0].key != -1) && (out[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA != 0);
-        if !has_ima {
+        if query![BaseBehavior].is_none_or(|value| value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA == 0) {
            break 'hwprobe;
        }
-        has_i |= has_ima;
-        enable_feature(Feature::zicsr, has_ima);
-        enable_feature(Feature::zicntr, has_ima);
-        enable_feature(Feature::zifencei, has_ima);
-        enable_feature(Feature::m, has_ima);
-        enable_feature(Feature::a, has_ima);
+        has_i = true;
+        enable_feature(Feature::zicsr, true);
+        enable_feature(Feature::zicntr, true);
+        enable_feature(Feature::zifencei, true);
+        enable_feature(Feature::m, true);
+        enable_feature(Feature::a, true);

        // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`.
-        if out[1].key == -1 {
+        let Some(ima_ext_0) = query![Extensions] else {
            break 'hwprobe;
-        }
-        let ima_ext_0 = out[1].value;
+        };
        let test = |mask| (ima_ext_0 & mask) != 0;

        enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied.
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@ -1252,7 +1252,7 @@ intrinsics:
      - [i16, f16, 'h', 'i32', 'as i32']
    compose:
      - FnCall: [static_assert!, ['N >= 1 && N <= 16']]
-      - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a {type[4]}) as {type[1]}"
+      - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a {type[4]})"


  - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}"
@ -1270,7 +1270,7 @@ intrinsics:
      - [u16, f16, 'h', u32]
    compose:
      - FnCall: [static_assert!, ['N >= 1 && N <= 16']]
-      - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a as {type[3]}) as {type[1]}"
+      - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a as {type[3]})"


  - name: "vcvt{type[2]}"
@ -2976,11 +2976,7 @@ intrinsics:
      - float64x1_t
      - float64x2_t
    compose:
-      - LLVMLink:
-          name: "llvm.rint.{neon_type}"
-          links:
-            - link: "llvm.rint.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_round_ties_even, [a]]


  - name: "vrndx{neon_type.no}"
@ -2996,11 +2992,7 @@ intrinsics:
      - float16x4_t
      - float16x8_t
    compose:
-      - LLVMLink:
-          name: "llvm.rint.{neon_type}"
-          links:
-            - link: "llvm.rint.{neon_type}"
-              arch: aarch64,arm64ec
+      - FnCall: [simd_round_ties_even, [a]]


  - name: "vrndx{type[1]}{type[0]}"
@ -5391,7 +5383,7 @@ intrinsics:
    attr:
      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]]
      - FnCall: [rustc_legacy_const_generics, ['2']]
-      - *neon-fp16 
+      - *neon-fp16
      - *neon-unstable-f16
    static_defs: ["const LANE: i32"]
    safety: safe
@ -5444,7 +5436,7 @@ intrinsics:
    attr:
      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]]
      - FnCall: [rustc_legacy_const_generics, ['2']]
-      - *neon-fp16 
+      - *neon-fp16
      - *neon-unstable-f16
    static_defs: ["const LANE: i32"]
    safety: safe
@ -5468,7 +5460,7 @@ intrinsics:
    return_type: "{neon_type[0]}"
    attr:
      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]]
-      - *neon-fp16 
+      - *neon-fp16
      - *neon-unstable-f16
    safety: safe
    types:
@ -5552,7 +5544,7 @@ intrinsics:
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"]
    return_type: "{neon_type[0]}"
    attr:
-      - *neon-fp16 
+      - *neon-fp16
      - *neon-unstable-f16
    assert_instr: [fmla]
    safety: safe
@ -7320,7 +7312,7 @@ intrinsics:
      - ["i64", "i32", "i32", "i64"]
    compose:
      - Let: [x, i64, {FnCall: [vqaddd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}]
-      - Identifier: ['x as i64', Symbol]
+      - Identifier: ['x', Symbol]

  - name: "vqdmlal{type[4]}"
    doc: "Signed saturating doubling multiply-add long"
@ -7434,7 +7426,7 @@ intrinsics:
      - ["i64", "i32", "i32", "i64"]
    compose:
      - Let: [x, i64, {FnCall: [vqsubd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}]
-      - Identifier: ['x as i64', Symbol]
+      - Identifier: ['x', Symbol]

  - name: "vqdmlsl{type[4]}"
    doc: "Signed saturating doubling multiply-subtract long"
@ -11697,7 +11689,6 @@ intrinsics:
    arguments: ["a: {type[1]}"]
    return_type: "{type[1]}"
    attr:
-      - *neon-fp16
      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
      - *neon-fp16
      - *neon-unstable-f16
@ -12104,7 +12095,7 @@ intrinsics:
      - [uint8x8_t, 'uint8x8_t', 'b']
      - [poly8x8_t, 'uint8x8_t', 'b']
    compose:
-      - FnCall: 
+      - FnCall:
          - 'vqtbl1{neon_type[0].no}'
          - - FnCall:
                - 'vcombine{neon_type[0].no}'
@ -12174,7 +12165,7 @@ intrinsics:
              - '{type[2]}_t'
              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
-      - FnCall: 
+      - FnCall:
          - transmute
          - - FnCall:
                - vqtbl2
@ -12193,7 +12184,7 @@ intrinsics:
    types:
      - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
      - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
-    big_endian_inverse: true 
+    big_endian_inverse: true
    compose:
      - Let:
          - x
@ -12201,7 +12192,7 @@ intrinsics:
              - '{type[2]}_t'
              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
-      - FnCall: 
+      - FnCall:
          - transmute
          - - FnCall:
                - vqtbl2
@ -12288,18 +12279,16 @@ intrinsics:
      - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
      - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
      - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
+    big_endian_inverse: false
    compose:
-      - Let:
-        - x
-        - FnCall:
-            - transmute
-            - - FnCall:
-                  - "{type[3]}"
-                  - - FnCall: [transmute, [a]]
-                    - FnCall: [transmute, [b]]
-                    - c
-      - Identifier: [x, Symbol]
-  
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "{type[3]}"
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+                  - c
+
  - name: "vtbx1{neon_type[0].no}"
    doc: "Extended table look-up"
    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
@ -12315,13 +12304,13 @@ intrinsics:
    compose:
      - FnCall:
          - simd_select
-          - - FnCall: 
+          - - FnCall:
                - "simd_lt::<{type[4]}_t, int8x8_t>"
                - - c
                  - FnCall: [transmute, ["{type[3]}"]]
            - FnCall:
                - transmute
-                - - FnCall: 
+                - - FnCall:
                      - "vqtbx1"
                      - - "transmute(a)"
                        - FnCall:
@ -12470,16 +12459,14 @@ intrinsics:
      - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
      - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
      - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
+    big_endian_inverse: false
    compose:
-      - Let:
-          - x
-          - FnCall:
-              - transmute
-              - - FnCall: 
-                    - '{type[2]}'
-                    - - FnCall: [transmute, ['a']]
-                      - b
-      - Identifier: [x, Symbol]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[2]}'
+                - - FnCall: [transmute, ['a']]
+                  - b

  - name: "vqtbl2{neon_type[3].no}"
    doc: "Table look-up"
@ -12511,7 +12498,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[2]}'
                - - FnCall: [transmute, ['a.0']]
                  - FnCall: [transmute, ['a.1']]
@ -12547,7 +12534,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[3]}'
                - - FnCall: [transmute, [a]]
                  - FnCall: [transmute, ['b.0']]
@ -12584,7 +12571,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[3]}'
                - - FnCall: [transmute, ['a.0']]
                  - FnCall: [transmute, ['a.1']]
@ -12621,7 +12608,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[3]}'
                - - FnCall: [transmute, [a]]
                  - FnCall: [transmute, ['b.0']]
@ -12659,7 +12646,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[2]}'
                - - FnCall: [transmute, ['a.0']]
                  - FnCall: [transmute, ['a.1']]
@ -12697,7 +12684,7 @@ intrinsics:
    compose:
      - FnCall:
          - transmute
-          - - FnCall: 
+          - - FnCall:
                - '{type[3]}'
                - - FnCall: [transmute, [a]]
                  - FnCall: [transmute, ['b.0']]
@ -13204,7 +13191,7 @@ intrinsics:
    assert_instr: [addp]
    safety: safe
    types:
-      - [int32x2_t, i32] 
+      - [int32x2_t, i32]
    compose:
      - LLVMLink:
          name: "vaddv{neon_type[0].no}"
@ -13259,7 +13246,7 @@ intrinsics:
    assert_instr: [addp]
    safety: safe
    types:
-      - [uint32x2_t, u32, i32] 
+      - [uint32x2_t, u32, i32]
    compose:
      - LLVMLink:
          name: "vaddv{neon_type[0].no}"
@ -13335,7 +13322,7 @@ intrinsics:
    types:
      - [int8x8_t, i8, 'smaxv']
      - [int16x4_t, i16, 'smaxv']
-      - [int32x2_t, i32, 'smaxp'] 
+      - [int32x2_t, i32, 'smaxp']
      - [int8x16_t, i8, 'smaxv']
      - [int16x8_t, i16, 'smaxv']
      - [int32x4_t, i32, 'smaxv']
@ -13357,7 +13344,7 @@ intrinsics:
    types:
      - [uint8x8_t, u8, 'umaxv']
      - [uint16x4_t, u16, 'umaxv']
-      - [uint32x2_t, u32, 'umaxp'] 
+      - [uint32x2_t, u32, 'umaxp']
      - [uint8x16_t, u8, 'umaxv']
      - [uint16x8_t, u16, 'umaxv']
      - [uint32x4_t, u32, 'umaxv']
@ -13379,7 +13366,7 @@ intrinsics:
    types:
      - [float32x2_t, f32, 'fmaxp']
      - [float32x4_t, f32, 'fmaxv']
-      - [float64x2_t, f64, 'fmaxp'] 
+      - [float64x2_t, f64, 'fmaxp']
    compose:
      - LLVMLink:
          name: "vmaxv{neon_type[0].no}"
@ -13398,7 +13385,7 @@ intrinsics:
    types:
      - [int8x8_t, i8, 'sminv']
      - [int16x4_t, i16, 'sminv']
-      - [int32x2_t, i32, 'sminp'] 
+      - [int32x2_t, i32, 'sminp']
      - [int8x16_t, i8, 'sminv']
      - [int16x8_t, i16, 'sminv']
      - [int32x4_t, i32, 'sminv']
@ -13420,7 +13407,7 @@ intrinsics:
    types:
      - [uint8x8_t, u8, 'uminv']
      - [uint16x4_t, u16, 'uminv']
-      - [uint32x2_t, u32, 'uminp'] 
+      - [uint32x2_t, u32, 'uminp']
      - [uint8x16_t, u8, 'uminv']
      - [uint16x8_t, u16, 'uminv']
      - [uint32x4_t, u32, 'uminv']
@ -13442,7 +13429,7 @@ intrinsics:
    types:
      - [float32x2_t, f32, 'fminp']
      - [float32x4_t, f32, 'fminv']
-      - [float64x2_t, f64, 'fminp'] 
+      - [float64x2_t, f64, 'fminp']
    compose:
      - LLVMLink:
          name: "vminv{neon_type[0].no}"
@ -13498,7 +13485,7 @@ intrinsics:
    safety: safe
    types:
      - float32x4_t
-      - float64x2_t 
+      - float64x2_t
    compose:
      - LLVMLink:
          name: "vpmin{neon_type.no}"
@ -13554,7 +13541,7 @@ intrinsics:
    safety: safe
    types:
      - float32x4_t
-      - float64x2_t 
+      - float64x2_t
    compose:
      - LLVMLink:
          name: "vpmax{neon_type.no}"
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@ -11447,14 +11447,14 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }']
-      - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }']
-      - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }']
+      - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8; 8]) }']
+      - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16; 4]) }']
+      - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N; 2]) }']
      - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }']
-      - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }']
-      - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }']
-      - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }']
-      - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }']
+      - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8; 16]) }']
+      - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16; 8]) }']
+      - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N; 4]) }']
+      - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64; 2]) }']
    compose:
      - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
      - LLVMLink:
@ -11479,14 +11479,14 @@ intrinsics:
    static_defs: ['const N: i32']
    safety: safe
    types:
-      - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }']
-      - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }']
-      - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }']
+      - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8; 8]) }']
+      - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16; 4]) }']
+      - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N; 2]) }']
      - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }']
-      - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }']
-      - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }']
-      - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }']
-      - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }']
+      - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8; 16]) }']
+      - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16; 8]) }']
+      - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N; 4]) }']
+      - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64; 2]) }']
    compose:
      - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
      - LLVMLink:
--- a/library/stdarch/josh-sync.toml
+++ b/library/stdarch/josh-sync.toml
@ -0,0 +1,3 @@
+org = "rust-lang"
+repo = "stdarch"
+path = "library/stdarch"
--- a/library/stdarch/rust-version
+++ b/library/stdarch/rust-version
@ -0,0 +1 @@
+040e2f8b9ff2d76fbe2146d6003e297ed4532088