Merge commit 'd9aae8cc54' into sync-from-portable-simd-2026-01-28

2026-01-28 00:56:52 -05:00 · 2026-01-28 00:56:52 -05:00 · 0eaef59233
commit 0eaef59233
parent e96bb7e44f d9aae8cc54
40 changed files with 879 additions and 1193 deletions
--- a/library/portable-simd/.github/workflows/ci.yml
+++ b/library/portable-simd/.github/workflows/ci.yml
@ -59,7 +59,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu]
+        target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, x86_64-unknown-linux-gnu]
        # `default` means we use the default target config for the target,
        # `native` means we run with `-Ctarget-cpu=native`, and anything else is
        # an arg to `-Ctarget-feature`
@ -68,18 +68,12 @@ jobs:
        exclude:
          # -Ctarget-cpu=native sounds like bad-news if target != host
          - { target: i686-pc-windows-msvc, target_feature: native }
-          - { target: i586-pc-windows-msvc, target_feature: native }

        include:
          # Populate the `matrix.os` field
          - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
          - { target: x86_64-pc-windows-msvc,   os: windows-latest }
          - { target: i686-pc-windows-msvc,     os: windows-latest }
-          - { target: i586-pc-windows-msvc,     os: windows-latest }
-
-          # These are globally available on all the other targets.
-          - { target: i586-pc-windows-msvc, target_feature: +sse, os: windows-latest }
-          - { target: i586-pc-windows-msvc, target_feature: +sse2, os: windows-latest }

          # Annoyingly, the x86_64-unknown-linux-gnu runner *almost* always has
          # avx512vl, but occasionally doesn't.  Maybe one day we can enable it.
@ -246,9 +240,18 @@ jobs:

  miri:
    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: [1, 2, 3, 4]
    env:
      PROPTEST_CASES: 16
    steps:
      - uses: actions/checkout@v4
-      - name: Test (Miri)
-        run: cargo miri test
+
+      - name: Install cargo-nextest
+        uses: taiki-e/install-action@nextest
+
+      - name: Test (Miri) (partition ${{ matrix.shard }}/4)
+        run: |
+          cargo miri nextest run --partition count:${{ matrix.shard }}/4
--- a/library/portable-simd/Cargo.lock
+++ b/library/portable-simd/Cargo.lock
@ -1,12 +1,12 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4

 [[package]]
 name = "autocfg"
-version = "1.1.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

 [[package]]
 name = "bitflags"
@ -16,31 +16,30 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

 [[package]]
 name = "bumpalo"
-version = "3.13.0"
+version = "3.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"

 [[package]]
 name = "byteorder"
-version = "1.4.3"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "cc"
+version = "1.2.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f"
+dependencies = [
+ "shlex",
+]

 [[package]]
 name = "cfg-if"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "console_error_panic_hook"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
-dependencies = [
- "cfg-if",
- "wasm-bindgen",
-]
+checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"

 [[package]]
 name = "core_simd"
@ -54,46 +53,69 @@ dependencies = [
 ]

 [[package]]
-name = "js-sys"
-version = "0.3.64"
+name = "float-cmp"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
 dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
+dependencies = [
+ "once_cell",
 "wasm-bindgen",
 ]

 [[package]]
 name = "log"
-version = "0.4.20"
+version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+
+[[package]]
+name = "minicov"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b"
+dependencies = [
+ "cc",
+ "walkdir",
+]

 [[package]]
 name = "num-traits"
-version = "0.2.16"
+version = "0.2.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
 "autocfg",
 ]

 [[package]]
 name = "once_cell"
-version = "1.18.0"
+version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"

 [[package]]
 name = "ppv-lite86"
-version = "0.2.17"
+version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]

 [[package]]
 name = "proc-macro2"
-version = "1.0.66"
+version = "1.0.101"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
 dependencies = [
 "unicode-ident",
 ]
@ -114,9 +136,9 @@ dependencies = [

 [[package]]
 name = "quote"
-version = "1.0.33"
+version = "1.0.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
 dependencies = [
 "proc-macro2",
 ]
@ -167,10 +189,25 @@ dependencies = [
 ]

 [[package]]
-name = "scoped-tls"
-version = "1.0.1"
+name = "rustversion"
+version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

 [[package]]
 name = "std_float"
@ -184,9 +221,9 @@ dependencies = [

 [[package]]
 name = "syn"
-version = "2.0.29"
+version = "2.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
+checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
 dependencies = [
 "proc-macro2",
 "quote",
@ -197,34 +234,46 @@ dependencies = [
 name = "test_helpers"
 version = "0.1.0"
 dependencies = [
+ "float-cmp",
 "proptest",
 ]

 [[package]]
 name = "unicode-ident"
-version = "1.0.11"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]

 [[package]]
 name = "wasm-bindgen"
-version = "0.2.87"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
 dependencies = [
 "cfg-if",
+ "once_cell",
+ "rustversion",
 "wasm-bindgen-macro",
 ]

 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.87"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
 dependencies = [
 "bumpalo",
 "log",
- "once_cell",
 "proc-macro2",
 "quote",
 "syn",
@ -233,21 +282,22 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.37"
+version = "0.4.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
+checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
 dependencies = [
 "cfg-if",
 "js-sys",
+ "once_cell",
 "wasm-bindgen",
 "web-sys",
 ]

 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.87"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
 dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
@ -255,9 +305,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.87"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
 dependencies = [
 "proc-macro2",
 "quote",
@ -268,19 +318,21 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.87"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+dependencies = [
+ "unicode-ident",
+]

 [[package]]
 name = "wasm-bindgen-test"
-version = "0.3.37"
+version = "0.3.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671"
+checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3"
 dependencies = [
- "console_error_panic_hook",
 "js-sys",
- "scoped-tls",
+ "minicov",
 "wasm-bindgen",
 "wasm-bindgen-futures",
 "wasm-bindgen-test-macro",
@ -288,20 +340,123 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-test-macro"
-version = "0.3.37"
+version = "0.3.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575"
+checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b"
 dependencies = [
 "proc-macro2",
 "quote",
+ "syn",
 ]

 [[package]]
 name = "web-sys"
-version = "0.3.64"
+version = "0.3.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
 dependencies = [
 "js-sys",
 "wasm-bindgen",
 ]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
--- a/library/portable-simd/beginners-guide.md
+++ b/library/portable-simd/beginners-guide.md
@ -25,7 +25,7 @@ SIMD has a few special vocabulary terms you should know:

 * **Scalar:** "Scalar" in mathematical contexts refers to values that can be represented as a single element, mostly numbers like 6, 3.14, or -2. It can also be used to describe "scalar operations" that use strictly scalar values, like addition. This term is mostly used to differentiate between vectorized operations that use SIMD instructions and scalar operations that don't.

-* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops.
+* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general it is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops.

 * **Bit Widths:** When talking about SIMD, the bit widths used are the bit size of the vectors involved, *not* the individual elements. So "128-bit SIMD" has 128-bit vectors, and that might be `f32x4`, `i32x4`, `i16x8`, or other variations. While 128-bit SIMD is the most common, there's also 64-bit, 256-bit, and even 512-bit on the newest CPUs.

--- a/library/portable-simd/crates/core_simd/examples/dot_product.rs
+++ b/library/portable-simd/crates/core_simd/examples/dot_product.rs
@ -1,8 +1,6 @@
 //! Code taken from the `packed_simd` crate.
 //! Run this code with `cargo test --example dot_product`.

-#![feature(array_chunks)]
-#![feature(slice_as_chunks)]
 // Add these imports to use the stdsimd library
 #![feature(portable_simd)]
 use core_simd::simd::prelude::*;
@ -33,7 +31,7 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 {
 }

 // We now move on to the SIMD implementations: notice the following constructs:
-// `array_chunks::<4>`: mapping this over the vector will let use construct SIMD vectors
+// `as_chunks::<4>`: mapping this over the vector will let us construct SIMD vectors
 // `f32x4::from_array`: construct the SIMD vector from a slice
 // `(a * b).reduce_sum()`: Multiply both f32x4 vectors together, and then reduce them.
 // This approach essentially uses SIMD to produce a vector of length N/4 of all the products,
@ -42,9 +40,11 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 {
 pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    // TODO handle remainder when a.len() % 4 != 0
-    a.array_chunks::<4>()
+    a.as_chunks::<4>()
+        .0
+        .iter()
        .map(|&a| f32x4::from_array(a))
-        .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
+        .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
        .map(|(a, b)| (a * b).reduce_sum())
        .sum()
 }
@ -60,9 +60,11 @@ pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 {
 pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    // TODO handle remainder when a.len() % 4 != 0
-    a.array_chunks::<4>()
+    a.as_chunks::<4>()
+        .0
+        .iter()
        .map(|&a| f32x4::from_array(a))
-        .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
+        .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
        .fold(f32x4::splat(0.0), |acc, zipped| acc + zipped.0 * zipped.1)
        .reduce_sum()
 }
@ -74,9 +76,11 @@ pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    // TODO handle remainder when a.len() % 4 != 0
    let mut res = f32x4::splat(0.0);
-    a.array_chunks::<4>()
+    a.as_chunks::<4>()
+        .0
+        .iter()
        .map(|&a| f32x4::from_array(a))
-        .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
+        .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
        .for_each(|(a, b)| {
            res = a.mul_add(b, res);
        });
@ -113,9 +117,11 @@ pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 {
 // next example.
 pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
    let mut sum = a
-        .array_chunks::<4>()
+        .as_chunks::<4>()
+        .0
+        .iter()
        .map(|&a| f32x4::from_array(a))
-        .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
+        .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
        .map(|(a, b)| a * b)
        .fold(f32x4::splat(0.0), std::ops::Add::add)
        .reduce_sum();
@ -131,9 +137,11 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
 // This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that.
 // Notice the use of `mul_add`, which can do a multiply and an add operation ber iteration.
 pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 {
-    a.array_chunks::<4>()
+    a.as_chunks::<4>()
+        .0
+        .iter()
        .map(|&a| f32x4::from_array(a))
-        .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
+        .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
        .fold(f32x4::splat(0.), |acc, (a, b)| a.mul_add(b, acc))
        .reduce_sum()
 }
--- a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
+++ b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
@ -1,7 +1,7 @@
 //! 4x4 matrix inverse
 // Code ported from the `packed_simd` crate
 // Run this code with `cargo test --example matrix_inversion`
-#![feature(array_chunks, portable_simd)]
+#![feature(portable_simd)]
 use core_simd::simd::prelude::*;

 // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
--- a/library/portable-simd/crates/core_simd/src/fmt.rs
+++ b/library/portable-simd/crates/core_simd/src/fmt.rs
@ -1,9 +1,8 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{Simd, SimdElement};
 use core::fmt;

 impl<T, const N: usize> fmt::Debug for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + fmt::Debug,
 {
    /// A `Simd<T, N>` has a debug format like the one for `[T]`:
--- a/library/portable-simd/crates/core_simd/src/iter.rs
+++ b/library/portable-simd/crates/core_simd/src/iter.rs
@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::Simd;
 use core::{
    iter::{Product, Sum},
    ops::{Add, Mul},
@ -7,8 +7,6 @@ use core::{
 macro_rules! impl_traits {
    { $type:ty } => {
        impl<const N: usize> Sum<Self> for Simd<$type, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
@ -17,8 +15,6 @@ macro_rules! impl_traits {
        }

        impl<const N: usize> Product<Self> for Simd<$type, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
@ -27,8 +23,6 @@ macro_rules! impl_traits {
        }

        impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
@ -37,8 +31,6 @@ macro_rules! impl_traits {
        }

        impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
--- a/library/portable-simd/crates/core_simd/src/lane_count.rs
+++ b/library/portable-simd/crates/core_simd/src/lane_count.rs
@ -1,40 +0,0 @@
-mod sealed {
-    pub trait Sealed {}
-}
-use sealed::Sealed;
-
-/// Specifies the number of lanes in a SIMD vector as a type.
-pub struct LaneCount<const N: usize>;
-
-impl<const N: usize> LaneCount<N> {
-    /// The number of bytes in a bitmask with this many lanes.
-    pub const BITMASK_LEN: usize = N.div_ceil(8);
-}
-
-/// Statically guarantees that a lane count is marked as supported.
-///
-/// This trait is *sealed*: the list of implementors below is total.
-/// Users do not have the ability to mark additional `LaneCount<N>` values as supported.
-/// Only SIMD vectors with supported lane counts are constructable.
-pub trait SupportedLaneCount: Sealed {
-    #[doc(hidden)]
-    type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
-}
-
-impl<const N: usize> Sealed for LaneCount<N> {}
-
-macro_rules! supported_lane_count {
-    ($($lanes:literal),+) => {
-        $(
-            impl SupportedLaneCount for LaneCount<$lanes> {
-                type BitMask = [u8; ($lanes + 7) / 8];
-            }
-        )+
-    };
-}
-
-supported_lane_count!(
-    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
-);
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@ -9,7 +9,8 @@
    simd_ffi,
    staged_api,
    prelude_import,
-    ptr_metadata
+    ptr_metadata,
+    rustc_attrs
 )]
 #![cfg_attr(
    all(
@ -30,10 +31,6 @@
    any(target_arch = "powerpc", target_arch = "powerpc64"),
    feature(stdarch_powerpc)
 )]
-#![cfg_attr(
-    all(target_arch = "x86_64", target_feature = "avx512f"),
-    feature(stdarch_x86_avx512)
-)]
 #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
 #![deny(
    unsafe_op_in_unsafe_fn,
@ -41,7 +38,7 @@
    clippy::undocumented_unsafe_blocks
 )]
 #![doc(test(attr(deny(warnings))))]
-#![allow(internal_features)]
+#![allow(internal_features, clippy::repr_packed_without_abi)]
 #![unstable(feature = "portable_simd", issue = "86656")]
 //! Portable SIMD module.

--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@ -2,20 +2,33 @@
 //! Types representing
 #![allow(non_camel_case_types)]

-#[cfg_attr(
-    not(all(target_arch = "x86_64", target_feature = "avx512f")),
-    path = "masks/full_masks.rs"
-)]
-#[cfg_attr(
-    all(target_arch = "x86_64", target_feature = "avx512f"),
-    path = "masks/bitmask.rs"
-)]
-mod mask_impl;
-
-use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
+use crate::simd::{Select, Simd, SimdCast, SimdElement};
 use core::cmp::Ordering;
 use core::{fmt, mem};

+pub(crate) trait FixEndianness {
+    fn fix_endianness(self) -> Self;
+}
+
+macro_rules! impl_fix_endianness {
+    { $($int:ty),* } => {
+        $(
+        impl FixEndianness for $int {
+            #[inline(always)]
+            fn fix_endianness(self) -> Self {
+                if cfg!(target_endian = "big") {
+                    <$int>::reverse_bits(self)
+                } else {
+                    self
+                }
+            }
+        }
+        )*
+    }
+}
+
+impl_fix_endianness! { u8, u16, u32, u64 }
+
 mod sealed {
    use super::*;

@ -28,7 +41,6 @@ mod sealed {
    pub trait Sealed {
        fn valid<const N: usize>(values: Simd<Self, N>) -> bool
        where
-            LaneCount<N>: SupportedLaneCount,
            Self: SimdElement;

        fn eq(self, other: Self) -> bool;
@ -56,8 +68,6 @@ macro_rules! impl_element {
        impl Sealed for $ty {
            #[inline]
            fn valid<const N: usize>(value: Simd<Self, N>) -> bool
-            where
-                LaneCount<N>: SupportedLaneCount,
            {
                // We can't use `Simd` directly, because `Simd`'s functions call this function and
                // we will end up with an infinite loop.
@ -108,23 +118,19 @@ impl_element! { isize, usize }
 /// The layout of this type is unspecified, and may change between platforms
 /// and/or Rust versions, and code should not assume that it is equivalent to
 /// `[T; N]`.
+///
+/// `N` cannot be 0 and may be at most 64. This limit may be increased in
+/// the future.
 #[repr(transparent)]
-pub struct Mask<T, const N: usize>(mask_impl::Mask<T, N>)
+pub struct Mask<T, const N: usize>(Simd<T, N>)
 where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount;
+    T: MaskElement;

-impl<T, const N: usize> Copy for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
+impl<T, const N: usize> Copy for Mask<T, N> where T: MaskElement {}

 impl<T, const N: usize> Clone for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn clone(&self) -> Self {
@ -135,12 +141,12 @@ where
 impl<T, const N: usize> Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    /// Constructs a mask by setting all elements to the given value.
    #[inline]
-    pub fn splat(value: bool) -> Self {
-        Self(mask_impl::Mask::splat(value))
+    #[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
+    pub const fn splat(value: bool) -> Self {
+        Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
    }

    /// Converts an array of bools to a SIMD mask.
@ -156,7 +162,7 @@ where
            let bytes: [u8; N] = mem::transmute_copy(&array);
            let bools: Simd<i8, N> =
                core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
-            Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools))
+            Mask::from_simd_unchecked(core::intrinsics::simd::simd_cast(bools))
        }
    }

@ -174,7 +180,7 @@ where
        // This would be hypothetically valid as an "in-place" transmute,
        // but these are "dependently-sized" types, so copy elision it is!
        unsafe {
-            let mut bytes: Simd<i8, N> = core::intrinsics::simd::simd_cast(self.to_int());
+            let mut bytes: Simd<i8, N> = core::intrinsics::simd::simd_cast(self.to_simd());
            bytes &= Simd::splat(1i8);
            mem::transmute_copy(&bytes)
        }
@ -187,12 +193,12 @@ where
    /// All elements must be either 0 or -1.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
+    pub unsafe fn from_simd_unchecked(value: Simd<T, N>) -> Self {
        // Safety: the caller must confirm this invariant
        unsafe {
            core::intrinsics::assume(<T as Sealed>::valid(value));
-            Self(mask_impl::Mask::from_int_unchecked(value))
        }
+        Self(value)
    }

    /// Converts a vector of integers to a mask, where 0 represents `false` and -1
@ -203,25 +209,26 @@ where
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    #[track_caller]
-    pub fn from_int(value: Simd<T, N>) -> Self {
+    pub fn from_simd(value: Simd<T, N>) -> Self {
        assert!(T::valid(value), "all values must be either 0 or -1",);
        // Safety: the validity has been checked
-        unsafe { Self::from_int_unchecked(value) }
+        unsafe { Self::from_simd_unchecked(value) }
    }

    /// Converts the mask to a vector of integers, where 0 represents `false` and -1
    /// represents `true`.
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_int(self) -> Simd<T, N> {
-        self.0.to_int()
+    pub fn to_simd(self) -> Simd<T, N> {
+        self.0
    }

    /// Converts the mask to a mask of any other element size.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn cast<U: MaskElement>(self) -> Mask<U, N> {
-        Mask(self.0.convert())
+        // Safety: mask elements are integers
+        unsafe { Mask(core::intrinsics::simd::simd_as(self.0)) }
    }

    /// Tests the value of the specified element.
@ -232,7 +239,7 @@ where
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub unsafe fn test_unchecked(&self, index: usize) -> bool {
        // Safety: the caller must confirm this invariant
-        unsafe { self.0.test_unchecked(index) }
+        unsafe { T::eq(*self.0.as_array().get_unchecked(index), T::TRUE) }
    }

    /// Tests the value of the specified element.
@ -243,9 +250,7 @@ where
    #[must_use = "method returns a new bool and does not mutate the original value"]
    #[track_caller]
    pub fn test(&self, index: usize) -> bool {
-        assert!(index < N, "element index out of range");
-        // Safety: the element index has been checked
-        unsafe { self.test_unchecked(index) }
+        T::eq(self.0[index], T::TRUE)
    }

    /// Sets the value of the specified element.
@ -256,7 +261,7 @@ where
    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
        // Safety: the caller must confirm this invariant
        unsafe {
-            self.0.set_unchecked(index, value);
+            *self.0.as_mut_array().get_unchecked_mut(index) = if value { T::TRUE } else { T::FALSE }
        }
    }

@ -267,35 +272,65 @@ where
    #[inline]
    #[track_caller]
    pub fn set(&mut self, index: usize, value: bool) {
-        assert!(index < N, "element index out of range");
-        // Safety: the element index has been checked
-        unsafe {
-            self.set_unchecked(index, value);
-        }
+        self.0[index] = if value { T::TRUE } else { T::FALSE }
    }

    /// Returns true if any element is set, or false otherwise.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub fn any(self) -> bool {
-        self.0.any()
+        // Safety: `self` is a mask vector
+        unsafe { core::intrinsics::simd::simd_reduce_any(self.0) }
    }

    /// Returns true if all elements are set, or false otherwise.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub fn all(self) -> bool {
-        self.0.all()
+        // Safety: `self` is a mask vector
+        unsafe { core::intrinsics::simd::simd_reduce_all(self.0) }
    }

    /// Creates a bitmask from a mask.
    ///
    /// Each bit is set if the corresponding element in the mask is `true`.
-    /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64.
    #[inline]
    #[must_use = "method returns a new integer and does not mutate the original value"]
    pub fn to_bitmask(self) -> u64 {
-        self.0.to_bitmask_integer()
+        const {
+            assert!(N <= 64, "number of elements can't be greater than 64");
+        }
+
+        #[inline]
+        unsafe fn to_bitmask_impl<T, U: FixEndianness, const M: usize, const N: usize>(
+            mask: Mask<T, N>,
+        ) -> U
+        where
+            T: MaskElement,
+        {
+            let resized = mask.resize::<M>(false);
+
+            // Safety: `resized` is an integer vector with length M, which must match T
+            let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized.0) };
+
+            // LLVM assumes bit order should match endianness
+            bitmask.fix_endianness()
+        }
+
+        // TODO modify simd_bitmask to zero-extend output, making this unnecessary
+        if N <= 8 {
+            // Safety: bitmask matches length
+            unsafe { to_bitmask_impl::<T, u8, 8, N>(self) as u64 }
+        } else if N <= 16 {
+            // Safety: bitmask matches length
+            unsafe { to_bitmask_impl::<T, u16, 16, N>(self) as u64 }
+        } else if N <= 32 {
+            // Safety: bitmask matches length
+            unsafe { to_bitmask_impl::<T, u32, 32, N>(self) as u64 }
+        } else {
+            // Safety: bitmask matches length
+            unsafe { to_bitmask_impl::<T, u64, 64, N>(self) }
+        }
    }

    /// Creates a mask from a bitmask.
@ -305,7 +340,7 @@ where
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn from_bitmask(bitmask: u64) -> Self {
-        Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+        Self(bitmask.select(Simd::splat(T::TRUE), Simd::splat(T::FALSE)))
    }

    /// Finds the index of the first set element.
@ -351,7 +386,7 @@ where
        // Safety: the input and output are integer vectors
        let index: Simd<T, N> = unsafe { core::intrinsics::simd::simd_cast(index) };

-        let masked_index = self.select(index, Self::splat(true).to_int());
+        let masked_index = self.select(index, Self::splat(true).to_simd());

        // Safety: the input and output are integer vectors
        let masked_index: Simd<T::Unsigned, N> =
@ -376,7 +411,6 @@ where
 impl<T, const N: usize> From<[bool; N]> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn from(array: [bool; N]) -> Self {
@ -387,7 +421,6 @@ where
 impl<T, const N: usize> From<Mask<T, N>> for [bool; N]
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn from(vector: Mask<T, N>) -> Self {
@ -398,7 +431,6 @@ where
 impl<T, const N: usize> Default for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn default() -> Self {
@ -409,7 +441,6 @@ where
 impl<T, const N: usize> PartialEq for Mask<T, N>
 where
    T: MaskElement + PartialEq,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
@ -420,7 +451,6 @@ where
 impl<T, const N: usize> PartialOrd for Mask<T, N>
 where
    T: MaskElement + PartialOrd,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
@ -431,7 +461,6 @@ where
 impl<T, const N: usize> fmt::Debug for Mask<T, N>
 where
    T: MaskElement + fmt::Debug,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@ -444,19 +473,18 @@ where
 impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
    fn bitand(self, rhs: Self) -> Self {
-        Self(self.0 & rhs.0)
+        // Safety: `self` is an integer vector
+        unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) }
    }
 }

 impl<T, const N: usize> core::ops::BitAnd<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
@ -468,7 +496,6 @@ where
 impl<T, const N: usize> core::ops::BitAnd<Mask<T, N>> for bool
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Mask<T, N>;
    #[inline]
@ -480,19 +507,18 @@ where
 impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
    fn bitor(self, rhs: Self) -> Self {
-        Self(self.0 | rhs.0)
+        // Safety: `self` is an integer vector
+        unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) }
    }
 }

 impl<T, const N: usize> core::ops::BitOr<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
@ -504,7 +530,6 @@ where
 impl<T, const N: usize> core::ops::BitOr<Mask<T, N>> for bool
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Mask<T, N>;
    #[inline]
@ -516,19 +541,18 @@ where
 impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
-        Self(self.0 ^ rhs.0)
+        // Safety: `self` is an integer vector
+        unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) }
    }
 }

 impl<T, const N: usize> core::ops::BitXor<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Self;
    #[inline]
@ -540,7 +564,6 @@ where
 impl<T, const N: usize> core::ops::BitXor<Mask<T, N>> for bool
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Mask<T, N>;
    #[inline]
@ -552,30 +575,27 @@ where
 impl<T, const N: usize> core::ops::Not for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    type Output = Mask<T, N>;
    #[inline]
    fn not(self) -> Self::Output {
-        Self(!self.0)
+        Self::splat(true) ^ self
    }
 }

 impl<T, const N: usize> core::ops::BitAndAssign for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitand_assign(&mut self, rhs: Self) {
-        self.0 = self.0 & rhs.0;
+        *self = *self & rhs;
    }
 }

 impl<T, const N: usize> core::ops::BitAndAssign<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitand_assign(&mut self, rhs: bool) {
@ -586,18 +606,16 @@ where
 impl<T, const N: usize> core::ops::BitOrAssign for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitor_assign(&mut self, rhs: Self) {
-        self.0 = self.0 | rhs.0;
+        *self = *self | rhs;
    }
 }

 impl<T, const N: usize> core::ops::BitOrAssign<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitor_assign(&mut self, rhs: bool) {
@ -608,18 +626,16 @@ where
 impl<T, const N: usize> core::ops::BitXorAssign for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitxor_assign(&mut self, rhs: Self) {
-        self.0 = self.0 ^ rhs.0;
+        *self = *self ^ rhs;
    }
 }

 impl<T, const N: usize> core::ops::BitXorAssign<bool> for Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    #[inline]
    fn bitxor_assign(&mut self, rhs: bool) {
@ -631,8 +647,6 @@ macro_rules! impl_from {
    { $from:ty  => $($to:ty),* } => {
        $(
        impl<const N: usize> From<Mask<$from, N>> for Mask<$to, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn from(value: Mask<$from, N>) -> Self {
--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@ -1,228 +0,0 @@
-#![allow(unused_imports)]
-use super::MaskElement;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
-use core::marker::PhantomData;
-
-/// A mask where each lane is represented by a single bit.
-#[repr(transparent)]
-pub(crate) struct Mask<T, const N: usize>(
-    <LaneCount<N> as SupportedLaneCount>::BitMask,
-    PhantomData<T>,
-)
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount;
-
-impl<T, const N: usize> Copy for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
-
-impl<T, const N: usize> Clone for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T, const N: usize> PartialEq for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn eq(&self, other: &Self) -> bool {
-        self.0.as_ref() == other.0.as_ref()
-    }
-}
-
-impl<T, const N: usize> PartialOrd for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-        self.0.as_ref().partial_cmp(other.0.as_ref())
-    }
-}
-
-impl<T, const N: usize> Eq for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
-
-impl<T, const N: usize> Ord for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
-        self.0.as_ref().cmp(other.0.as_ref())
-    }
-}
-
-impl<T, const N: usize> Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) fn splat(value: bool) -> Self {
-        let mut mask = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
-        if value {
-            mask.as_mut().fill(u8::MAX)
-        } else {
-            mask.as_mut().fill(u8::MIN)
-        }
-        if N % 8 > 0 {
-            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
-        }
-        Self(mask, PhantomData)
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
-        (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
-    }
-
-    #[inline]
-    pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
-        unsafe {
-            self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
-        }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub(crate) fn to_int(self) -> Simd<T, N> {
-        unsafe {
-            core::intrinsics::simd::simd_select_bitmask(
-                self.0,
-                Simd::splat(T::TRUE),
-                Simd::splat(T::FALSE),
-            )
-        }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
-        unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) }
-    }
-
-    #[inline]
-    pub(crate) fn to_bitmask_integer(self) -> u64 {
-        let mut bitmask = [0u8; 8];
-        bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
-        u64::from_ne_bytes(bitmask)
-    }
-
-    #[inline]
-    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
-        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
-        let len = bytes.as_mut().len();
-        bytes
-            .as_mut()
-            .copy_from_slice(&bitmask.to_ne_bytes()[..len]);
-        Self(bytes, PhantomData)
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) fn convert<U>(self) -> Mask<U, N>
-    where
-        U: MaskElement,
-    {
-        // Safety: bitmask layout does not depend on the element width
-        unsafe { core::mem::transmute_copy(&self) }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) fn any(self) -> bool {
-        self != Self::splat(false)
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) fn all(self) -> bool {
-        self == Self::splat(true)
-    }
-}
-
-impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
-{
-    type Output = Self;
-    #[inline]
-    fn bitand(mut self, rhs: Self) -> Self {
-        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
-            *l &= r;
-        }
-        self
-    }
-}
-
-impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
-{
-    type Output = Self;
-    #[inline]
-    fn bitor(mut self, rhs: Self) -> Self {
-        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
-            *l |= r;
-        }
-        self
-    }
-}
-
-impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn bitxor(mut self, rhs: Self) -> Self::Output {
-        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
-            *l ^= r;
-        }
-        self
-    }
-}
-
-impl<T, const N: usize> core::ops::Not for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn not(mut self) -> Self::Output {
-        for x in self.0.as_mut() {
-            *x = !*x;
-        }
-        if N % 8 > 0 {
-            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
-        }
-        self
-    }
-}
--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@ -1,296 +0,0 @@
-//! Masks that take up full SIMD vector registers.
-
-use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
-
-#[repr(transparent)]
-pub(crate) struct Mask<T, const N: usize>(Simd<T, N>)
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount;
-
-impl<T, const N: usize> Copy for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
-
-impl<T, const N: usize> Clone for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T, const N: usize> PartialEq for Mask<T, N>
-where
-    T: MaskElement + PartialEq,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn eq(&self, other: &Self) -> bool {
-        self.0.eq(&other.0)
-    }
-}
-
-impl<T, const N: usize> PartialOrd for Mask<T, N>
-where
-    T: MaskElement + PartialOrd,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-        self.0.partial_cmp(&other.0)
-    }
-}
-
-impl<T, const N: usize> Eq for Mask<T, N>
-where
-    T: MaskElement + Eq,
-    LaneCount<N>: SupportedLaneCount,
-{
-}
-
-impl<T, const N: usize> Ord for Mask<T, N>
-where
-    T: MaskElement + Ord,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
-        self.0.cmp(&other.0)
-    }
-}
-
-// Used for bitmask bit order workaround
-pub(crate) trait ReverseBits {
-    // Reverse the least significant `n` bits of `self`.
-    // (Remaining bits must be 0.)
-    fn reverse_bits(self, n: usize) -> Self;
-}
-
-macro_rules! impl_reverse_bits {
-    { $($int:ty),* } => {
-        $(
-        impl ReverseBits for $int {
-            #[inline(always)]
-            fn reverse_bits(self, n: usize) -> Self {
-                let rev = <$int>::reverse_bits(self);
-                let bitsize = size_of::<$int>() * 8;
-                if n < bitsize {
-                    // Shift things back to the right
-                    rev >> (bitsize - n)
-                } else {
-                    rev
-                }
-            }
-        }
-        )*
-    }
-}
-
-impl_reverse_bits! { u8, u16, u32, u64 }
-
-impl<T, const N: usize> Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) fn splat(value: bool) -> Self {
-        Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
-        T::eq(self.0[lane], T::TRUE)
-    }
-
-    #[inline]
-    pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
-        self.0[lane] = if value { T::TRUE } else { T::FALSE }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub(crate) fn to_int(self) -> Simd<T, N> {
-        self.0
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
-        Self(value)
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub(crate) fn convert<U>(self) -> Mask<U, N>
-    where
-        U: MaskElement,
-    {
-        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
-        unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
-    }
-
-    #[inline]
-    unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
-    where
-        LaneCount<M>: SupportedLaneCount,
-    {
-        let resized = self.to_int().resize::<M>(T::FALSE);
-
-        // Safety: `resized` is an integer vector with length M, which must match T
-        let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) };
-
-        // LLVM assumes bit order should match endianness
-        if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(M)
-        } else {
-            bitmask
-        }
-    }
-
-    #[inline]
-    unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
-    where
-        LaneCount<M>: SupportedLaneCount,
-    {
-        // LLVM assumes bit order should match endianness
-        let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits(M)
-        } else {
-            bitmask
-        };
-
-        // SAFETY: `mask` is the correct bitmask type for a u64 bitmask
-        let mask: Simd<T, M> = unsafe {
-            core::intrinsics::simd::simd_select_bitmask(
-                bitmask,
-                Simd::<T, M>::splat(T::TRUE),
-                Simd::<T, M>::splat(T::FALSE),
-            )
-        };
-
-        // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
-        unsafe { Self::from_int_unchecked(mask.resize::<N>(T::FALSE)) }
-    }
-
-    #[inline]
-    pub(crate) fn to_bitmask_integer(self) -> u64 {
-        // TODO modify simd_bitmask to zero-extend output, making this unnecessary
-        if N <= 8 {
-            // Safety: bitmask matches length
-            unsafe { self.to_bitmask_impl::<u8, 8>() as u64 }
-        } else if N <= 16 {
-            // Safety: bitmask matches length
-            unsafe { self.to_bitmask_impl::<u16, 16>() as u64 }
-        } else if N <= 32 {
-            // Safety: bitmask matches length
-            unsafe { self.to_bitmask_impl::<u32, 32>() as u64 }
-        } else {
-            // Safety: bitmask matches length
-            unsafe { self.to_bitmask_impl::<u64, 64>() }
-        }
-    }
-
-    #[inline]
-    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
-        // TODO modify simd_bitmask_select to truncate input, making this unnecessary
-        if N <= 8 {
-            // Safety: bitmask matches length
-            unsafe { Self::from_bitmask_impl::<u8, 8>(bitmask as u8) }
-        } else if N <= 16 {
-            // Safety: bitmask matches length
-            unsafe { Self::from_bitmask_impl::<u16, 16>(bitmask as u16) }
-        } else if N <= 32 {
-            // Safety: bitmask matches length
-            unsafe { Self::from_bitmask_impl::<u32, 32>(bitmask as u32) }
-        } else {
-            // Safety: bitmask matches length
-            unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) }
-        }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) fn any(self) -> bool {
-        // Safety: use `self` as an integer vector
-        unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) }
-    }
-
-    #[inline]
-    #[must_use = "method returns a new bool and does not mutate the original value"]
-    pub(crate) fn all(self) -> bool {
-        // Safety: use `self` as an integer vector
-        unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
-    }
-}
-
-impl<T, const N: usize> From<Mask<T, N>> for Simd<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    #[inline]
-    fn from(value: Mask<T, N>) -> Self {
-        value.0
-    }
-}
-
-impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn bitand(self, rhs: Self) -> Self {
-        // Safety: `self` is an integer vector
-        unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) }
-    }
-}
-
-impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn bitor(self, rhs: Self) -> Self {
-        // Safety: `self` is an integer vector
-        unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) }
-    }
-}
-
-impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn bitxor(self, rhs: Self) -> Self {
-        // Safety: `self` is an integer vector
-        unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) }
-    }
-}
-
-impl<T, const N: usize> core::ops::Not for Mask<T, N>
-where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
-{
-    type Output = Self;
-    #[inline]
-    fn not(self) -> Self::Output {
-        Self::splat(true) ^ self
-    }
-}
--- a/library/portable-simd/crates/core_simd/src/mod.rs
+++ b/library/portable-simd/crates/core_simd/src/mod.rs
@ -5,7 +5,6 @@ mod alias;
 mod cast;
 mod fmt;
 mod iter;
-mod lane_count;
 mod masks;
 mod ops;
 mod select;
@ -27,8 +26,8 @@ pub mod simd {

    pub use crate::core_simd::alias::*;
    pub use crate::core_simd::cast::*;
-    pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
    pub use crate::core_simd::masks::*;
+    pub use crate::core_simd::select::*;
    pub use crate::core_simd::swizzle::*;
    pub use crate::core_simd::to_bytes::ToBytes;
    pub use crate::core_simd::vector::*;
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
+use crate::simd::{Select, Simd, SimdElement, cmp::SimdPartialEq};
 use core::ops::{Add, Mul};
 use core::ops::{BitAnd, BitOr, BitXor};
 use core::ops::{Div, Rem, Sub};
@ -12,7 +12,6 @@ mod unary;
 impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
 where
    T: SimdElement,
-    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
 {
    type Output = I::Output;
@ -25,7 +24,6 @@ where
 impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
 where
    T: SimdElement,
-    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
 {
    #[inline]
@ -130,7 +128,6 @@ macro_rules! for_base_types {
                impl<const N: usize> $op<Self> for Simd<$scalar, N>
                where
                    $scalar: SimdElement,
-                    LaneCount<N>: SupportedLaneCount,
                {
                    type Output = $out;

--- a/library/portable-simd/crates/core_simd/src/ops/assign.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/assign.rs
@ -21,7 +21,6 @@ macro_rules! assign_ops {
        where
            Self: $trait<U, Output = Self>,
            T: SimdElement,
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn $assign_call(&mut self, rhs: U) {
--- a/library/portable-simd/crates/core_simd/src/ops/deref.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/deref.rs
@ -13,7 +13,6 @@ macro_rules! deref_lhs {
        where
            T: SimdElement,
            $simd: $trait<$simd, Output = $simd>,
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Simd<T, N>;

@ -33,7 +32,6 @@ macro_rules! deref_rhs {
        where
            T: SimdElement,
            $simd: $trait<$simd, Output = $simd>,
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Simd<T, N>;

@ -64,7 +62,6 @@ macro_rules! deref_ops {
            where
                T: SimdElement,
                $simd: $trait<$simd, Output = $simd>,
-                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $simd;

--- a/library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs
@ -1,13 +1,11 @@
 // Shift operations uniquely typically only have a scalar on the right-hand side.
 // Here, we implement shifts for scalar RHS arguments.

-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::Simd;

 macro_rules! impl_splatted_shifts {
    { impl $trait:ident :: $trait_fn:ident for $ty:ty } => {
        impl<const N: usize> core::ops::$trait<$ty> for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Self;
            #[inline]
@ -17,8 +15,6 @@ macro_rules! impl_splatted_shifts {
        }

        impl<const N: usize> core::ops::$trait<&$ty> for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Self;
            #[inline]
@ -28,8 +24,6 @@ macro_rules! impl_splatted_shifts {
        }

        impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Simd<$ty, N>;
            #[inline]
@ -39,8 +33,6 @@ macro_rules! impl_splatted_shifts {
        }

        impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Simd<$ty, N>;
            #[inline]
--- a/library/portable-simd/crates/core_simd/src/ops/unary.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/unary.rs
@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{Simd, SimdElement};
 use core::ops::{Neg, Not}; // unary ops

 macro_rules! neg {
@ -6,7 +6,6 @@ macro_rules! neg {
        $(impl<const N: usize> Neg for Simd<$scalar, N>
        where
            $scalar: SimdElement,
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Self;

@ -40,7 +39,6 @@ macro_rules! not {
        $(impl<const N: usize> Not for Simd<$scalar, N>
        where
            $scalar: SimdElement,
-            LaneCount<N>: SupportedLaneCount,
        {
            type Output = Self;

--- a/library/portable-simd/crates/core_simd/src/select.rs
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@ -1,54 +1,155 @@
-use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{FixEndianness, Mask, MaskElement, Simd, SimdElement};

-impl<T, const N: usize> Mask<T, N>
+/// Choose elements from two vectors using a mask.
+///
+/// For each element in the mask, choose the corresponding element from `true_values` if
+/// that element mask is true, and `false_values` if that element mask is false.
+///
+/// If the mask is `u64`, it's treated as a bitmask with the least significant bit
+/// corresponding to the first element.
+///
+/// # Examples
+///
+/// ## Selecting values from `Simd`
+/// ```
+/// # #![feature(portable_simd)]
+/// # #[cfg(feature = "as_crate")] use core_simd::simd;
+/// # #[cfg(not(feature = "as_crate"))] use core::simd;
+/// # use simd::{Simd, Mask, Select};
+/// let a = Simd::from_array([0, 1, 2, 3]);
+/// let b = Simd::from_array([4, 5, 6, 7]);
+/// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+/// let c = mask.select(a, b);
+/// assert_eq!(c.to_array(), [0, 5, 6, 3]);
+/// ```
+///
+/// ## Selecting values from `Mask`
+/// ```
+/// # #![feature(portable_simd)]
+/// # #[cfg(feature = "as_crate")] use core_simd::simd;
+/// # #[cfg(not(feature = "as_crate"))] use core::simd;
+/// # use simd::{Mask, Select};
+/// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+/// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+/// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+/// let c = mask.select(a, b);
+/// assert_eq!(c.to_array(), [true, false, true, false]);
+/// ```
+///
+/// ## Selecting with a bitmask
+/// ```
+/// # #![feature(portable_simd)]
+/// # #[cfg(feature = "as_crate")] use core_simd::simd;
+/// # #[cfg(not(feature = "as_crate"))] use core::simd;
+/// # use simd::{Mask, Select};
+/// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+/// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+/// let mask = 0b1001;
+/// let c = mask.select(a, b);
+/// assert_eq!(c.to_array(), [true, false, true, false]);
+/// ```
+pub trait Select<T> {
+    /// Choose elements
+    fn select(self, true_values: T, false_values: T) -> T;
+}
+
+impl<T, U, const N: usize> Select<Simd<T, N>> for Mask<U, N>
 where
-    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
+    T: SimdElement,
+    U: MaskElement,
 {
-    /// Choose elements from two vectors.
-    ///
-    /// For each element in the mask, choose the corresponding element from `true_values` if
-    /// that element mask is true, and `false_values` if that element mask is false.
-    ///
-    /// # Examples
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core::simd::{Simd, Mask};
-    /// let a = Simd::from_array([0, 1, 2, 3]);
-    /// let b = Simd::from_array([4, 5, 6, 7]);
-    /// let mask = Mask::from_array([true, false, false, true]);
-    /// let c = mask.select(a, b);
-    /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
-    /// ```
    #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn select<U>(self, true_values: Simd<U, N>, false_values: Simd<U, N>) -> Simd<U, N>
-    where
-        U: SimdElement<Mask = T>,
-    {
-        // Safety: The mask has been cast to a vector of integers,
-        // and the operands to select between are vectors of the same type and length.
-        unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) }
+    fn select(self, true_values: Simd<T, N>, false_values: Simd<T, N>) -> Simd<T, N> {
+        // Safety:
+        // simd_as between masks is always safe (they're vectors of ints).
+        // simd_select uses a mask that matches the width and number of elements
+        unsafe {
+            let mask: Simd<T::Mask, N> = core::intrinsics::simd::simd_as(self.to_simd());
+            core::intrinsics::simd::simd_select(mask, true_values, false_values)
        }
-
-    /// Choose elements from two masks.
-    ///
-    /// For each element in the mask, choose the corresponding element from `true_values` if
-    /// that element mask is true, and `false_values` if that element mask is false.
-    ///
-    /// # Examples
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core::simd::Mask;
-    /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
-    /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
-    /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
-    /// let c = mask.select_mask(a, b);
-    /// assert_eq!(c.to_array(), [true, false, true, false]);
-    /// ```
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original inputs"]
-    pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
-        self & true_values | !self & false_values
+    }
+}
+
+impl<T, const N: usize> Select<Simd<T, N>> for u64
+where
+    T: SimdElement,
+{
+    #[inline]
+    fn select(self, true_values: Simd<T, N>, false_values: Simd<T, N>) -> Simd<T, N> {
+        const {
+            assert!(N <= 64, "number of elements can't be greater than 64");
+        }
+
+        #[inline]
+        unsafe fn select_impl<T, U: FixEndianness, const M: usize, const N: usize>(
+            bitmask: U,
+            true_values: Simd<T, N>,
+            false_values: Simd<T, N>,
+        ) -> Simd<T, N>
+        where
+            T: SimdElement,
+        {
+            let default = true_values[0];
+            let true_values = true_values.resize::<M>(default);
+            let false_values = false_values.resize::<M>(default);
+
+            // LLVM assumes bit order should match endianness
+            let bitmask = bitmask.fix_endianness();
+
+            // Safety: the caller guarantees that the size of U matches M
+            let selected = unsafe {
+                core::intrinsics::simd::simd_select_bitmask(bitmask, true_values, false_values)
+            };
+
+            selected.resize::<N>(default)
+        }
+
+        // TODO modify simd_bitmask_select to truncate input, making this unnecessary
+        if N <= 8 {
+            let bitmask = self as u8;
+            // Safety: bitmask matches length
+            unsafe { select_impl::<T, u8, 8, N>(bitmask, true_values, false_values) }
+        } else if N <= 16 {
+            let bitmask = self as u16;
+            // Safety: bitmask matches length
+            unsafe { select_impl::<T, u16, 16, N>(bitmask, true_values, false_values) }
+        } else if N <= 32 {
+            let bitmask = self as u32;
+            // Safety: bitmask matches length
+            unsafe { select_impl::<T, u32, 32, N>(bitmask, true_values, false_values) }
+        } else {
+            let bitmask = self;
+            // Safety: bitmask matches length
+            unsafe { select_impl::<T, u64, 64, N>(bitmask, true_values, false_values) }
+        }
+    }
+}
+
+impl<T, U, const N: usize> Select<Mask<T, N>> for Mask<U, N>
+where
+    T: MaskElement,
+    U: MaskElement,
+{
+    #[inline]
+    fn select(self, true_values: Mask<T, N>, false_values: Mask<T, N>) -> Mask<T, N> {
+        let selected: Simd<T, N> =
+            Select::select(self, true_values.to_simd(), false_values.to_simd());
+
+        // Safety: all values come from masks
+        unsafe { Mask::from_simd_unchecked(selected) }
+    }
+}
+
+impl<T, const N: usize> Select<Mask<T, N>> for u64
+where
+    T: MaskElement,
+{
+    #[inline]
+    fn select(self, true_values: Mask<T, N>, false_values: Mask<T, N>) -> Mask<T, N> {
+        let selected: Simd<T, N> =
+            Select::select(self, true_values.to_simd(), false_values.to_simd());
+
+        // Safety: all values come from masks
+        unsafe { Mask::from_simd_unchecked(selected) }
    }
 }
--- a/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
@ -1,5 +1,5 @@
 use crate::simd::{
-    LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
+    Mask, Simd, SimdElement,
    ptr::{SimdConstPtr, SimdMutPtr},
 };

@ -21,8 +21,6 @@ macro_rules! impl_number {
    { $($number:ty),* } => {
        $(
        impl<const N: usize> SimdPartialEq for Simd<$number, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Mask = Mask<<$number as SimdElement>::Mask, N>;

@ -30,14 +28,14 @@ macro_rules! impl_number {
            fn simd_eq(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
            }

            #[inline]
            fn simd_ne(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
            }
        }
        )*
@ -50,8 +48,6 @@ macro_rules! impl_mask {
    { $($integer:ty),* } => {
        $(
        impl<const N: usize> SimdPartialEq for Mask<$integer, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Mask = Self;

@ -59,14 +55,14 @@ macro_rules! impl_mask {
            fn simd_eq(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_eq(self.to_simd(), other.to_simd())) }
            }

            #[inline]
            fn simd_ne(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ne(self.to_simd(), other.to_simd())) }
            }
        }
        )*
@ -75,10 +71,7 @@ macro_rules! impl_mask {

 impl_mask! { i8, i16, i32, i64, isize }

-impl<T, const N: usize> SimdPartialEq for Simd<*const T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdPartialEq for Simd<*const T, N> {
    type Mask = Mask<isize, N>;

    #[inline]
@ -92,10 +85,7 @@ where
    }
 }

-impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N> {
    type Mask = Mask<isize, N>;

    #[inline]
--- a/library/portable-simd/crates/core_simd/src/simd/cmp/ord.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp/ord.rs
@ -1,5 +1,5 @@
 use crate::simd::{
-    LaneCount, Mask, Simd, SupportedLaneCount,
+    Mask, Select, Simd,
    cmp::SimdPartialEq,
    ptr::{SimdConstPtr, SimdMutPtr},
 };
@ -49,41 +49,37 @@ macro_rules! impl_integer {
    { $($integer:ty),* } => {
        $(
        impl<const N: usize> SimdPartialOrd for Simd<$integer, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn simd_lt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
            }

            #[inline]
            fn simd_le(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) }
            }

            #[inline]
            fn simd_gt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
            }

            #[inline]
            fn simd_ge(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
            }
        }

        impl<const N: usize> SimdOrd for Simd<$integer, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn simd_max(self, other: Self) -> Self {
@ -115,35 +111,33 @@ macro_rules! impl_float {
    { $($float:ty),* } => {
        $(
        impl<const N: usize> SimdPartialOrd for Simd<$float, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn simd_lt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
            }

            #[inline]
            fn simd_le(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) }
            }

            #[inline]
            fn simd_gt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
            }

            #[inline]
            fn simd_ge(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
+                unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
            }
        }
        )*
@ -156,50 +150,46 @@ macro_rules! impl_mask {
    { $($integer:ty),* } => {
        $(
        impl<const N: usize> SimdPartialOrd for Mask<$integer, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn simd_lt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_lt(self.to_simd(), other.to_simd())) }
            }

            #[inline]
            fn simd_le(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_le(self.to_simd(), other.to_simd())) }
            }

            #[inline]
            fn simd_gt(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_gt(self.to_simd(), other.to_simd())) }
            }

            #[inline]
            fn simd_ge(self, other: Self) -> Self::Mask {
                // Safety: `self` is a vector, and the result of the comparison
                // is always a valid mask.
-                unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) }
+                unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ge(self.to_simd(), other.to_simd())) }
            }
        }

        impl<const N: usize> SimdOrd for Mask<$integer, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn simd_max(self, other: Self) -> Self {
-                self.simd_gt(other).select_mask(other, self)
+                self.simd_gt(other).select(other, self)
            }

            #[inline]
            fn simd_min(self, other: Self) -> Self {
-                self.simd_lt(other).select_mask(other, self)
+                self.simd_lt(other).select(other, self)
            }

            #[inline]
@ -218,10 +208,7 @@ macro_rules! impl_mask {

 impl_mask! { i8, i16, i32, i64, isize }

-impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N> {
    #[inline]
    fn simd_lt(self, other: Self) -> Self::Mask {
        self.addr().simd_lt(other.addr())
@ -243,10 +230,7 @@ where
    }
 }

-impl<T, const N: usize> SimdOrd for Simd<*const T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdOrd for Simd<*const T, N> {
    #[inline]
    fn simd_max(self, other: Self) -> Self {
        self.simd_lt(other).select(other, self)
@ -268,10 +252,7 @@ where
    }
 }

-impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N> {
    #[inline]
    fn simd_lt(self, other: Self) -> Self::Mask {
        self.addr().simd_lt(other.addr())
@ -293,10 +274,7 @@ where
    }
 }

-impl<T, const N: usize> SimdOrd for Simd<*mut T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdOrd for Simd<*mut T, N> {
    #[inline]
    fn simd_max(self, other: Self) -> Self {
        self.simd_lt(other).select(other, self)
--- a/library/portable-simd/crates/core_simd/src/simd/num/float.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/float.rs
@ -1,6 +1,6 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
+    Mask, Select, Simd, SimdCast, SimdElement,
    cmp::{SimdPartialEq, SimdPartialOrd},
 };

@ -240,15 +240,9 @@ pub trait SimdFloat: Copy + Sealed {
 macro_rules! impl_trait {
    { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
        $(
-        impl<const N: usize> Sealed for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
-        }
+        impl<const N: usize> Sealed for Simd<$ty, N> {}

        impl<const N: usize> SimdFloat for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>;
            type Scalar = $ty;
--- a/library/portable-simd/crates/core_simd/src/simd/num/int.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/int.rs
@ -1,7 +1,6 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd,
-    cmp::SimdPartialOrd, num::SimdUint,
+    Mask, Select, Simd, SimdCast, SimdElement, cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint,
 };

 /// Operations on SIMD vectors of signed integers.
@ -242,16 +241,9 @@ pub trait SimdInt: Copy + Sealed {
 macro_rules! impl_trait {
    { $($ty:ident ($unsigned:ident)),* } => {
        $(
-        impl<const N: usize> Sealed for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
-        }
+        impl<const N: usize> Sealed for Simd<$ty, N> {}

-        impl<const N: usize> SimdInt for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
+        impl<const N: usize> SimdInt for Simd<$ty, N> {
            type Mask = Mask<<$ty as SimdElement>::Mask, N>;
            type Scalar = $ty;
            type Unsigned = Simd<$unsigned, N>;
--- a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
@ -1,5 +1,5 @@
 use super::sealed::Sealed;
-use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd};
+use crate::simd::{Simd, SimdCast, SimdElement, cmp::SimdOrd};

 /// Operations on SIMD vectors of unsigned integers.
 pub trait SimdUint: Copy + Sealed {
@ -124,15 +124,9 @@ pub trait SimdUint: Copy + Sealed {
 macro_rules! impl_trait {
    { $($ty:ident ($signed:ident)),* } => {
        $(
-        impl<const N: usize> Sealed for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
-        }
+        impl<const N: usize> Sealed for Simd<$ty, N> {}

        impl<const N: usize> SimdUint for Simd<$ty, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
        {
            type Scalar = $ty;
            type Cast<T: SimdElement> = Simd<T, N>;
--- a/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
@ -1,5 +1,5 @@
 use super::sealed::Sealed;
-use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
+use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint};

 /// Operations on SIMD vectors of constant pointers.
 pub trait SimdConstPtr: Copy + Sealed {
@ -88,12 +88,9 @@ pub trait SimdConstPtr: Copy + Sealed {
    fn wrapping_sub(self, count: Self::Usize) -> Self;
 }

-impl<T, const N: usize> Sealed for Simd<*const T, N> where LaneCount<N>: SupportedLaneCount {}
+impl<T, const N: usize> Sealed for Simd<*const T, N> {}

-impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdConstPtr for Simd<*const T, N> {
    type Usize = Simd<usize, N>;
    type Isize = Simd<isize, N>;
    type CastPtr<U> = Simd<*const U, N>;
--- a/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
@ -1,5 +1,5 @@
 use super::sealed::Sealed;
-use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
+use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint};

 /// Operations on SIMD vectors of mutable pointers.
 pub trait SimdMutPtr: Copy + Sealed {
@ -85,12 +85,9 @@ pub trait SimdMutPtr: Copy + Sealed {
    fn wrapping_sub(self, count: Self::Usize) -> Self;
 }

-impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount {}
+impl<T, const N: usize> Sealed for Simd<*mut T, N> {}

-impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N> {
    type Usize = Simd<usize, N>;
    type Isize = Simd<isize, N>;
    type CastPtr<U> = Simd<*mut U, N>;
--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{Mask, MaskElement, Simd, SimdElement};

 /// Constructs a new SIMD vector by copying elements from selected elements in other vectors.
 ///
@ -82,8 +82,6 @@ pub trait Swizzle<const N: usize> {
    fn swizzle<T, const M: usize>(vector: Simd<T, M>) -> Simd<T, N>
    where
        T: SimdElement,
-        LaneCount<N>: SupportedLaneCount,
-        LaneCount<M>: SupportedLaneCount,
    {
        // Safety: `vector` is a vector, and the index is a const vector of u32.
        unsafe {
@ -122,8 +120,6 @@ pub trait Swizzle<const N: usize> {
    fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> Simd<T, N>
    where
        T: SimdElement,
-        LaneCount<N>: SupportedLaneCount,
-        LaneCount<M>: SupportedLaneCount,
    {
        // Safety: `first` and `second` are vectors, and the index is a const vector of u32.
        unsafe {
@ -161,11 +157,9 @@ pub trait Swizzle<const N: usize> {
    fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
    where
        T: MaskElement,
-        LaneCount<N>: SupportedLaneCount,
-        LaneCount<M>: SupportedLaneCount,
    {
        // SAFETY: all elements of this mask come from another mask
-        unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) }
+        unsafe { Mask::from_simd_unchecked(Self::swizzle(mask.to_simd())) }
    }

    /// Creates a new mask from the elements of `first` and `second`.
@ -177,18 +171,17 @@ pub trait Swizzle<const N: usize> {
    fn concat_swizzle_mask<T, const M: usize>(first: Mask<T, M>, second: Mask<T, M>) -> Mask<T, N>
    where
        T: MaskElement,
-        LaneCount<N>: SupportedLaneCount,
-        LaneCount<M>: SupportedLaneCount,
    {
        // SAFETY: all elements of this mask come from another mask
-        unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) }
+        unsafe {
+            Mask::from_simd_unchecked(Self::concat_swizzle(first.to_simd(), second.to_simd()))
+        }
    }
 }

 impl<T, const N: usize> Simd<T, N>
 where
    T: SimdElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    /// Reverse the order of the elements in the vector.
    #[inline]
@ -462,10 +455,7 @@ where
    /// ```
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn resize<const M: usize>(self, value: T) -> Simd<T, M>
-    where
-        LaneCount<M>: SupportedLaneCount,
-    {
+    pub fn resize<const M: usize>(self, value: T) -> Simd<T, M> {
        struct Resize<const N: usize>;
        impl<const N: usize, const M: usize> Swizzle<M> for Resize<N> {
            const INDEX: [usize; M] = const {
@ -493,10 +483,7 @@ where
    /// ```
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN>
-    where
-        LaneCount<LEN>: SupportedLaneCount,
-    {
+    pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN> {
        struct Extract<const N: usize, const START: usize>;
        impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> {
            const INDEX: [usize; LEN] = const {
@ -517,14 +504,13 @@ where
 impl<T, const N: usize> Mask<T, N>
 where
    T: MaskElement,
-    LaneCount<N>: SupportedLaneCount,
 {
    /// Reverse the order of the elements in the mask.
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
    pub fn reverse(self) -> Self {
        // Safety: swizzles are safe for masks
-        unsafe { Self::from_int_unchecked(self.to_int().reverse()) }
+        unsafe { Self::from_simd_unchecked(self.to_simd().reverse()) }
    }

    /// Rotates the mask such that the first `OFFSET` elements of the slice move to the end
@ -534,7 +520,7 @@ where
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
    pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
        // Safety: swizzles are safe for masks
-        unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) }
+        unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_left::<OFFSET>()) }
    }

    /// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to
@ -544,7 +530,7 @@ where
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
    pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
        // Safety: swizzles are safe for masks
-        unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) }
+        unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_right::<OFFSET>()) }
    }

    /// Shifts the mask elements to the left by `OFFSET`, filling in with
@ -554,7 +540,7 @@ where
    pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self {
        // Safety: swizzles are safe for masks
        unsafe {
-            Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding {
+            Self::from_simd_unchecked(self.to_simd().shift_elements_left::<OFFSET>(if padding {
                T::TRUE
            } else {
                T::FALSE
@ -569,7 +555,7 @@ where
    pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self {
        // Safety: swizzles are safe for masks
        unsafe {
-            Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding {
+            Self::from_simd_unchecked(self.to_simd().shift_elements_right::<OFFSET>(if padding {
                T::TRUE
            } else {
                T::FALSE
@ -598,9 +584,9 @@ where
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
    pub fn interleave(self, other: Self) -> (Self, Self) {
-        let (lo, hi) = self.to_int().interleave(other.to_int());
+        let (lo, hi) = self.to_simd().interleave(other.to_simd());
        // Safety: swizzles are safe for masks
-        unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) }
+        unsafe { (Self::from_simd_unchecked(lo), Self::from_simd_unchecked(hi)) }
    }

    /// Deinterleave two masks.
@ -627,12 +613,12 @@ where
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
    pub fn deinterleave(self, other: Self) -> (Self, Self) {
-        let (even, odd) = self.to_int().deinterleave(other.to_int());
+        let (even, odd) = self.to_simd().deinterleave(other.to_simd());
        // Safety: swizzles are safe for masks
        unsafe {
            (
-                Self::from_int_unchecked(even),
-                Self::from_int_unchecked(odd),
+                Self::from_simd_unchecked(even),
+                Self::from_simd_unchecked(odd),
            )
        }
    }
@ -653,13 +639,10 @@ where
    /// ```
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M>
-    where
-        LaneCount<M>: SupportedLaneCount,
-    {
+    pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M> {
        // Safety: swizzles are safe for masks
        unsafe {
-            Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value {
+            Mask::<T, M>::from_simd_unchecked(self.to_simd().resize::<M>(if value {
                T::TRUE
            } else {
                T::FALSE
@ -679,11 +662,8 @@ where
    /// ```
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original inputs"]
-    pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN>
-    where
-        LaneCount<LEN>: SupportedLaneCount,
-    {
+    pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN> {
        // Safety: swizzles are safe for masks
-        unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) }
+        unsafe { Mask::<T, LEN>::from_simd_unchecked(self.to_simd().extract::<START, LEN>()) }
    }
 }
--- a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
@ -1,10 +1,7 @@
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::Simd;
 use core::mem;

-impl<const N: usize> Simd<u8, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+impl<const N: usize> Simd<u8, N> {
    /// Swizzle a vector of bytes according to the index vector.
    /// Indices within range select the appropriate byte.
    /// Indices "out of bounds" instead select 0.
@ -139,7 +136,7 @@ unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> S
 #[inline]
 #[allow(clippy::let_and_return)]
 unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
-    use crate::simd::cmp::SimdPartialOrd;
+    use crate::simd::{Select, cmp::SimdPartialOrd};
    #[cfg(target_arch = "x86")]
    use core::arch::x86;
    #[cfg(target_arch = "x86_64")]
@ -184,10 +181,7 @@ unsafe fn transize<T, const N: usize>(
    f: unsafe fn(T, T) -> T,
    a: Simd<u8, N>,
    b: Simd<u8, N>,
-) -> Simd<u8, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+) -> Simd<u8, N> {
    // SAFETY: Same obligation to use this function as to use mem::transmute_copy.
    unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) }
 }
@ -196,11 +190,8 @@ where
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[allow(unused)]
 #[inline(always)]
-fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
-    use crate::simd::cmp::SimdPartialOrd;
+fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N> {
+    use crate::simd::{Select, cmp::SimdPartialOrd};
    idxs.simd_lt(Simd::splat(N as u8))
        .select(idxs, Simd::splat(u8::MAX))
 }
--- a/library/portable-simd/crates/core_simd/src/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@ -1,12 +1,12 @@
 use crate::simd::{
-    LaneCount, Simd, SimdElement, SupportedLaneCount,
+    Simd, SimdElement,
    num::{SimdFloat, SimdInt, SimdUint},
 };

 mod sealed {
    use super::*;
    pub trait Sealed {}
-    impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
+    impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> {}
 }
 use sealed::Sealed;

--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@ -1,5 +1,7 @@
+use core::intrinsics::simd::SimdAlign;
+
 use crate::simd::{
-    LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
+    Mask, MaskElement,
    cmp::SimdPartialOrd,
    num::SimdUint,
    ptr::{SimdConstPtr, SimdMutPtr},
@ -51,6 +53,8 @@ use crate::simd::{
 /// Thus it is sound to [`transmute`] `Simd<T, N>` to `[T; N]` and should optimize to "zero cost",
 /// but the reverse transmutation may require a copy the compiler cannot simply elide.
 ///
+/// `N` cannot be 0 and may be at most 64. This limit may be increased in the future.
+///
 /// # ABI "Features"
 /// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed and returned via memory,
 /// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept
@ -100,14 +104,13 @@ use crate::simd::{
 // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
 // causes rustc to emit illegal LLVM IR in some cases.
 #[repr(simd, packed)]
+#[rustc_simd_monomorphize_lane_limit = "64"]
 pub struct Simd<T, const N: usize>([T; N])
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement;

 impl<T, const N: usize> Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    /// Number of elements in this vector.
@ -146,30 +149,8 @@ where
    #[inline]
    #[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
    pub const fn splat(value: T) -> Self {
-        const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N>
-        where
-            T: SimdElement,
-            LaneCount<N>: SupportedLaneCount,
-        {
-            Simd::from_array([value; N])
-        }
-
-        fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N>
-        where
-            T: SimdElement,
-            LaneCount<N>: SupportedLaneCount,
-        {
-            // This is preferred over `[value; N]`, since it's explicitly a splat:
-            // https://github.com/rust-lang/rust/issues/97804
-            struct Splat;
-            impl<const N: usize> Swizzle<N> for Splat {
-                const INDEX: [usize; N] = [0; N];
-            }
-
-            Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
-        }
-
-        core::intrinsics::const_eval_select((value,), splat_const, splat_rt)
+        // SAFETY: T is a SimdElement, and the item type of Self.
+        unsafe { core::intrinsics::simd::simd_splat(value) }
    }

    /// Returns an array reference containing the entire SIMD vector.
@ -195,7 +176,7 @@ where

    /// Returns a mutable array reference containing the entire SIMD vector.
    #[inline]
-    pub fn as_mut_array(&mut self) -> &mut [T; N] {
+    pub const fn as_mut_array(&mut self) -> &mut [T; N] {
        // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with
        // potential padding at the end, so pointer casting to a
        // `&mut [T; N]` is safe.
@ -324,7 +305,7 @@ where
    /// ```
    #[inline]
    #[track_caller]
-    pub fn copy_to_slice(self, slice: &mut [T]) {
+    pub const fn copy_to_slice(self, slice: &mut [T]) {
        assert!(
            slice.len() >= Self::LEN,
            "slice length must be at least the number of elements"
@ -465,7 +446,7 @@ where
    /// value from `or` is passed through.
    ///
    /// # Safety
-    /// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`.
+    /// Enabled `ptr` elements must be safe to read as if by `core::ptr::read`.
    #[must_use]
    #[inline]
    pub unsafe fn load_select_ptr(
@ -475,12 +456,11 @@ where
    ) -> Self {
        // SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
        unsafe {
-            core::intrinsics::simd::simd_masked_load::<
-                _,
-                _,
-                _,
-                { core::intrinsics::simd::SimdAlign::Element },
-            >(enable.to_int(), ptr, or)
+            core::intrinsics::simd::simd_masked_load::<_, _, _, { SimdAlign::Element }>(
+                enable.to_simd(),
+                ptr,
+                or,
+            )
        }
    }

@ -659,7 +639,7 @@ where
        or: Self,
    ) -> Self {
        // Safety: The caller is responsible for upholding all invariants
-        unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
+        unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_simd()) }
    }

    /// Conditionally write contiguous elements to `slice`. The `enable` mask controls
@ -731,12 +711,11 @@ where
    pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
        // SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
        unsafe {
-            core::intrinsics::simd::simd_masked_store::<
-                _,
-                _,
-                _,
-                { core::intrinsics::simd::SimdAlign::Element },
-            >(enable.to_int(), ptr, self)
+            core::intrinsics::simd::simd_masked_store::<_, _, _, { SimdAlign::Element }>(
+                enable.to_simd(),
+                ptr,
+                self,
+            )
        }
    }

@ -896,20 +875,14 @@ where
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask<isize, N>) {
        // Safety: The caller is responsible for upholding all invariants
-        unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) }
+        unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_simd()) }
    }
 }

-impl<T, const N: usize> Copy for Simd<T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-    T: SimdElement,
-{
-}
+impl<T, const N: usize> Copy for Simd<T, N> where T: SimdElement {}

 impl<T, const N: usize> Clone for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -920,7 +893,6 @@ where

 impl<T, const N: usize> Default for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + Default,
 {
    #[inline]
@ -931,7 +903,6 @@ where

 impl<T, const N: usize> PartialEq for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + PartialEq,
 {
    #[inline]
@ -940,7 +911,7 @@ where
        let mask = unsafe {
            let tfvec: Simd<<T as SimdElement>::Mask, N> =
                core::intrinsics::simd::simd_eq(*self, *other);
-            Mask::from_int_unchecked(tfvec)
+            Mask::from_simd_unchecked(tfvec)
        };

        // Two vectors are equal if all elements are equal when compared elementwise
@ -954,7 +925,7 @@ where
        let mask = unsafe {
            let tfvec: Simd<<T as SimdElement>::Mask, N> =
                core::intrinsics::simd::simd_ne(*self, *other);
-            Mask::from_int_unchecked(tfvec)
+            Mask::from_simd_unchecked(tfvec)
        };

        // Two vectors are non-equal if any elements are non-equal when compared elementwise
@ -965,7 +936,6 @@ where
 /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
 impl<T, const N: usize> PartialOrd for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + PartialOrd,
 {
    #[inline]
@ -975,17 +945,11 @@ where
    }
 }

-impl<T, const N: usize> Eq for Simd<T, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-    T: SimdElement + Eq,
-{
-}
+impl<T, const N: usize> Eq for Simd<T, N> where T: SimdElement + Eq {}

 /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
 impl<T, const N: usize> Ord for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + Ord,
 {
    #[inline]
@ -997,7 +961,6 @@ where

 impl<T, const N: usize> core::hash::Hash for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement + core::hash::Hash,
 {
    #[inline]
@ -1012,7 +975,6 @@ where
 // array references
 impl<T, const N: usize> AsRef<[T; N]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1023,7 +985,6 @@ where

 impl<T, const N: usize> AsMut<[T; N]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1035,7 +996,6 @@ where
 // slice references
 impl<T, const N: usize> AsRef<[T]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1046,7 +1006,6 @@ where

 impl<T, const N: usize> AsMut<[T]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1058,7 +1017,6 @@ where
 // vector/array conversion
 impl<T, const N: usize> From<[T; N]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1069,7 +1027,6 @@ where

 impl<T, const N: usize> From<Simd<T, N>> for [T; N]
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    #[inline]
@ -1080,7 +1037,6 @@ where

 impl<T, const N: usize> TryFrom<&[T]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    type Error = core::array::TryFromSliceError;
@ -1093,7 +1049,6 @@ where

 impl<T, const N: usize> TryFrom<&mut [T]> for Simd<T, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
 {
    type Error = core::array::TryFromSliceError;
@ -1231,10 +1186,7 @@ where
 }

 #[inline]
-fn lane_indices<const N: usize>() -> Simd<usize, N>
-where
-    LaneCount<N>: SupportedLaneCount,
-{
+fn lane_indices<const N: usize>() -> Simd<usize, N> {
    #![allow(clippy::needless_range_loop)]
    let mut index = [0; N];
    for i in 0..N {
@ -1246,7 +1198,6 @@ where
 #[inline]
 fn mask_up_to<M, const N: usize>(len: usize) -> Mask<M, N>
 where
-    LaneCount<N>: SupportedLaneCount,
    M: MaskElement,
 {
    let index = lane_indices::<N>();
--- a/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
@ -1,31 +1,26 @@
 use crate::simd::*;
 use core::arch::loongarch64::*;

-from_transmute! { unsafe u8x16 => v16u8 }
-from_transmute! { unsafe u8x32 => v32u8 }
-from_transmute! { unsafe i8x16 => v16i8 }
-from_transmute! { unsafe i8x32 => v32i8 }
+from_transmute! { unsafe u8x16 => m128i }
+from_transmute! { unsafe u8x32 => m256i }
+from_transmute! { unsafe i8x16 => m128i }
+from_transmute! { unsafe i8x32 => m256i }

-from_transmute! { unsafe u16x8 => v8u16 }
-from_transmute! { unsafe u16x16 => v16u16 }
-from_transmute! { unsafe i16x8 => v8i16 }
-from_transmute! { unsafe i16x16 => v16i16 }
+from_transmute! { unsafe u16x8 => m128i }
+from_transmute! { unsafe u16x16 => m256i }
+from_transmute! { unsafe i16x8 => m128i }
+from_transmute! { unsafe i16x16 => m256i }

-from_transmute! { unsafe u32x4 => v4u32 }
-from_transmute! { unsafe u32x8 => v8u32 }
-from_transmute! { unsafe i32x4 => v4i32 }
-from_transmute! { unsafe i32x8 => v8i32 }
-from_transmute! { unsafe f32x4 => v4f32 }
-from_transmute! { unsafe f32x8 => v8f32 }
+from_transmute! { unsafe u32x4 => m128i }
+from_transmute! { unsafe u32x8 => m256i }
+from_transmute! { unsafe i32x4 => m128i }
+from_transmute! { unsafe i32x8 => m256i }
+from_transmute! { unsafe f32x4 => m128 }
+from_transmute! { unsafe f32x8 => m256 }

-from_transmute! { unsafe u64x2 => v2u64 }
-from_transmute! { unsafe u64x4 => v4u64 }
-from_transmute! { unsafe i64x2 => v2i64 }
-from_transmute! { unsafe i64x4 => v4i64 }
-from_transmute! { unsafe f64x2 => v2f64 }
-from_transmute! { unsafe f64x4 => v4f64 }
-
-from_transmute! { unsafe usizex2 => v2u64 }
-from_transmute! { unsafe usizex4 => v4u64 }
-from_transmute! { unsafe isizex2 => v2i64 }
-from_transmute! { unsafe isizex4 => v4i64 }
+from_transmute! { unsafe u64x2 => m128i }
+from_transmute! { unsafe u64x4 => m256i }
+from_transmute! { unsafe i64x2 => m128i }
+from_transmute! { unsafe i64x4 => m256i }
+from_transmute! { unsafe f64x2 => m128d }
+from_transmute! { unsafe f64x4 => m256d }
--- a/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor/wasm32.rs
@ -14,17 +14,3 @@ from_transmute! { unsafe f32x4 => v128 }
 from_transmute! { unsafe u64x2 => v128 }
 from_transmute! { unsafe i64x2 => v128 }
 from_transmute! { unsafe f64x2 => v128 }
-
-#[cfg(target_pointer_width = "32")]
-mod p32 {
-    use super::*;
-    from_transmute! { unsafe usizex4 => v128 }
-    from_transmute! { unsafe isizex4 => v128 }
-}
-
-#[cfg(target_pointer_width = "64")]
-mod p64 {
-    use super::*;
-    from_transmute! { unsafe usizex2 => v128 }
-    from_transmute! { unsafe isizex2 => v128 }
-}
--- a/library/portable-simd/crates/core_simd/src/vendor/x86.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor/x86.rs
@ -39,25 +39,3 @@ from_transmute! { unsafe i64x8 => __m512i }
 from_transmute! { unsafe f64x2 => __m128d }
 from_transmute! { unsafe f64x4 => __m256d }
 from_transmute! { unsafe f64x8 => __m512d }
-
-#[cfg(target_pointer_width = "32")]
-mod p32 {
-    use super::*;
-    from_transmute! { unsafe usizex4 => __m128i }
-    from_transmute! { unsafe usizex8 => __m256i }
-    from_transmute! { unsafe Simd<usize, 16> => __m512i }
-    from_transmute! { unsafe isizex4 => __m128i }
-    from_transmute! { unsafe isizex8 => __m256i }
-    from_transmute! { unsafe Simd<isize, 16> => __m512i }
-}
-
-#[cfg(target_pointer_width = "64")]
-mod p64 {
-    use super::*;
-    from_transmute! { unsafe usizex2 => __m128i }
-    from_transmute! { unsafe usizex4 => __m256i }
-    from_transmute! { unsafe usizex8 => __m512i }
-    from_transmute! { unsafe isizex2 => __m128i }
-    from_transmute! { unsafe isizex4 => __m256i }
-    from_transmute! { unsafe isizex8 => __m512i }
-}
--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@ -65,9 +65,9 @@ macro_rules! test_mask_api {
            fn roundtrip_int_conversion() {
                let values = [true, false, false, true, false, false, true, false];
                let mask = Mask::<$type, 8>::from_array(values);
-                let int = mask.to_int();
+                let int = mask.to_simd();
                assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
-                assert_eq!(Mask::<$type, 8>::from_int(int), mask);
+                assert_eq!(Mask::<$type, 8>::from_simd(int), mask);
            }

            #[test]
--- a/library/portable-simd/crates/std_float/src/lib.rs
+++ b/library/portable-simd/crates/std_float/src/lib.rs
@ -11,7 +11,7 @@ use core_simd::simd;

 use core::intrinsics::simd as intrinsics;

-use simd::{LaneCount, Simd, SupportedLaneCount};
+use simd::Simd;

 #[cfg(feature = "as_crate")]
 mod experimental {
@ -66,28 +66,43 @@ pub trait StdFloat: Sealed + Sized {

    /// Produces a vector where every element has the sine of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn sin(self) -> Self;
+    fn sin(self) -> Self {
+        unsafe { intrinsics::simd_fsin(self) }
+    }

    /// Produces a vector where every element has the cosine of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn cos(self) -> Self;
+    fn cos(self) -> Self {
+        unsafe { intrinsics::simd_fcos(self) }
+    }

    /// Produces a vector where every element has the exponential (base e) of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn exp(self) -> Self;
+    fn exp(self) -> Self {
+        unsafe { intrinsics::simd_fexp(self) }
+    }

    /// Produces a vector where every element has the exponential (base 2) of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn exp2(self) -> Self;
+    fn exp2(self) -> Self {
+        unsafe { intrinsics::simd_fexp2(self) }
+    }

    /// Produces a vector where every element has the natural logarithm of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn ln(self) -> Self;
+    fn ln(self) -> Self {
+        unsafe { intrinsics::simd_flog(self) }
+    }

    /// Produces a vector where every element has the logarithm with respect to an arbitrary
    /// in the equivalently-indexed elements in `self` and `base`.
@ -99,13 +114,19 @@ pub trait StdFloat: Sealed + Sized {

    /// Produces a vector where every element has the base-2 logarithm of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn log2(self) -> Self;
+    fn log2(self) -> Self {
+        unsafe { intrinsics::simd_flog2(self) }
+    }

    /// Produces a vector where every element has the base-10 logarithm of the value
    /// in the equivalently-indexed element in `self`.
+    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn log10(self) -> Self;
+    fn log10(self) -> Self {
+        unsafe { intrinsics::simd_flog10(self) }
+    }

    /// Returns the smallest integer greater than or equal to each element.
    #[must_use = "method returns a new vector and does not mutate the original value"]
@ -140,68 +161,19 @@ pub trait StdFloat: Sealed + Sized {
    fn fract(self) -> Self;
 }

-impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
-impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
+impl<const N: usize> Sealed for Simd<f32, N> {}
+impl<const N: usize> Sealed for Simd<f64, N> {}

-macro_rules! impl_float {
-    {
-        $($fn:ident: $intrinsic:ident,)*
-    } => {
-        impl<const N: usize> StdFloat for Simd<f32, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
+impl<const N: usize> StdFloat for Simd<f32, N> {
    #[inline]
    fn fract(self) -> Self {
        self - self.trunc()
    }
+}

-            $(
-            #[inline]
-            fn $fn(self) -> Self {
-                unsafe { intrinsics::$intrinsic(self) }
-            }
-            )*
-        }
-
-        impl<const N: usize> StdFloat for Simd<f64, N>
-        where
-            LaneCount<N>: SupportedLaneCount,
-        {
+impl<const N: usize> StdFloat for Simd<f64, N> {
    #[inline]
    fn fract(self) -> Self {
        self - self.trunc()
    }
-
-            $(
-            #[inline]
-            fn $fn(self) -> Self {
-                // https://github.com/llvm/llvm-project/issues/83729
-                #[cfg(target_arch = "aarch64")]
-                {
-                    let mut ln = Self::splat(0f64);
-                    for i in 0..N {
-                        ln[i] = self[i].$fn()
-                    }
-                    ln
-                }
-
-                #[cfg(not(target_arch = "aarch64"))]
-                {
-                    unsafe { intrinsics::$intrinsic(self) }
-                }
-            }
-            )*
-        }
-    }
-}
-
-impl_float! {
-    sin: simd_fsin,
-    cos: simd_fcos,
-    exp: simd_fexp,
-    exp2: simd_fexp2,
-    ln: simd_flog,
-    log2: simd_flog2,
-    log10: simd_flog10,
 }
--- a/library/portable-simd/crates/std_float/tests/float.rs
+++ b/library/portable-simd/crates/std_float/tests/float.rs
@ -16,15 +16,33 @@ macro_rules! unary_test {
    }
 }

-macro_rules! binary_test {
+macro_rules! unary_approx_test {
    { $scalar:tt, $($func:tt),+ } => {
        test_helpers::test_lanes! {
            $(
            fn $func<const LANES: usize>() {
-                test_helpers::test_binary_elementwise(
+                test_helpers::test_unary_elementwise_approx(
+                    &core_simd::simd::Simd::<$scalar, LANES>::$func,
+                    &$scalar::$func,
+                    &|_| true,
+                    8,
+                )
+            }
+            )*
+        }
+    }
+}
+
+macro_rules! binary_approx_test {
+    { $scalar:tt, $($func:tt),+ } => {
+        test_helpers::test_lanes! {
+            $(
+            fn $func<const LANES: usize>() {
+                test_helpers::test_binary_elementwise_approx(
                    &core_simd::simd::Simd::<$scalar, LANES>::$func,
                    &$scalar::$func,
                    &|_, _| true,
+                    16,
                )
            }
            )*
@ -53,10 +71,13 @@ macro_rules! impl_tests {
        mod $scalar {
            use std_float::StdFloat;

-            unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc }
-            binary_test! { $scalar, log }
+            unary_test! { $scalar, sqrt, ceil, floor, round, trunc }
            ternary_test! { $scalar, mul_add }

+            // https://github.com/rust-lang/miri/issues/3555
+            unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 }
+            binary_approx_test! { $scalar, log }
+
            test_helpers::test_lanes! {
                fn fract<const LANES: usize>() {
                    test_helpers::test_unary_elementwise_flush_subnormals(
--- a/library/portable-simd/crates/test_helpers/Cargo.toml
+++ b/library/portable-simd/crates/test_helpers/Cargo.toml
@ -6,3 +6,4 @@ publish = false

 [dependencies]
 proptest = { version = "0.10", default-features = false, features = ["alloc"] }
+float-cmp = "0.10"
--- a/library/portable-simd/crates/test_helpers/src/approxeq.rs
+++ b/library/portable-simd/crates/test_helpers/src/approxeq.rs
@ -0,0 +1,110 @@
+//! Compare numeric types approximately.
+
+use float_cmp::Ulps;
+
+pub trait ApproxEq {
+    fn approxeq(&self, other: &Self, _ulps: i64) -> bool;
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result;
+}
+
+impl ApproxEq for bool {
+    fn approxeq(&self, other: &Self, _ulps: i64) -> bool {
+        self == other
+    }
+
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
+macro_rules! impl_integer_approxeq {
+    { $($type:ty),* } => {
+        $(
+        impl ApproxEq for $type {
+            fn approxeq(&self, other: &Self, _ulps: i64) -> bool {
+                self == other
+            }
+
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(f, "{:?} ({:x})", self, self)
+            }
+        }
+        )*
+    };
+}
+
+impl_integer_approxeq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize }
+
+macro_rules! impl_float_approxeq {
+    { $($type:ty),* } => {
+        $(
+        impl ApproxEq for $type {
+            fn approxeq(&self, other: &Self, ulps: i64) -> bool {
+                if self.is_nan() && other.is_nan() {
+                    true
+                } else {
+                    (self.ulps(other) as i64).abs() <= ulps
+                }
+            }
+
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                write!(f, "{:?} ({:x})", self, self.to_bits())
+            }
+        }
+        )*
+    };
+}
+
+impl_float_approxeq! { f32, f64 }
+
+impl<T: ApproxEq, const N: usize> ApproxEq for [T; N] {
+    fn approxeq(&self, other: &Self, ulps: i64) -> bool {
+        self.iter()
+            .zip(other.iter())
+            .fold(true, |value, (left, right)| {
+                value && left.approxeq(right, ulps)
+            })
+    }
+
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        #[repr(transparent)]
+        struct Wrapper<'a, T: ApproxEq>(&'a T);
+
+        impl<T: ApproxEq> core::fmt::Debug for Wrapper<'_, T> {
+            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                self.0.fmt(f)
+            }
+        }
+
+        f.debug_list()
+            .entries(self.iter().map(|x| Wrapper(x)))
+            .finish()
+    }
+}
+
+#[doc(hidden)]
+pub struct ApproxEqWrapper<'a, T>(pub &'a T, pub i64);
+
+impl<T: ApproxEq> PartialEq<T> for ApproxEqWrapper<'_, T> {
+    fn eq(&self, other: &T) -> bool {
+        self.0.approxeq(other, self.1)
+    }
+}
+
+impl<T: ApproxEq> core::fmt::Debug for ApproxEqWrapper<'_, T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[macro_export]
+macro_rules! prop_assert_approxeq {
+    { $a:expr, $b:expr, $ulps:expr $(,)? } => {
+        {
+            use $crate::approxeq::ApproxEqWrapper;
+            let a = $a;
+            let b = $b;
+            proptest::prop_assert_eq!(ApproxEqWrapper(&a, $ulps), b);
+        }
+    };
+}
--- a/library/portable-simd/crates/test_helpers/src/lib.rs
+++ b/library/portable-simd/crates/test_helpers/src/lib.rs
@ -12,6 +12,9 @@ pub mod wasm;
 #[macro_use]
 pub mod biteq;

+#[macro_use]
+pub mod approxeq;
+
 pub mod subnormals;
 use subnormals::FlushSubnormals;

@ -185,6 +188,41 @@ pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const
    });
 }

+/// Test a unary vector function against a unary scalar function, applied elementwise.
+///
+/// Floats are checked approximately.
+pub fn test_unary_elementwise_approx<
+    Scalar,
+    ScalarResult,
+    Vector,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector) -> VectorResult,
+    fs: &dyn Fn(Scalar) -> ScalarResult,
+    check: &dyn Fn([Scalar; LANES]) -> bool,
+    ulps: i64,
+) where
+    Scalar: Copy + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy,
+    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_1(&|x: [Scalar; LANES]| {
+        proptest::prop_assume!(check(x));
+        let result_1: [ScalarResult; LANES] = fv(x.into()).into();
+        let result_2: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .map(fs)
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_approxeq!(result_1, result_2, ulps);
+        Ok(())
+    });
+}
+
 /// Test a unary vector function against a unary scalar function, applied elementwise.
 ///
 /// Where subnormals are flushed, use approximate equality.
@ -290,6 +328,44 @@ pub fn test_binary_elementwise<
    });
 }

+/// Test a binary vector function against a binary scalar function, applied elementwise.
+pub fn test_binary_elementwise_approx<
+    Scalar1,
+    Scalar2,
+    ScalarResult,
+    Vector1,
+    Vector2,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+    ulps: i64,
+) where
+    Scalar1: Copy + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + core::fmt::Debug + DefaultStrategy,
+    ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
+        let result_2: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| fs(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_approxeq!(result_1, result_2, ulps);
+        Ok(())
+    });
+}
+
 /// Test a binary vector function against a binary scalar function, applied elementwise.
 ///
 /// Where subnormals are flushed, use approximate equality.
@ -528,8 +604,6 @@ macro_rules! test_lanes {
                use super::*;

                fn implementation<const $lanes: usize>()
-                where
-                    core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
                $body

                #[cfg(target_arch = "wasm32")]
@ -628,8 +702,6 @@ macro_rules! test_lanes_panic {
                use super::*;

                fn implementation<const $lanes: usize>()
-                where
-                    core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
                $body

                // test some odd and even non-power-of-2 lengths on miri
--- a/library/portable-simd/rust-toolchain.toml
+++ b/library/portable-simd/rust-toolchain.toml
@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2025-01-16"
+channel = "nightly-2026-01-26"
 components = ["rustfmt", "clippy", "miri", "rust-src"]