Merge commit 'd9aae8cc54' into sync-from-portable-simd-2026-01-28

This commit is contained in:
Caleb Zulawski 2026-01-28 00:56:52 -05:00
commit 0eaef59233
40 changed files with 879 additions and 1193 deletions

View file

@ -59,7 +59,7 @@ jobs:
strategy:
fail-fast: false
matrix:
target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu]
target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, x86_64-unknown-linux-gnu]
# `default` means we use the default target config for the target,
# `native` means we run with `-Ctarget-cpu=native`, and anything else is
# an arg to `-Ctarget-feature`
@ -68,18 +68,12 @@ jobs:
exclude:
# -Ctarget-cpu=native sounds like bad-news if target != host
- { target: i686-pc-windows-msvc, target_feature: native }
- { target: i586-pc-windows-msvc, target_feature: native }
include:
# Populate the `matrix.os` field
- { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
- { target: x86_64-pc-windows-msvc, os: windows-latest }
- { target: i686-pc-windows-msvc, os: windows-latest }
- { target: i586-pc-windows-msvc, os: windows-latest }
# These are globally available on all the other targets.
- { target: i586-pc-windows-msvc, target_feature: +sse, os: windows-latest }
- { target: i586-pc-windows-msvc, target_feature: +sse2, os: windows-latest }
# Annoyingly, the x86_64-unknown-linux-gnu runner *almost* always has
# avx512vl, but occasionally doesn't. Maybe one day we can enable it.
@ -129,7 +123,7 @@ jobs:
run: cargo doc --verbose --target=${{ matrix.target }}
env:
RUSTDOCFLAGS: -Dwarnings
macos-tests:
name: ${{ matrix.target }}
runs-on: macos-latest
@ -246,9 +240,18 @@ jobs:
miri:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
shard: [1, 2, 3, 4]
env:
PROPTEST_CASES: 16
steps:
- uses: actions/checkout@v4
- name: Test (Miri)
run: cargo miri test
- name: Install cargo-nextest
uses: taiki-e/install-action@nextest
- name: Test (Miri) (partition ${{ matrix.shard }}/4)
run: |
cargo miri nextest run --partition count:${{ matrix.shard }}/4

View file

@ -1,12 +1,12 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "autocfg"
version = "1.1.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
@ -16,31 +16,30 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.13.0"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "byteorder"
version = "1.4.3"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "console_error_panic_hook"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
dependencies = [
"cfg-if",
"wasm-bindgen",
]
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "core_simd"
@ -54,46 +53,69 @@ dependencies = [
]
[[package]]
name = "js-sys"
version = "0.3.64"
name = "float-cmp"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
dependencies = [
"num-traits",
]
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "log"
version = "0.4.20"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "minicov"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b"
dependencies = [
"cc",
"walkdir",
]
[[package]]
name = "num-traits"
version = "0.2.16"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.18.0"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.66"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
@ -114,9 +136,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.33"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
@ -167,10 +189,25 @@ dependencies = [
]
[[package]]
name = "scoped-tls"
version = "1.0.1"
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "std_float"
@ -184,9 +221,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.29"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
@ -197,34 +234,46 @@ dependencies = [
name = "test_helpers"
version = "0.1.0"
dependencies = [
"float-cmp",
"proptest",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.87"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.87"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
@ -233,21 +282,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.37"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.87"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -255,9 +305,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.87"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
@ -268,19 +318,21 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.87"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-bindgen-test"
version = "0.3.37"
version = "0.3.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671"
checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3"
dependencies = [
"console_error_panic_hook",
"js-sys",
"scoped-tls",
"minicov",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-bindgen-test-macro",
@ -288,20 +340,123 @@ dependencies = [
[[package]]
name = "wasm-bindgen-test-macro"
version = "0.3.37"
version = "0.3.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575"
checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "web-sys"
version = "0.3.64"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zerocopy"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -25,7 +25,7 @@ SIMD has a few special vocabulary terms you should know:
* **Scalar:** "Scalar" in mathematical contexts refers to values that can be represented as a single element, mostly numbers like 6, 3.14, or -2. It can also be used to describe "scalar operations" that use strictly scalar values, like addition. This term is mostly used to differentiate between vectorized operations that use SIMD instructions and scalar operations that don't.
* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops.
* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general it is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops.
* **Bit Widths:** When talking about SIMD, the bit widths used are the bit size of the vectors involved, *not* the individual elements. So "128-bit SIMD" has 128-bit vectors, and that might be `f32x4`, `i32x4`, `i16x8`, or other variations. While 128-bit SIMD is the most common, there's also 64-bit, 256-bit, and even 512-bit on the newest CPUs.

View file

@ -1,8 +1,6 @@
//! Code taken from the `packed_simd` crate.
//! Run this code with `cargo test --example dot_product`.
#![feature(array_chunks)]
#![feature(slice_as_chunks)]
// Add these imports to use the stdsimd library
#![feature(portable_simd)]
use core_simd::simd::prelude::*;
@ -33,7 +31,7 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 {
}
// We now move on to the SIMD implementations: notice the following constructs:
// `array_chunks::<4>`: mapping this over the vector will let us construct SIMD vectors
// `as_chunks::<4>`: mapping this over the vector will let us construct SIMD vectors
// `f32x4::from_array`: construct the SIMD vector from a slice
// `(a * b).reduce_sum()`: Multiply both f32x4 vectors together, and then reduce them.
// This approach essentially uses SIMD to produce a vector of length N/4 of all the products,
@ -42,9 +40,11 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 {
pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 {
assert_eq!(a.len(), b.len());
// TODO handle remainder when a.len() % 4 != 0
a.array_chunks::<4>()
a.as_chunks::<4>()
.0
.iter()
.map(|&a| f32x4::from_array(a))
.zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
.zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
.map(|(a, b)| (a * b).reduce_sum())
.sum()
}
@ -60,9 +60,11 @@ pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 {
pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 {
assert_eq!(a.len(), b.len());
// TODO handle remainder when a.len() % 4 != 0
a.array_chunks::<4>()
a.as_chunks::<4>()
.0
.iter()
.map(|&a| f32x4::from_array(a))
.zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
.zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
.fold(f32x4::splat(0.0), |acc, zipped| acc + zipped.0 * zipped.1)
.reduce_sum()
}
@ -74,9 +76,11 @@ pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 {
assert_eq!(a.len(), b.len());
// TODO handle remainder when a.len() % 4 != 0
let mut res = f32x4::splat(0.0);
a.array_chunks::<4>()
a.as_chunks::<4>()
.0
.iter()
.map(|&a| f32x4::from_array(a))
.zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
.zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
.for_each(|(a, b)| {
res = a.mul_add(b, res);
});
@ -113,9 +117,11 @@ pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 {
// next example.
pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
let mut sum = a
.array_chunks::<4>()
.as_chunks::<4>()
.0
.iter()
.map(|&a| f32x4::from_array(a))
.zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
.zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
.map(|(a, b)| a * b)
.fold(f32x4::splat(0.0), std::ops::Add::add)
.reduce_sum();
@ -131,9 +137,11 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
// This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that.
// Notice the use of `mul_add`, which can do a multiply and an add operation per iteration.
pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 {
a.array_chunks::<4>()
a.as_chunks::<4>()
.0
.iter()
.map(|&a| f32x4::from_array(a))
.zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b)))
.zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b)))
.fold(f32x4::splat(0.), |acc, (a, b)| a.mul_add(b, acc))
.reduce_sum()
}

View file

@ -1,7 +1,7 @@
//! 4x4 matrix inverse
// Code ported from the `packed_simd` crate
// Run this code with `cargo test --example matrix_inversion`
#![feature(array_chunks, portable_simd)]
#![feature(portable_simd)]
use core_simd::simd::prelude::*;
// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)

View file

@ -1,9 +1,8 @@
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use crate::simd::{Simd, SimdElement};
use core::fmt;
impl<T, const N: usize> fmt::Debug for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + fmt::Debug,
{
/// A `Simd<T, N>` has a debug format like the one for `[T]`:

View file

@ -1,4 +1,4 @@
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::Simd;
use core::{
iter::{Product, Sum},
ops::{Add, Mul},
@ -7,8 +7,6 @@ use core::{
macro_rules! impl_traits {
{ $type:ty } => {
impl<const N: usize> Sum<Self> for Simd<$type, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
@ -17,8 +15,6 @@ macro_rules! impl_traits {
}
impl<const N: usize> Product<Self> for Simd<$type, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
@ -27,8 +23,6 @@ macro_rules! impl_traits {
}
impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
@ -37,8 +31,6 @@ macro_rules! impl_traits {
}
impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {

View file

@ -1,40 +0,0 @@
mod sealed {
pub trait Sealed {}
}
use sealed::Sealed;
/// Specifies the number of lanes in a SIMD vector as a type.
pub struct LaneCount<const N: usize>;
impl<const N: usize> LaneCount<N> {
/// The number of bytes in a bitmask with this many lanes.
pub const BITMASK_LEN: usize = N.div_ceil(8);
}
/// Statically guarantees that a lane count is marked as supported.
///
/// This trait is *sealed*: the list of implementors below is total.
/// Users do not have the ability to mark additional `LaneCount<N>` values as supported.
/// Only SIMD vectors with supported lane counts are constructable.
pub trait SupportedLaneCount: Sealed {
#[doc(hidden)]
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
}
impl<const N: usize> Sealed for LaneCount<N> {}
macro_rules! supported_lane_count {
($($lanes:literal),+) => {
$(
impl SupportedLaneCount for LaneCount<$lanes> {
type BitMask = [u8; ($lanes + 7) / 8];
}
)+
};
}
supported_lane_count!(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
);

View file

@ -9,7 +9,8 @@
simd_ffi,
staged_api,
prelude_import,
ptr_metadata
ptr_metadata,
rustc_attrs
)]
#![cfg_attr(
all(
@ -30,10 +31,6 @@
any(target_arch = "powerpc", target_arch = "powerpc64"),
feature(stdarch_powerpc)
)]
#![cfg_attr(
all(target_arch = "x86_64", target_feature = "avx512f"),
feature(stdarch_x86_avx512)
)]
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
#![deny(
unsafe_op_in_unsafe_fn,
@ -41,7 +38,7 @@
clippy::undocumented_unsafe_blocks
)]
#![doc(test(attr(deny(warnings))))]
#![allow(internal_features)]
#![allow(internal_features, clippy::repr_packed_without_abi)]
#![unstable(feature = "portable_simd", issue = "86656")]
//! Portable SIMD module.

View file

@ -2,20 +2,33 @@
//! Types representing
#![allow(non_camel_case_types)]
#[cfg_attr(
not(all(target_arch = "x86_64", target_feature = "avx512f")),
path = "masks/full_masks.rs"
)]
#[cfg_attr(
all(target_arch = "x86_64", target_feature = "avx512f"),
path = "masks/bitmask.rs"
)]
mod mask_impl;
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
use crate::simd::{Select, Simd, SimdCast, SimdElement};
use core::cmp::Ordering;
use core::{fmt, mem};
pub(crate) trait FixEndianness {
fn fix_endianness(self) -> Self;
}
macro_rules! impl_fix_endianness {
{ $($int:ty),* } => {
$(
impl FixEndianness for $int {
#[inline(always)]
fn fix_endianness(self) -> Self {
if cfg!(target_endian = "big") {
<$int>::reverse_bits(self)
} else {
self
}
}
}
)*
}
}
impl_fix_endianness! { u8, u16, u32, u64 }
mod sealed {
use super::*;
@ -28,7 +41,6 @@ mod sealed {
pub trait Sealed {
fn valid<const N: usize>(values: Simd<Self, N>) -> bool
where
LaneCount<N>: SupportedLaneCount,
Self: SimdElement;
fn eq(self, other: Self) -> bool;
@ -56,8 +68,6 @@ macro_rules! impl_element {
impl Sealed for $ty {
#[inline]
fn valid<const N: usize>(value: Simd<Self, N>) -> bool
where
LaneCount<N>: SupportedLaneCount,
{
// We can't use `Simd` directly, because `Simd`'s functions call this function and
// we will end up with an infinite loop.
@ -108,23 +118,19 @@ impl_element! { isize, usize }
/// The layout of this type is unspecified, and may change between platforms
/// and/or Rust versions, and code should not assume that it is equivalent to
/// `[T; N]`.
///
/// `N` cannot be 0 and may be at most 64. This limit may be increased in
/// the future.
#[repr(transparent)]
pub struct Mask<T, const N: usize>(mask_impl::Mask<T, N>)
pub struct Mask<T, const N: usize>(Simd<T, N>)
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount;
T: MaskElement;
impl<T, const N: usize> Copy for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
}
impl<T, const N: usize> Copy for Mask<T, N> where T: MaskElement {}
impl<T, const N: usize> Clone for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn clone(&self) -> Self {
@ -135,12 +141,12 @@ where
impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
/// Constructs a mask by setting all elements to the given value.
#[inline]
pub fn splat(value: bool) -> Self {
Self(mask_impl::Mask::splat(value))
#[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
pub const fn splat(value: bool) -> Self {
Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
}
/// Converts an array of bools to a SIMD mask.
@ -156,7 +162,7 @@ where
let bytes: [u8; N] = mem::transmute_copy(&array);
let bools: Simd<i8, N> =
core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools))
Mask::from_simd_unchecked(core::intrinsics::simd::simd_cast(bools))
}
}
@ -174,7 +180,7 @@ where
// This would be hypothetically valid as an "in-place" transmute,
// but these are "dependently-sized" types, so copy elision it is!
unsafe {
let mut bytes: Simd<i8, N> = core::intrinsics::simd::simd_cast(self.to_int());
let mut bytes: Simd<i8, N> = core::intrinsics::simd::simd_cast(self.to_simd());
bytes &= Simd::splat(1i8);
mem::transmute_copy(&bytes)
}
@ -187,12 +193,12 @@ where
/// All elements must be either 0 or -1.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
pub unsafe fn from_simd_unchecked(value: Simd<T, N>) -> Self {
// Safety: the caller must confirm this invariant
unsafe {
core::intrinsics::assume(<T as Sealed>::valid(value));
Self(mask_impl::Mask::from_int_unchecked(value))
}
Self(value)
}
/// Converts a vector of integers to a mask, where 0 represents `false` and -1
@ -203,25 +209,26 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
#[track_caller]
pub fn from_int(value: Simd<T, N>) -> Self {
pub fn from_simd(value: Simd<T, N>) -> Self {
assert!(T::valid(value), "all values must be either 0 or -1",);
// Safety: the validity has been checked
unsafe { Self::from_int_unchecked(value) }
unsafe { Self::from_simd_unchecked(value) }
}
/// Converts the mask to a vector of integers, where 0 represents `false` and -1
/// represents `true`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
pub fn to_int(self) -> Simd<T, N> {
self.0.to_int()
pub fn to_simd(self) -> Simd<T, N> {
self.0
}
/// Converts the mask to a mask of any other element size.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn cast<U: MaskElement>(self) -> Mask<U, N> {
Mask(self.0.convert())
// Safety: mask elements are integers
unsafe { Mask(core::intrinsics::simd::simd_as(self.0)) }
}
/// Tests the value of the specified element.
@ -232,7 +239,7 @@ where
#[must_use = "method returns a new bool and does not mutate the original value"]
pub unsafe fn test_unchecked(&self, index: usize) -> bool {
// Safety: the caller must confirm this invariant
unsafe { self.0.test_unchecked(index) }
unsafe { T::eq(*self.0.as_array().get_unchecked(index), T::TRUE) }
}
/// Tests the value of the specified element.
@ -243,9 +250,7 @@ where
#[must_use = "method returns a new bool and does not mutate the original value"]
#[track_caller]
pub fn test(&self, index: usize) -> bool {
assert!(index < N, "element index out of range");
// Safety: the element index has been checked
unsafe { self.test_unchecked(index) }
T::eq(self.0[index], T::TRUE)
}
/// Sets the value of the specified element.
@ -256,7 +261,7 @@ where
pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
// Safety: the caller must confirm this invariant
unsafe {
self.0.set_unchecked(index, value);
*self.0.as_mut_array().get_unchecked_mut(index) = if value { T::TRUE } else { T::FALSE }
}
}
@ -267,35 +272,65 @@ where
#[inline]
#[track_caller]
pub fn set(&mut self, index: usize, value: bool) {
assert!(index < N, "element index out of range");
// Safety: the element index has been checked
unsafe {
self.set_unchecked(index, value);
}
self.0[index] = if value { T::TRUE } else { T::FALSE }
}
/// Returns true if any element is set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn any(self) -> bool {
self.0.any()
// Safety: `self` is a mask vector
unsafe { core::intrinsics::simd::simd_reduce_any(self.0) }
}
/// Returns true if all elements are set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn all(self) -> bool {
self.0.all()
// Safety: `self` is a mask vector
unsafe { core::intrinsics::simd::simd_reduce_all(self.0) }
}
/// Creates a bitmask from a mask.
///
/// Each bit is set if the corresponding element in the mask is `true`.
/// If the mask contains more than 64 elements, the bitmask is truncated to the first 64.
#[inline]
#[must_use = "method returns a new integer and does not mutate the original value"]
pub fn to_bitmask(self) -> u64 {
self.0.to_bitmask_integer()
const {
assert!(N <= 64, "number of elements can't be greater than 64");
}
#[inline]
unsafe fn to_bitmask_impl<T, U: FixEndianness, const M: usize, const N: usize>(
mask: Mask<T, N>,
) -> U
where
T: MaskElement,
{
let resized = mask.resize::<M>(false);
// Safety: `resized` is an integer vector with length M, which must match T
let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized.0) };
// LLVM assumes bit order should match endianness
bitmask.fix_endianness()
}
// TODO modify simd_bitmask to zero-extend output, making this unnecessary
if N <= 8 {
// Safety: bitmask matches length
unsafe { to_bitmask_impl::<T, u8, 8, N>(self) as u64 }
} else if N <= 16 {
// Safety: bitmask matches length
unsafe { to_bitmask_impl::<T, u16, 16, N>(self) as u64 }
} else if N <= 32 {
// Safety: bitmask matches length
unsafe { to_bitmask_impl::<T, u32, 32, N>(self) as u64 }
} else {
// Safety: bitmask matches length
unsafe { to_bitmask_impl::<T, u64, 64, N>(self) }
}
}
/// Creates a mask from a bitmask.
@ -305,7 +340,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask(bitmask: u64) -> Self {
Self(mask_impl::Mask::from_bitmask_integer(bitmask))
Self(bitmask.select(Simd::splat(T::TRUE), Simd::splat(T::FALSE)))
}
/// Finds the index of the first set element.
@ -351,7 +386,7 @@ where
// Safety: the input and output are integer vectors
let index: Simd<T, N> = unsafe { core::intrinsics::simd::simd_cast(index) };
let masked_index = self.select(index, Self::splat(true).to_int());
let masked_index = self.select(index, Self::splat(true).to_simd());
// Safety: the input and output are integer vectors
let masked_index: Simd<T::Unsigned, N> =
@ -376,7 +411,6 @@ where
impl<T, const N: usize> From<[bool; N]> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn from(array: [bool; N]) -> Self {
@ -387,7 +421,6 @@ where
impl<T, const N: usize> From<Mask<T, N>> for [bool; N]
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn from(vector: Mask<T, N>) -> Self {
@ -398,7 +431,6 @@ where
impl<T, const N: usize> Default for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn default() -> Self {
@ -409,7 +441,6 @@ where
impl<T, const N: usize> PartialEq for Mask<T, N>
where
T: MaskElement + PartialEq,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn eq(&self, other: &Self) -> bool {
@ -420,7 +451,6 @@ where
impl<T, const N: usize> PartialOrd for Mask<T, N>
where
T: MaskElement + PartialOrd,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
@ -431,7 +461,6 @@ where
impl<T, const N: usize> fmt::Debug for Mask<T, N>
where
T: MaskElement + fmt::Debug,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@ -444,19 +473,18 @@ where
impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
fn bitand(self, rhs: Self) -> Self {
Self(self.0 & rhs.0)
// Safety: `self` is an integer vector
unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) }
}
}
impl<T, const N: usize> core::ops::BitAnd<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@ -468,7 +496,6 @@ where
impl<T, const N: usize> core::ops::BitAnd<Mask<T, N>> for bool
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Mask<T, N>;
#[inline]
@ -480,19 +507,18 @@ where
impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
fn bitor(self, rhs: Self) -> Self {
Self(self.0 | rhs.0)
// Safety: `self` is an integer vector
unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) }
}
}
impl<T, const N: usize> core::ops::BitOr<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@ -504,7 +530,6 @@ where
impl<T, const N: usize> core::ops::BitOr<Mask<T, N>> for bool
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Mask<T, N>;
#[inline]
@ -516,19 +541,18 @@ where
impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
fn bitxor(self, rhs: Self) -> Self::Output {
Self(self.0 ^ rhs.0)
// Safety: `self` is an integer vector
unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) }
}
}
impl<T, const N: usize> core::ops::BitXor<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@ -540,7 +564,6 @@ where
impl<T, const N: usize> core::ops::BitXor<Mask<T, N>> for bool
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Mask<T, N>;
#[inline]
@ -552,30 +575,27 @@ where
impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Mask<T, N>;
#[inline]
fn not(self) -> Self::Output {
Self(!self.0)
Self::splat(true) ^ self
}
}
impl<T, const N: usize> core::ops::BitAndAssign for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitand_assign(&mut self, rhs: Self) {
self.0 = self.0 & rhs.0;
*self = *self & rhs;
}
}
impl<T, const N: usize> core::ops::BitAndAssign<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitand_assign(&mut self, rhs: bool) {
@ -586,18 +606,16 @@ where
impl<T, const N: usize> core::ops::BitOrAssign for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitor_assign(&mut self, rhs: Self) {
self.0 = self.0 | rhs.0;
*self = *self | rhs;
}
}
impl<T, const N: usize> core::ops::BitOrAssign<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitor_assign(&mut self, rhs: bool) {
@ -608,18 +626,16 @@ where
impl<T, const N: usize> core::ops::BitXorAssign for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitxor_assign(&mut self, rhs: Self) {
self.0 = self.0 ^ rhs.0;
*self = *self ^ rhs;
}
}
impl<T, const N: usize> core::ops::BitXorAssign<bool> for Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitxor_assign(&mut self, rhs: bool) {
@ -631,8 +647,6 @@ macro_rules! impl_from {
{ $from:ty => $($to:ty),* } => {
$(
impl<const N: usize> From<Mask<$from, N>> for Mask<$to, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn from(value: Mask<$from, N>) -> Self {

View file

@ -1,228 +0,0 @@
#![allow(unused_imports)]
use super::MaskElement;
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use core::marker::PhantomData;
/// A mask where each lane is represented by a single bit.
///
/// The bits are packed into the byte array chosen by
/// `SupportedLaneCount::BitMask` for `N` lanes; `PhantomData<T>` only
/// records the element type the mask is logically associated with — no
/// `T` values are stored.
#[repr(transparent)]
pub(crate) struct Mask<T, const N: usize>(
    <LaneCount<N> as SupportedLaneCount>::BitMask,
    PhantomData<T>,
)
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount;
// These traits are implemented by hand rather than derived: deriving
// would add `Copy`/`Clone`/`PartialEq`/... bounds on `T`, which is only a
// phantom parameter here — the real storage is the `BitMask` byte array.
impl<T, const N: usize> Copy for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
}

impl<T, const N: usize> Clone for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

// Comparisons operate on the raw storage bytes. This is exact because
// `splat` and `not` keep the padding bits past lane N-1 zeroed, and
// `set_unchecked` requires `lane < N`.
impl<T, const N: usize> PartialEq for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.0.as_ref() == other.0.as_ref()
    }
}

// Ordering is byte-wise lexicographic over the packed storage; it is an
// arbitrary-but-consistent total order, not an element-wise comparison.
impl<T, const N: usize> PartialOrd for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        self.0.as_ref().partial_cmp(other.0.as_ref())
    }
}

impl<T, const N: usize> Eq for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
}

impl<T, const N: usize> Ord for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.0.as_ref().cmp(other.0.as_ref())
    }
}
impl<T, const N: usize> Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    /// Constructs a mask with every lane set to `value`.
    ///
    /// Padding bits past lane `N - 1` in the last storage byte are always
    /// cleared, so byte-wise comparisons (`eq`, `any`, `all`) stay exact.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) fn splat(value: bool) -> Self {
        let mut mask = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
        if value {
            mask.as_mut().fill(u8::MAX)
        } else {
            mask.as_mut().fill(u8::MIN)
        }
        // Clear the unused high bits of the final byte when N is not a
        // multiple of 8.
        if N % 8 > 0 {
            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
        }
        Self(mask, PhantomData)
    }

    /// Reads the bit for `lane` without bounds checking.
    ///
    /// # Safety
    /// `lane` must be less than `N`.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
        (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
    }

    /// Writes the bit for `lane` without bounds checking.
    ///
    /// # Safety
    /// `lane` must be less than `N`.
    #[inline]
    pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
        // XOR trick: flip the stored bit only if it differs from the
        // requested value, leaving all other bits of the byte untouched.
        unsafe {
            self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
        }
    }

    /// Expands the packed bitmask into a full integer vector with
    /// `T::TRUE`/`T::FALSE` per lane.
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    pub(crate) fn to_int(self) -> Simd<T, N> {
        // The stored `BitMask` is the type `SupportedLaneCount` selects
        // for N lanes, which is what `simd_select_bitmask` expects here.
        unsafe {
            core::intrinsics::simd::simd_select_bitmask(
                self.0,
                Simd::splat(T::TRUE),
                Simd::splat(T::FALSE),
            )
        }
    }

    /// Packs an integer vector into a bitmask.
    ///
    /// # Safety
    /// Every lane of `value` must be `T::TRUE` or `T::FALSE`.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
        unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) }
    }

    /// Returns the mask as a `u64` bitmask.
    // NOTE(review): storage bytes are copied in native order via
    // `from_ne_bytes`; confirm the intended lane-to-bit mapping on
    // big-endian targets.
    #[inline]
    pub(crate) fn to_bitmask_integer(self) -> u64 {
        let mut bitmask = [0u8; 8];
        bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
        u64::from_ne_bytes(bitmask)
    }

    /// Builds a mask from a `u64` bitmask (inverse of `to_bitmask_integer`).
    // NOTE(review): takes the first `len` native-endian bytes — on
    // big-endian this selects the high bytes of `bitmask`; verify against
    // `to_bitmask_integer`.
    #[inline]
    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
        let len = bytes.as_mut().len();
        bytes
            .as_mut()
            .copy_from_slice(&bitmask.to_ne_bytes()[..len]);
        Self(bytes, PhantomData)
    }

    /// Reinterprets this mask for a different element type; free, because
    /// the packed representation does not depend on `T`.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) fn convert<U>(self) -> Mask<U, N>
    where
        U: MaskElement,
    {
        // Safety: bitmask layout does not depend on the element width
        unsafe { core::mem::transmute_copy(&self) }
    }

    /// Returns true if any lane is set.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) fn any(self) -> bool {
        // Exact because padding bits are zero in both operands.
        self != Self::splat(false)
    }

    /// Returns true if every lane is set.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) fn all(self) -> bool {
        self == Self::splat(true)
    }
}
impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
{
    type Output = Self;

    /// Lane-wise AND, computed one packed storage byte at a time.
    #[inline]
    fn bitand(mut self, rhs: Self) -> Self {
        let rhs_bytes = rhs.0.as_ref();
        for (i, byte) in self.0.as_mut().iter_mut().enumerate() {
            *byte &= rhs_bytes[i];
        }
        self
    }
}
impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
    <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
{
    type Output = Self;

    /// Lane-wise OR, computed one packed storage byte at a time.
    #[inline]
    fn bitor(mut self, rhs: Self) -> Self {
        let rhs_bytes = rhs.0.as_ref();
        for (i, byte) in self.0.as_mut().iter_mut().enumerate() {
            *byte |= rhs_bytes[i];
        }
        self
    }
}
impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise XOR, computed one packed storage byte at a time.
    #[inline]
    fn bitxor(mut self, rhs: Self) -> Self::Output {
        let rhs_bytes = rhs.0.as_ref();
        for (i, byte) in self.0.as_mut().iter_mut().enumerate() {
            *byte ^= rhs_bytes[i];
        }
        self
    }
}
impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise negation.
    #[inline]
    fn not(mut self) -> Self::Output {
        // Invert every storage byte...
        for x in self.0.as_mut() {
            *x = !*x;
        }
        // ...then re-clear the padding bits past lane N-1, preserving the
        // invariant relied on by `PartialEq`/`any`/`all`, which compare
        // raw bytes.
        if N % 8 > 0 {
            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
        }
        self
    }
}

View file

@ -1,296 +0,0 @@
//! Masks that take up full SIMD vector registers.
use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};

/// A mask stored as a full integer vector.
///
/// Each lane should hold either `T::TRUE` or `T::FALSE`; constructors
/// such as `splat` uphold this, and `from_int_unchecked` is `unsafe`
/// precisely because the caller must guarantee it.
#[repr(transparent)]
pub(crate) struct Mask<T, const N: usize>(Simd<T, N>)
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount;
// Implemented by hand rather than derived so the bounds stay minimal:
// each impl requires of `T` only what it actually uses.
impl<T, const N: usize> Copy for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
}

impl<T, const N: usize> Clone for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

// Comparisons delegate to the underlying `Simd` vector's impls.
impl<T, const N: usize> PartialEq for Mask<T, N>
where
    T: MaskElement + PartialEq,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.0.eq(&other.0)
    }
}

impl<T, const N: usize> PartialOrd for Mask<T, N>
where
    T: MaskElement + PartialOrd,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

impl<T, const N: usize> Eq for Mask<T, N>
where
    T: MaskElement + Eq,
    LaneCount<N>: SupportedLaneCount,
{
}

impl<T, const N: usize> Ord for Mask<T, N>
where
    T: MaskElement + Ord,
    LaneCount<N>: SupportedLaneCount,
{
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.0.cmp(&other.0)
    }
}
// Helper used to work around LLVM's bitmask bit order, which follows
// target endianness.
pub(crate) trait ReverseBits {
    // Reverse the least significant `n` bits of `self`; all higher bits
    // must already be zero.
    fn reverse_bits(self, n: usize) -> Self;
}

macro_rules! impl_reverse_bits {
    { $($int:ty),* } => {
        $(
            impl ReverseBits for $int {
                #[inline(always)]
                fn reverse_bits(self, n: usize) -> Self {
                    // Reverse the whole word, then shift the interesting
                    // bits back down into the low `n` positions.
                    let bitsize = <$int>::BITS as usize;
                    let reversed = <$int>::reverse_bits(self);
                    if n >= bitsize {
                        reversed
                    } else {
                        reversed >> (bitsize - n)
                    }
                }
            }
        )*
    }
}
impl_reverse_bits! { u8, u16, u32, u64 }
impl<T, const N: usize> Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    /// Constructs a mask with every lane set to `value`.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) fn splat(value: bool) -> Self {
        Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
    }

    /// Reads lane `lane`.
    ///
    /// # Safety
    /// `lane` must be less than `N`.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
        T::eq(self.0[lane], T::TRUE)
    }

    /// Writes lane `lane`.
    ///
    /// # Safety
    /// `lane` must be less than `N`.
    #[inline]
    pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
        self.0[lane] = if value { T::TRUE } else { T::FALSE }
    }

    /// Returns the underlying integer vector (`T::TRUE`/`T::FALSE` lanes).
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    pub(crate) fn to_int(self) -> Simd<T, N> {
        self.0
    }

    /// Wraps an integer vector as a mask without checking its lanes.
    ///
    /// # Safety
    /// Every lane of `value` must be `T::TRUE` or `T::FALSE`.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
        Self(value)
    }

    /// Converts this mask to a mask with a different element width.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub(crate) fn convert<U>(self) -> Mask<U, N>
    where
        U: MaskElement,
    {
        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
        unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
    }

    /// Produces a `U`-typed bitmask after widening the mask to `M` lanes.
    ///
    /// # Safety
    /// The bit width of `U` must equal `M`.
    #[inline]
    unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
    where
        LaneCount<M>: SupportedLaneCount,
    {
        let resized = self.to_int().resize::<M>(T::FALSE);
        // Safety: `resized` is an integer vector with length M, which must match T
        let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) };
        // LLVM assumes bit order should match endianness
        if cfg!(target_endian = "big") {
            bitmask.reverse_bits(M)
        } else {
            bitmask
        }
    }

    /// Builds a mask from a `U`-typed bitmask via an `M`-lane select,
    /// then shrinks back to `N` lanes.
    ///
    /// # Safety
    /// The bit width of `U` must equal `M`.
    #[inline]
    unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
    where
        LaneCount<M>: SupportedLaneCount,
    {
        // LLVM assumes bit order should match endianness
        let bitmask = if cfg!(target_endian = "big") {
            bitmask.reverse_bits(M)
        } else {
            bitmask
        };
        // SAFETY: `mask` is the correct bitmask type for a u64 bitmask
        let mask: Simd<T, M> = unsafe {
            core::intrinsics::simd::simd_select_bitmask(
                bitmask,
                Simd::<T, M>::splat(T::TRUE),
                Simd::<T, M>::splat(T::FALSE),
            )
        };
        // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
        unsafe { Self::from_int_unchecked(mask.resize::<N>(T::FALSE)) }
    }

    /// Returns the mask as a `u64` bitmask, with lane 0 in the least
    /// significant bit and bits past lane `N - 1` zero.
    #[inline]
    pub(crate) fn to_bitmask_integer(self) -> u64 {
        // TODO modify simd_bitmask to zero-extend output, making this unnecessary
        // Dispatch to the narrowest bitmask width that covers N lanes.
        if N <= 8 {
            // Safety: bitmask matches length
            unsafe { self.to_bitmask_impl::<u8, 8>() as u64 }
        } else if N <= 16 {
            // Safety: bitmask matches length
            unsafe { self.to_bitmask_impl::<u16, 16>() as u64 }
        } else if N <= 32 {
            // Safety: bitmask matches length
            unsafe { self.to_bitmask_impl::<u32, 32>() as u64 }
        } else {
            // Safety: bitmask matches length
            unsafe { self.to_bitmask_impl::<u64, 64>() }
        }
    }

    /// Builds a mask from a `u64` bitmask (inverse of `to_bitmask_integer`).
    #[inline]
    pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
        // TODO modify simd_bitmask_select to truncate input, making this unnecessary
        if N <= 8 {
            // Safety: bitmask matches length
            unsafe { Self::from_bitmask_impl::<u8, 8>(bitmask as u8) }
        } else if N <= 16 {
            // Safety: bitmask matches length
            unsafe { Self::from_bitmask_impl::<u16, 16>(bitmask as u16) }
        } else if N <= 32 {
            // Safety: bitmask matches length
            unsafe { Self::from_bitmask_impl::<u32, 32>(bitmask as u32) }
        } else {
            // Safety: bitmask matches length
            unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) }
        }
    }

    /// Returns true if any lane is set.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) fn any(self) -> bool {
        // Safety: use `self` as an integer vector
        unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) }
    }

    /// Returns true if every lane is set.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub(crate) fn all(self) -> bool {
        // Safety: use `self` as an integer vector
        unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
    }
}
impl<T, const N: usize> From<Mask<T, N>> for Simd<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    /// Unwraps the mask into its underlying integer vector.
    #[inline]
    fn from(value: Mask<T, N>) -> Self {
        value.0
    }
}
impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise AND of two masks.
    #[inline]
    fn bitand(self, rhs: Self) -> Self {
        // Safety: `self` is an integer vector
        unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) }
    }
}

impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise OR of two masks.
    #[inline]
    fn bitor(self, rhs: Self) -> Self {
        // Safety: `self` is an integer vector
        unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) }
    }
}

impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise XOR of two masks.
    #[inline]
    fn bitxor(self, rhs: Self) -> Self {
        // Safety: `self` is an integer vector
        unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) }
    }
}

impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
    T: MaskElement,
    LaneCount<N>: SupportedLaneCount,
{
    type Output = Self;

    /// Lane-wise NOT: XOR with the all-true mask flips every lane while
    /// keeping the all-TRUE/all-FALSE lane invariant intact.
    #[inline]
    fn not(self) -> Self::Output {
        Self::splat(true) ^ self
    }
}

View file

@ -5,7 +5,6 @@ mod alias;
mod cast;
mod fmt;
mod iter;
mod lane_count;
mod masks;
mod ops;
mod select;
@ -27,8 +26,8 @@ pub mod simd {
pub use crate::core_simd::alias::*;
pub use crate::core_simd::cast::*;
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
pub use crate::core_simd::masks::*;
pub use crate::core_simd::select::*;
pub use crate::core_simd::swizzle::*;
pub use crate::core_simd::to_bytes::ToBytes;
pub use crate::core_simd::vector::*;

View file

@ -1,4 +1,4 @@
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
use crate::simd::{Select, Simd, SimdElement, cmp::SimdPartialEq};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
@ -12,7 +12,6 @@ mod unary;
impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
type Output = I::Output;
@ -25,7 +24,6 @@ where
impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
#[inline]
@ -130,7 +128,6 @@ macro_rules! for_base_types {
impl<const N: usize> $op<Self> for Simd<$scalar, N>
where
$scalar: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = $out;

View file

@ -21,7 +21,6 @@ macro_rules! assign_ops {
where
Self: $trait<U, Output = Self>,
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn $assign_call(&mut self, rhs: U) {

View file

@ -13,7 +13,6 @@ macro_rules! deref_lhs {
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
LaneCount<N>: SupportedLaneCount,
{
type Output = Simd<T, N>;
@ -33,7 +32,6 @@ macro_rules! deref_rhs {
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
LaneCount<N>: SupportedLaneCount,
{
type Output = Simd<T, N>;
@ -64,7 +62,6 @@ macro_rules! deref_ops {
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
LaneCount<N>: SupportedLaneCount,
{
type Output = $simd;

View file

@ -1,13 +1,11 @@
// Shift operations uniquely typically only have a scalar on the right-hand side.
// Here, we implement shifts for scalar RHS arguments.
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::Simd;
macro_rules! impl_splatted_shifts {
{ impl $trait:ident :: $trait_fn:ident for $ty:ty } => {
impl<const N: usize> core::ops::$trait<$ty> for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@ -17,8 +15,6 @@ macro_rules! impl_splatted_shifts {
}
impl<const N: usize> core::ops::$trait<&$ty> for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@ -28,8 +24,6 @@ macro_rules! impl_splatted_shifts {
}
impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Output = Simd<$ty, N>;
#[inline]
@ -39,8 +33,6 @@ macro_rules! impl_splatted_shifts {
}
impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Output = Simd<$ty, N>;
#[inline]

View file

@ -1,4 +1,4 @@
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use crate::simd::{Simd, SimdElement};
use core::ops::{Neg, Not}; // unary ops
macro_rules! neg {
@ -6,7 +6,6 @@ macro_rules! neg {
$(impl<const N: usize> Neg for Simd<$scalar, N>
where
$scalar: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
@ -40,7 +39,6 @@ macro_rules! not {
$(impl<const N: usize> Not for Simd<$scalar, N>
where
$scalar: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
type Output = Self;

View file

@ -1,54 +1,155 @@
use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
use crate::simd::{FixEndianness, Mask, MaskElement, Simd, SimdElement};
impl<T, const N: usize> Mask<T, N>
/// Choose elements from two vectors using a mask.
///
/// For each element in the mask, choose the corresponding element from `true_values` if
/// that element mask is true, and `false_values` if that element mask is false.
///
/// If the mask is `u64`, it's treated as a bitmask with the least significant bit
/// corresponding to the first element.
///
/// # Examples
///
/// ## Selecting values from `Simd`
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::{Simd, Mask, Select};
/// let a = Simd::from_array([0, 1, 2, 3]);
/// let b = Simd::from_array([4, 5, 6, 7]);
/// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
/// let c = mask.select(a, b);
/// assert_eq!(c.to_array(), [0, 5, 6, 3]);
/// ```
///
/// ## Selecting values from `Mask`
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::{Mask, Select};
/// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
/// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
/// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
/// let c = mask.select(a, b);
/// assert_eq!(c.to_array(), [true, false, true, false]);
/// ```
///
/// ## Selecting with a bitmask
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::{Mask, Select};
/// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
/// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
/// let mask = 0b1001;
/// let c = mask.select(a, b);
/// assert_eq!(c.to_array(), [true, false, true, false]);
/// ```
pub trait Select<T> {
    /// Chooses elements from `true_values` and `false_values` based on
    /// `self`; see the trait-level documentation for the element-wise
    /// semantics and examples.
    fn select(self, true_values: T, false_values: T) -> T;
}
impl<T, U, const N: usize> Select<Simd<T, N>> for Mask<U, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
U: MaskElement,
{
/// Choose elements from two vectors.
///
/// For each element in the mask, choose the corresponding element from `true_values` if
/// that element mask is true, and `false_values` if that element mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # use core::simd::{Simd, Mask};
/// let a = Simd::from_array([0, 1, 2, 3]);
/// let b = Simd::from_array([4, 5, 6, 7]);
/// let mask = Mask::from_array([true, false, false, true]);
/// let c = mask.select(a, b);
/// assert_eq!(c.to_array(), [0, 5, 6, 3]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn select<U>(self, true_values: Simd<U, N>, false_values: Simd<U, N>) -> Simd<U, N>
where
U: SimdElement<Mask = T>,
{
// Safety: The mask has been cast to a vector of integers,
// and the operands to select between are vectors of the same type and length.
unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) }
}
/// Choose elements from two masks.
///
/// For each element in the mask, choose the corresponding element from `true_values` if
/// that element mask is true, and `false_values` if that element mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # use core::simd::Mask;
/// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
/// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
/// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
/// let c = mask.select_mask(a, b);
/// assert_eq!(c.to_array(), [true, false, true, false]);
/// ```
#[inline]
#[must_use = "method returns a new mask and does not mutate the original inputs"]
pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
self & true_values | !self & false_values
fn select(self, true_values: Simd<T, N>, false_values: Simd<T, N>) -> Simd<T, N> {
// Safety:
// simd_as between masks is always safe (they're vectors of ints).
// simd_select uses a mask that matches the width and number of elements
unsafe {
let mask: Simd<T::Mask, N> = core::intrinsics::simd::simd_as(self.to_simd());
core::intrinsics::simd::simd_select(mask, true_values, false_values)
}
}
}
impl<T, const N: usize> Select<Simd<T, N>> for u64
where
    T: SimdElement,
{
    /// Selects elements using `self` as a bitmask: bit `i` (starting at
    /// the least significant bit) chooses between `true_values[i]` and
    /// `false_values[i]`.
    #[inline]
    fn select(self, true_values: Simd<T, N>, false_values: Simd<T, N>) -> Simd<T, N> {
        // A u64 bitmask can describe at most 64 lanes; reject larger N at
        // compile time.
        const {
            assert!(N <= 64, "number of elements can't be greater than 64");
        }
        /// Monomorphic helper: widen both vectors to `M` lanes, select
        /// with a `U`-sized bitmask, then shrink back to `N` lanes.
        ///
        /// # Safety
        /// The bit width of `U` must equal `M`.
        #[inline]
        unsafe fn select_impl<T, U: FixEndianness, const M: usize, const N: usize>(
            bitmask: U,
            true_values: Simd<T, N>,
            false_values: Simd<T, N>,
        ) -> Simd<T, N>
        where
            T: SimdElement,
        {
            // Padding lanes N..M are filled with an arbitrary existing
            // element; their selected values are discarded by the final
            // resize back to N.
            // NOTE(review): indexing `true_values[0]` assumes N >= 1.
            let default = true_values[0];
            let true_values = true_values.resize::<M>(default);
            let false_values = false_values.resize::<M>(default);
            // LLVM assumes bit order should match endianness
            let bitmask = bitmask.fix_endianness();
            // Safety: the caller guarantees that the size of U matches M
            let selected = unsafe {
                core::intrinsics::simd::simd_select_bitmask(bitmask, true_values, false_values)
            };
            selected.resize::<N>(default)
        }
        // TODO modify simd_bitmask_select to truncate input, making this unnecessary
        // Dispatch to the narrowest bitmask width that covers N lanes.
        if N <= 8 {
            let bitmask = self as u8;
            // Safety: bitmask matches length
            unsafe { select_impl::<T, u8, 8, N>(bitmask, true_values, false_values) }
        } else if N <= 16 {
            let bitmask = self as u16;
            // Safety: bitmask matches length
            unsafe { select_impl::<T, u16, 16, N>(bitmask, true_values, false_values) }
        } else if N <= 32 {
            let bitmask = self as u32;
            // Safety: bitmask matches length
            unsafe { select_impl::<T, u32, 32, N>(bitmask, true_values, false_values) }
        } else {
            let bitmask = self;
            // Safety: bitmask matches length
            unsafe { select_impl::<T, u64, 64, N>(bitmask, true_values, false_values) }
        }
    }
}
impl<T, U, const N: usize> Select<Mask<T, N>> for Mask<U, N>
where
    T: MaskElement,
    U: MaskElement,
{
    /// Selects between two masks lane-wise by reusing the `Simd`
    /// selection and re-wrapping the result as a mask.
    #[inline]
    fn select(self, true_values: Mask<T, N>, false_values: Mask<T, N>) -> Mask<T, N> {
        // Qualified call resolves to the `Select<Simd<T, N>>` impl, since
        // the arguments here are plain vectors.
        let selected: Simd<T, N> =
            Select::select(self, true_values.to_simd(), false_values.to_simd());
        // Safety: all values come from masks
        unsafe { Mask::from_simd_unchecked(selected) }
    }
}
impl<T, const N: usize> Select<Mask<T, N>> for u64
where
    T: MaskElement,
{
    /// Selects between two masks lane-wise using `self` as a bitmask
    /// (least significant bit corresponds to lane 0).
    #[inline]
    fn select(self, true_values: Mask<T, N>, false_values: Mask<T, N>) -> Mask<T, N> {
        // Qualified call resolves to the `Select<Simd<T, N>>` impl for
        // u64, since the arguments here are plain vectors.
        let selected: Simd<T, N> =
            Select::select(self, true_values.to_simd(), false_values.to_simd());
        // Safety: all values come from masks
        unsafe { Mask::from_simd_unchecked(selected) }
    }
}

View file

@ -1,5 +1,5 @@
use crate::simd::{
LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
Mask, Simd, SimdElement,
ptr::{SimdConstPtr, SimdMutPtr},
};
@ -21,8 +21,6 @@ macro_rules! impl_number {
{ $($number:ty),* } => {
$(
impl<const N: usize> SimdPartialEq for Simd<$number, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Mask = Mask<<$number as SimdElement>::Mask, N>;
@ -30,14 +28,14 @@ macro_rules! impl_number {
fn simd_eq(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
}
#[inline]
fn simd_ne(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
}
}
)*
@ -50,8 +48,6 @@ macro_rules! impl_mask {
{ $($integer:ty),* } => {
$(
impl<const N: usize> SimdPartialEq for Mask<$integer, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Mask = Self;
@ -59,14 +55,14 @@ macro_rules! impl_mask {
fn simd_eq(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_eq(self.to_simd(), other.to_simd())) }
}
#[inline]
fn simd_ne(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ne(self.to_simd(), other.to_simd())) }
}
}
)*
@ -75,10 +71,7 @@ macro_rules! impl_mask {
impl_mask! { i8, i16, i32, i64, isize }
impl<T, const N: usize> SimdPartialEq for Simd<*const T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdPartialEq for Simd<*const T, N> {
type Mask = Mask<isize, N>;
#[inline]
@ -92,10 +85,7 @@ where
}
}
impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N> {
type Mask = Mask<isize, N>;
#[inline]

View file

@ -1,5 +1,5 @@
use crate::simd::{
LaneCount, Mask, Simd, SupportedLaneCount,
Mask, Select, Simd,
cmp::SimdPartialEq,
ptr::{SimdConstPtr, SimdMutPtr},
};
@ -49,41 +49,37 @@ macro_rules! impl_integer {
{ $($integer:ty),* } => {
$(
impl<const N: usize> SimdPartialOrd for Simd<$integer, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
}
#[inline]
fn simd_le(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) }
}
#[inline]
fn simd_gt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
}
#[inline]
fn simd_ge(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
}
}
impl<const N: usize> SimdOrd for Simd<$integer, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
@ -115,35 +111,33 @@ macro_rules! impl_float {
{ $($float:ty),* } => {
$(
impl<const N: usize> SimdPartialOrd for Simd<$float, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
}
#[inline]
fn simd_le(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) }
}
#[inline]
fn simd_gt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
}
#[inline]
fn simd_ge(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
}
}
)*
@ -156,50 +150,46 @@ macro_rules! impl_mask {
{ $($integer:ty),* } => {
$(
impl<const N: usize> SimdPartialOrd for Mask<$integer, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_lt(self.to_simd(), other.to_simd())) }
}
#[inline]
fn simd_le(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_le(self.to_simd(), other.to_simd())) }
}
#[inline]
fn simd_gt(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_gt(self.to_simd(), other.to_simd())) }
}
#[inline]
fn simd_ge(self, other: Self) -> Self::Mask {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) }
unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ge(self.to_simd(), other.to_simd())) }
}
}
impl<const N: usize> SimdOrd for Mask<$integer, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
self.simd_gt(other).select_mask(other, self)
self.simd_gt(other).select(other, self)
}
#[inline]
fn simd_min(self, other: Self) -> Self {
self.simd_lt(other).select_mask(other, self)
self.simd_lt(other).select(other, self)
}
#[inline]
@ -218,10 +208,7 @@ macro_rules! impl_mask {
impl_mask! { i8, i16, i32, i64, isize }
impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N> {
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
self.addr().simd_lt(other.addr())
@ -243,10 +230,7 @@ where
}
}
impl<T, const N: usize> SimdOrd for Simd<*const T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdOrd for Simd<*const T, N> {
#[inline]
fn simd_max(self, other: Self) -> Self {
self.simd_lt(other).select(other, self)
@ -268,10 +252,7 @@ where
}
}
impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N> {
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
self.addr().simd_lt(other.addr())
@ -293,10 +274,7 @@ where
}
}
impl<T, const N: usize> SimdOrd for Simd<*mut T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdOrd for Simd<*mut T, N> {
#[inline]
fn simd_max(self, other: Self) -> Self {
self.simd_lt(other).select(other, self)

View file

@ -1,6 +1,6 @@
use super::sealed::Sealed;
use crate::simd::{
LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
Mask, Select, Simd, SimdCast, SimdElement,
cmp::{SimdPartialEq, SimdPartialOrd},
};
@ -240,15 +240,9 @@ pub trait SimdFloat: Copy + Sealed {
macro_rules! impl_trait {
{ $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
$(
impl<const N: usize> Sealed for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
}
impl<const N: usize> Sealed for Simd<$ty, N> {}
impl<const N: usize> SimdFloat for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>;
type Scalar = $ty;

View file

@ -1,7 +1,6 @@
use super::sealed::Sealed;
use crate::simd::{
LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd,
cmp::SimdPartialOrd, num::SimdUint,
Mask, Select, Simd, SimdCast, SimdElement, cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint,
};
/// Operations on SIMD vectors of signed integers.
@ -242,16 +241,9 @@ pub trait SimdInt: Copy + Sealed {
macro_rules! impl_trait {
{ $($ty:ident ($unsigned:ident)),* } => {
$(
impl<const N: usize> Sealed for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
}
impl<const N: usize> Sealed for Simd<$ty, N> {}
impl<const N: usize> SimdInt for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<const N: usize> SimdInt for Simd<$ty, N> {
type Mask = Mask<<$ty as SimdElement>::Mask, N>;
type Scalar = $ty;
type Unsigned = Simd<$unsigned, N>;

View file

@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd};
use crate::simd::{Simd, SimdCast, SimdElement, cmp::SimdOrd};
/// Operations on SIMD vectors of unsigned integers.
pub trait SimdUint: Copy + Sealed {
@ -124,15 +124,9 @@ pub trait SimdUint: Copy + Sealed {
macro_rules! impl_trait {
{ $($ty:ident ($signed:ident)),* } => {
$(
impl<const N: usize> Sealed for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
}
impl<const N: usize> Sealed for Simd<$ty, N> {}
impl<const N: usize> SimdUint for Simd<$ty, N>
where
LaneCount<N>: SupportedLaneCount,
{
type Scalar = $ty;
type Cast<T: SimdElement> = Simd<T, N>;

View file

@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint};
/// Operations on SIMD vectors of constant pointers.
pub trait SimdConstPtr: Copy + Sealed {
@ -88,12 +88,9 @@ pub trait SimdConstPtr: Copy + Sealed {
fn wrapping_sub(self, count: Self::Usize) -> Self;
}
impl<T, const N: usize> Sealed for Simd<*const T, N> where LaneCount<N>: SupportedLaneCount {}
impl<T, const N: usize> Sealed for Simd<*const T, N> {}
impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdConstPtr for Simd<*const T, N> {
type Usize = Simd<usize, N>;
type Isize = Simd<isize, N>;
type CastPtr<U> = Simd<*const U, N>;

View file

@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint};
/// Operations on SIMD vectors of mutable pointers.
pub trait SimdMutPtr: Copy + Sealed {
@ -85,12 +85,9 @@ pub trait SimdMutPtr: Copy + Sealed {
fn wrapping_sub(self, count: Self::Usize) -> Self;
}
impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount {}
impl<T, const N: usize> Sealed for Simd<*mut T, N> {}
impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N> {
type Usize = Simd<usize, N>;
type Isize = Simd<isize, N>;
type CastPtr<U> = Simd<*mut U, N>;

View file

@ -1,4 +1,4 @@
use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
use crate::simd::{Mask, MaskElement, Simd, SimdElement};
/// Constructs a new SIMD vector by copying elements from selected elements in other vectors.
///
@ -82,8 +82,6 @@ pub trait Swizzle<const N: usize> {
fn swizzle<T, const M: usize>(vector: Simd<T, M>) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
LaneCount<M>: SupportedLaneCount,
{
// Safety: `vector` is a vector, and the index is a const vector of u32.
unsafe {
@ -122,8 +120,6 @@ pub trait Swizzle<const N: usize> {
fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
LaneCount<M>: SupportedLaneCount,
{
// Safety: `first` and `second` are vectors, and the index is a const vector of u32.
unsafe {
@ -161,11 +157,9 @@ pub trait Swizzle<const N: usize> {
fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
LaneCount<M>: SupportedLaneCount,
{
// SAFETY: all elements of this mask come from another mask
unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) }
unsafe { Mask::from_simd_unchecked(Self::swizzle(mask.to_simd())) }
}
/// Creates a new mask from the elements of `first` and `second`.
@ -177,18 +171,17 @@ pub trait Swizzle<const N: usize> {
fn concat_swizzle_mask<T, const M: usize>(first: Mask<T, M>, second: Mask<T, M>) -> Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
LaneCount<M>: SupportedLaneCount,
{
// SAFETY: all elements of this mask come from another mask
unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) }
unsafe {
Mask::from_simd_unchecked(Self::concat_swizzle(first.to_simd(), second.to_simd()))
}
}
}
impl<T, const N: usize> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
/// Reverse the order of the elements in the vector.
#[inline]
@ -462,10 +455,7 @@ where
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn resize<const M: usize>(self, value: T) -> Simd<T, M>
where
LaneCount<M>: SupportedLaneCount,
{
pub fn resize<const M: usize>(self, value: T) -> Simd<T, M> {
struct Resize<const N: usize>;
impl<const N: usize, const M: usize> Swizzle<M> for Resize<N> {
const INDEX: [usize; M] = const {
@ -493,10 +483,7 @@ where
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN>
where
LaneCount<LEN>: SupportedLaneCount,
{
pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN> {
struct Extract<const N: usize, const START: usize>;
impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> {
const INDEX: [usize; LEN] = const {
@ -517,14 +504,13 @@ where
impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
/// Reverse the order of the elements in the mask.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn reverse(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().reverse()) }
unsafe { Self::from_simd_unchecked(self.to_simd().reverse()) }
}
/// Rotates the mask such that the first `OFFSET` elements of the slice move to the end
@ -534,7 +520,7 @@ where
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) }
unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_left::<OFFSET>()) }
}
/// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to
@ -544,7 +530,7 @@ where
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) }
unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_right::<OFFSET>()) }
}
/// Shifts the mask elements to the left by `OFFSET`, filling in with
@ -554,7 +540,7 @@ where
pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self {
// Safety: swizzles are safe for masks
unsafe {
Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding {
Self::from_simd_unchecked(self.to_simd().shift_elements_left::<OFFSET>(if padding {
T::TRUE
} else {
T::FALSE
@ -569,7 +555,7 @@ where
pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self {
// Safety: swizzles are safe for masks
unsafe {
Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding {
Self::from_simd_unchecked(self.to_simd().shift_elements_right::<OFFSET>(if padding {
T::TRUE
} else {
T::FALSE
@ -598,9 +584,9 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn interleave(self, other: Self) -> (Self, Self) {
let (lo, hi) = self.to_int().interleave(other.to_int());
let (lo, hi) = self.to_simd().interleave(other.to_simd());
// Safety: swizzles are safe for masks
unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) }
unsafe { (Self::from_simd_unchecked(lo), Self::from_simd_unchecked(hi)) }
}
/// Deinterleave two masks.
@ -627,12 +613,12 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn deinterleave(self, other: Self) -> (Self, Self) {
let (even, odd) = self.to_int().deinterleave(other.to_int());
let (even, odd) = self.to_simd().deinterleave(other.to_simd());
// Safety: swizzles are safe for masks
unsafe {
(
Self::from_int_unchecked(even),
Self::from_int_unchecked(odd),
Self::from_simd_unchecked(even),
Self::from_simd_unchecked(odd),
)
}
}
@ -653,13 +639,10 @@ where
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M>
where
LaneCount<M>: SupportedLaneCount,
{
pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M> {
// Safety: swizzles are safe for masks
unsafe {
Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value {
Mask::<T, M>::from_simd_unchecked(self.to_simd().resize::<M>(if value {
T::TRUE
} else {
T::FALSE
@ -679,11 +662,8 @@ where
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN>
where
LaneCount<LEN>: SupportedLaneCount,
{
pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN> {
// Safety: swizzles are safe for masks
unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) }
unsafe { Mask::<T, LEN>::from_simd_unchecked(self.to_simd().extract::<START, LEN>()) }
}
}

View file

@ -1,10 +1,7 @@
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::Simd;
use core::mem;
impl<const N: usize> Simd<u8, N>
where
LaneCount<N>: SupportedLaneCount,
{
impl<const N: usize> Simd<u8, N> {
/// Swizzle a vector of bytes according to the index vector.
/// Indices within range select the appropriate byte.
/// Indices "out of bounds" instead select 0.
@ -139,7 +136,7 @@ unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> S
#[inline]
#[allow(clippy::let_and_return)]
unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
use crate::simd::cmp::SimdPartialOrd;
use crate::simd::{Select, cmp::SimdPartialOrd};
#[cfg(target_arch = "x86")]
use core::arch::x86;
#[cfg(target_arch = "x86_64")]
@ -184,10 +181,7 @@ unsafe fn transize<T, const N: usize>(
f: unsafe fn(T, T) -> T,
a: Simd<u8, N>,
b: Simd<u8, N>,
) -> Simd<u8, N>
where
LaneCount<N>: SupportedLaneCount,
{
) -> Simd<u8, N> {
// SAFETY: Same obligation to use this function as to use mem::transmute_copy.
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) }
}
@ -196,11 +190,8 @@ where
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[allow(unused)]
#[inline(always)]
fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
where
LaneCount<N>: SupportedLaneCount,
{
use crate::simd::cmp::SimdPartialOrd;
fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N> {
use crate::simd::{Select, cmp::SimdPartialOrd};
idxs.simd_lt(Simd::splat(N as u8))
.select(idxs, Simd::splat(u8::MAX))
}

View file

@ -1,12 +1,12 @@
use crate::simd::{
LaneCount, Simd, SimdElement, SupportedLaneCount,
Simd, SimdElement,
num::{SimdFloat, SimdInt, SimdUint},
};
mod sealed {
use super::*;
pub trait Sealed {}
impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> {}
}
use sealed::Sealed;

View file

@ -1,5 +1,7 @@
use core::intrinsics::simd::SimdAlign;
use crate::simd::{
LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
Mask, MaskElement,
cmp::SimdPartialOrd,
num::SimdUint,
ptr::{SimdConstPtr, SimdMutPtr},
@ -51,6 +53,8 @@ use crate::simd::{
/// Thus it is sound to [`transmute`] `Simd<T, N>` to `[T; N]` and should optimize to "zero cost",
/// but the reverse transmutation may require a copy the compiler cannot simply elide.
///
/// `N` cannot be 0 and may be at most 64. This limit may be increased in the future.
///
/// # ABI "Features"
/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed and returned via memory,
/// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept
@ -100,14 +104,13 @@ use crate::simd::{
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
// causes rustc to emit illegal LLVM IR in some cases.
#[repr(simd, packed)]
#[rustc_simd_monomorphize_lane_limit = "64"]
pub struct Simd<T, const N: usize>([T; N])
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement;
impl<T, const N: usize> Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
/// Number of elements in this vector.
@ -146,30 +149,8 @@ where
#[inline]
#[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
pub const fn splat(value: T) -> Self {
const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
Simd::from_array([value; N])
}
fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
// This is preferred over `[value; N]`, since it's explicitly a splat:
// https://github.com/rust-lang/rust/issues/97804
struct Splat;
impl<const N: usize> Swizzle<N> for Splat {
const INDEX: [usize; N] = [0; N];
}
Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
}
core::intrinsics::const_eval_select((value,), splat_const, splat_rt)
// SAFETY: T is a SimdElement, and the item type of Self.
unsafe { core::intrinsics::simd::simd_splat(value) }
}
/// Returns an array reference containing the entire SIMD vector.
@ -195,7 +176,7 @@ where
/// Returns a mutable array reference containing the entire SIMD vector.
#[inline]
pub fn as_mut_array(&mut self) -> &mut [T; N] {
pub const fn as_mut_array(&mut self) -> &mut [T; N] {
// SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with
// potential padding at the end, so pointer casting to a
// `&mut [T; N]` is safe.
@ -324,7 +305,7 @@ where
/// ```
#[inline]
#[track_caller]
pub fn copy_to_slice(self, slice: &mut [T]) {
pub const fn copy_to_slice(self, slice: &mut [T]) {
assert!(
slice.len() >= Self::LEN,
"slice length must be at least the number of elements"
@ -465,7 +446,7 @@ where
/// value from `or` is passed through.
///
/// # Safety
/// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`.
/// Enabled `ptr` elements must be safe to read as if by `core::ptr::read`.
#[must_use]
#[inline]
pub unsafe fn load_select_ptr(
@ -475,12 +456,11 @@ where
) -> Self {
// SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
unsafe {
core::intrinsics::simd::simd_masked_load::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, or)
core::intrinsics::simd::simd_masked_load::<_, _, _, { SimdAlign::Element }>(
enable.to_simd(),
ptr,
or,
)
}
}
@ -659,7 +639,7 @@ where
or: Self,
) -> Self {
// Safety: The caller is responsible for upholding all invariants
unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_simd()) }
}
/// Conditionally write contiguous elements to `slice`. The `enable` mask controls
@ -731,12 +711,11 @@ where
pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
// SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
unsafe {
core::intrinsics::simd::simd_masked_store::<
_,
_,
_,
{ core::intrinsics::simd::SimdAlign::Element },
>(enable.to_int(), ptr, self)
core::intrinsics::simd::simd_masked_store::<_, _, _, { SimdAlign::Element }>(
enable.to_simd(),
ptr,
self,
)
}
}
@ -896,20 +875,14 @@ where
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask<isize, N>) {
// Safety: The caller is responsible for upholding all invariants
unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) }
unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_simd()) }
}
}
impl<T, const N: usize> Copy for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
}
impl<T, const N: usize> Copy for Simd<T, N> where T: SimdElement {}
impl<T, const N: usize> Clone for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -920,7 +893,6 @@ where
impl<T, const N: usize> Default for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + Default,
{
#[inline]
@ -931,7 +903,6 @@ where
impl<T, const N: usize> PartialEq for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + PartialEq,
{
#[inline]
@ -940,7 +911,7 @@ where
let mask = unsafe {
let tfvec: Simd<<T as SimdElement>::Mask, N> =
core::intrinsics::simd::simd_eq(*self, *other);
Mask::from_int_unchecked(tfvec)
Mask::from_simd_unchecked(tfvec)
};
// Two vectors are equal if all elements are equal when compared elementwise
@ -954,7 +925,7 @@ where
let mask = unsafe {
let tfvec: Simd<<T as SimdElement>::Mask, N> =
core::intrinsics::simd::simd_ne(*self, *other);
Mask::from_int_unchecked(tfvec)
Mask::from_simd_unchecked(tfvec)
};
// Two vectors are non-equal if any elements are non-equal when compared elementwise
@ -965,7 +936,6 @@ where
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
impl<T, const N: usize> PartialOrd for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + PartialOrd,
{
#[inline]
@ -975,17 +945,11 @@ where
}
}
impl<T, const N: usize> Eq for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + Eq,
{
}
impl<T, const N: usize> Eq for Simd<T, N> where T: SimdElement + Eq {}
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
impl<T, const N: usize> Ord for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + Ord,
{
#[inline]
@ -997,7 +961,6 @@ where
impl<T, const N: usize> core::hash::Hash for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement + core::hash::Hash,
{
#[inline]
@ -1012,7 +975,6 @@ where
// array references
impl<T, const N: usize> AsRef<[T; N]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1023,7 +985,6 @@ where
impl<T, const N: usize> AsMut<[T; N]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1035,7 +996,6 @@ where
// slice references
impl<T, const N: usize> AsRef<[T]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1046,7 +1006,6 @@ where
impl<T, const N: usize> AsMut<[T]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1058,7 +1017,6 @@ where
// vector/array conversion
impl<T, const N: usize> From<[T; N]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1069,7 +1027,6 @@ where
impl<T, const N: usize> From<Simd<T, N>> for [T; N]
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
#[inline]
@ -1080,7 +1037,6 @@ where
impl<T, const N: usize> TryFrom<&[T]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
type Error = core::array::TryFromSliceError;
@ -1093,7 +1049,6 @@ where
impl<T, const N: usize> TryFrom<&mut [T]> for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
T: SimdElement,
{
type Error = core::array::TryFromSliceError;
@ -1231,10 +1186,7 @@ where
}
#[inline]
fn lane_indices<const N: usize>() -> Simd<usize, N>
where
LaneCount<N>: SupportedLaneCount,
{
fn lane_indices<const N: usize>() -> Simd<usize, N> {
#![allow(clippy::needless_range_loop)]
let mut index = [0; N];
for i in 0..N {
@ -1246,7 +1198,6 @@ where
#[inline]
fn mask_up_to<M, const N: usize>(len: usize) -> Mask<M, N>
where
LaneCount<N>: SupportedLaneCount,
M: MaskElement,
{
let index = lane_indices::<N>();

View file

@ -1,31 +1,26 @@
use crate::simd::*;
use core::arch::loongarch64::*;
from_transmute! { unsafe u8x16 => v16u8 }
from_transmute! { unsafe u8x32 => v32u8 }
from_transmute! { unsafe i8x16 => v16i8 }
from_transmute! { unsafe i8x32 => v32i8 }
from_transmute! { unsafe u8x16 => m128i }
from_transmute! { unsafe u8x32 => m256i }
from_transmute! { unsafe i8x16 => m128i }
from_transmute! { unsafe i8x32 => m256i }
from_transmute! { unsafe u16x8 => v8u16 }
from_transmute! { unsafe u16x16 => v16u16 }
from_transmute! { unsafe i16x8 => v8i16 }
from_transmute! { unsafe i16x16 => v16i16 }
from_transmute! { unsafe u16x8 => m128i }
from_transmute! { unsafe u16x16 => m256i }
from_transmute! { unsafe i16x8 => m128i }
from_transmute! { unsafe i16x16 => m256i }
from_transmute! { unsafe u32x4 => v4u32 }
from_transmute! { unsafe u32x8 => v8u32 }
from_transmute! { unsafe i32x4 => v4i32 }
from_transmute! { unsafe i32x8 => v8i32 }
from_transmute! { unsafe f32x4 => v4f32 }
from_transmute! { unsafe f32x8 => v8f32 }
from_transmute! { unsafe u32x4 => m128i }
from_transmute! { unsafe u32x8 => m256i }
from_transmute! { unsafe i32x4 => m128i }
from_transmute! { unsafe i32x8 => m256i }
from_transmute! { unsafe f32x4 => m128 }
from_transmute! { unsafe f32x8 => m256 }
from_transmute! { unsafe u64x2 => v2u64 }
from_transmute! { unsafe u64x4 => v4u64 }
from_transmute! { unsafe i64x2 => v2i64 }
from_transmute! { unsafe i64x4 => v4i64 }
from_transmute! { unsafe f64x2 => v2f64 }
from_transmute! { unsafe f64x4 => v4f64 }
from_transmute! { unsafe usizex2 => v2u64 }
from_transmute! { unsafe usizex4 => v4u64 }
from_transmute! { unsafe isizex2 => v2i64 }
from_transmute! { unsafe isizex4 => v4i64 }
from_transmute! { unsafe u64x2 => m128i }
from_transmute! { unsafe u64x4 => m256i }
from_transmute! { unsafe i64x2 => m128i }
from_transmute! { unsafe i64x4 => m256i }
from_transmute! { unsafe f64x2 => m128d }
from_transmute! { unsafe f64x4 => m256d }

View file

@ -14,17 +14,3 @@ from_transmute! { unsafe f32x4 => v128 }
from_transmute! { unsafe u64x2 => v128 }
from_transmute! { unsafe i64x2 => v128 }
from_transmute! { unsafe f64x2 => v128 }
#[cfg(target_pointer_width = "32")]
mod p32 {
use super::*;
from_transmute! { unsafe usizex4 => v128 }
from_transmute! { unsafe isizex4 => v128 }
}
#[cfg(target_pointer_width = "64")]
mod p64 {
use super::*;
from_transmute! { unsafe usizex2 => v128 }
from_transmute! { unsafe isizex2 => v128 }
}

View file

@ -39,25 +39,3 @@ from_transmute! { unsafe i64x8 => __m512i }
from_transmute! { unsafe f64x2 => __m128d }
from_transmute! { unsafe f64x4 => __m256d }
from_transmute! { unsafe f64x8 => __m512d }
#[cfg(target_pointer_width = "32")]
mod p32 {
use super::*;
from_transmute! { unsafe usizex4 => __m128i }
from_transmute! { unsafe usizex8 => __m256i }
from_transmute! { unsafe Simd<usize, 16> => __m512i }
from_transmute! { unsafe isizex4 => __m128i }
from_transmute! { unsafe isizex8 => __m256i }
from_transmute! { unsafe Simd<isize, 16> => __m512i }
}
#[cfg(target_pointer_width = "64")]
mod p64 {
use super::*;
from_transmute! { unsafe usizex2 => __m128i }
from_transmute! { unsafe usizex4 => __m256i }
from_transmute! { unsafe usizex8 => __m512i }
from_transmute! { unsafe isizex2 => __m128i }
from_transmute! { unsafe isizex4 => __m256i }
from_transmute! { unsafe isizex8 => __m512i }
}

View file

@ -65,9 +65,9 @@ macro_rules! test_mask_api {
fn roundtrip_int_conversion() {
let values = [true, false, false, true, false, false, true, false];
let mask = Mask::<$type, 8>::from_array(values);
let int = mask.to_int();
let int = mask.to_simd();
assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
assert_eq!(Mask::<$type, 8>::from_int(int), mask);
assert_eq!(Mask::<$type, 8>::from_simd(int), mask);
}
#[test]

View file

@ -11,7 +11,7 @@ use core_simd::simd;
use core::intrinsics::simd as intrinsics;
use simd::{LaneCount, Simd, SupportedLaneCount};
use simd::Simd;
#[cfg(feature = "as_crate")]
mod experimental {
@ -66,28 +66,43 @@ pub trait StdFloat: Sealed + Sized {
/// Produces a vector where every element has the sine of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn sin(self) -> Self;
fn sin(self) -> Self {
unsafe { intrinsics::simd_fsin(self) }
}
/// Produces a vector where every element has the cosine of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn cos(self) -> Self;
fn cos(self) -> Self {
unsafe { intrinsics::simd_fcos(self) }
}
/// Produces a vector where every element has the exponential (base e) of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn exp(self) -> Self;
fn exp(self) -> Self {
unsafe { intrinsics::simd_fexp(self) }
}
/// Produces a vector where every element has the exponential (base 2) of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn exp2(self) -> Self;
fn exp2(self) -> Self {
unsafe { intrinsics::simd_fexp2(self) }
}
/// Produces a vector where every element has the natural logarithm of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn ln(self) -> Self;
fn ln(self) -> Self {
unsafe { intrinsics::simd_flog(self) }
}
    /// Produces a vector where every element has the logarithm with respect to an arbitrary base
    /// of the values in the equivalently-indexed elements in `self` and `base`.
@ -99,13 +114,19 @@ pub trait StdFloat: Sealed + Sized {
/// Produces a vector where every element has the base-2 logarithm of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn log2(self) -> Self;
fn log2(self) -> Self {
unsafe { intrinsics::simd_flog2(self) }
}
/// Produces a vector where every element has the base-10 logarithm of the value
/// in the equivalently-indexed element in `self`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
fn log10(self) -> Self;
fn log10(self) -> Self {
unsafe { intrinsics::simd_flog10(self) }
}
/// Returns the smallest integer greater than or equal to each element.
#[must_use = "method returns a new vector and does not mutate the original value"]
@ -140,68 +161,19 @@ pub trait StdFloat: Sealed + Sized {
fn fract(self) -> Self;
}
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
impl<const N: usize> Sealed for Simd<f32, N> {}
impl<const N: usize> Sealed for Simd<f64, N> {}
macro_rules! impl_float {
{
$($fn:ident: $intrinsic:ident,)*
} => {
impl<const N: usize> StdFloat for Simd<f32, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn fract(self) -> Self {
self - self.trunc()
}
$(
#[inline]
fn $fn(self) -> Self {
unsafe { intrinsics::$intrinsic(self) }
}
)*
}
impl<const N: usize> StdFloat for Simd<f64, N>
where
LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn fract(self) -> Self {
self - self.trunc()
}
$(
#[inline]
fn $fn(self) -> Self {
// https://github.com/llvm/llvm-project/issues/83729
#[cfg(target_arch = "aarch64")]
{
let mut ln = Self::splat(0f64);
for i in 0..N {
ln[i] = self[i].$fn()
}
ln
}
#[cfg(not(target_arch = "aarch64"))]
{
unsafe { intrinsics::$intrinsic(self) }
}
}
)*
}
impl<const N: usize> StdFloat for Simd<f32, N> {
#[inline]
fn fract(self) -> Self {
self - self.trunc()
}
}
impl_float! {
sin: simd_fsin,
cos: simd_fcos,
exp: simd_fexp,
exp2: simd_fexp2,
ln: simd_flog,
log2: simd_flog2,
log10: simd_flog10,
impl<const N: usize> StdFloat for Simd<f64, N> {
#[inline]
fn fract(self) -> Self {
self - self.trunc()
}
}

View file

@ -16,15 +16,33 @@ macro_rules! unary_test {
}
}
macro_rules! binary_test {
macro_rules! unary_approx_test {
{ $scalar:tt, $($func:tt),+ } => {
test_helpers::test_lanes! {
$(
fn $func<const LANES: usize>() {
test_helpers::test_binary_elementwise(
test_helpers::test_unary_elementwise_approx(
&core_simd::simd::Simd::<$scalar, LANES>::$func,
&$scalar::$func,
&|_| true,
8,
)
}
)*
}
}
}
macro_rules! binary_approx_test {
{ $scalar:tt, $($func:tt),+ } => {
test_helpers::test_lanes! {
$(
fn $func<const LANES: usize>() {
test_helpers::test_binary_elementwise_approx(
&core_simd::simd::Simd::<$scalar, LANES>::$func,
&$scalar::$func,
&|_, _| true,
16,
)
}
)*
@ -53,10 +71,13 @@ macro_rules! impl_tests {
mod $scalar {
use std_float::StdFloat;
unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc }
binary_test! { $scalar, log }
unary_test! { $scalar, sqrt, ceil, floor, round, trunc }
ternary_test! { $scalar, mul_add }
// https://github.com/rust-lang/miri/issues/3555
unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 }
binary_approx_test! { $scalar, log }
test_helpers::test_lanes! {
fn fract<const LANES: usize>() {
test_helpers::test_unary_elementwise_flush_subnormals(

View file

@ -6,3 +6,4 @@ publish = false
[dependencies]
proptest = { version = "0.10", default-features = false, features = ["alloc"] }
float-cmp = "0.10"

View file

@ -0,0 +1,110 @@
//! Compare numeric types approximately.
use float_cmp::Ulps;
/// Approximate equality for test comparisons.
///
/// `bool` and the integer impls below compare exactly (the tolerance is
/// ignored); the float impls compare within a ULP distance.
pub trait ApproxEq {
    /// Returns `true` if `self` and `other` are equal to within `_ulps`
    /// units-in-the-last-place (exact equality for non-float impls).
    fn approxeq(&self, other: &Self, _ulps: i64) -> bool;
    /// Formats the value for failure diagnostics (float impls also print
    /// the raw bit pattern).
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result;
}
/// Booleans have no approximate form: comparison is exact and the ULP
/// tolerance is ignored.
impl ApproxEq for bool {
    fn approxeq(&self, other: &Self, _ulps: i64) -> bool {
        // A ULP tolerance is meaningless for bool; require an exact match.
        *self == *other
    }
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Same output as `write!(f, "{:?}", self)`, delegated directly.
        core::fmt::Debug::fmt(self, f)
    }
}
// Implements `ApproxEq` for integer types: `approxeq` ignores the ULP
// tolerance and requires exact equality; `fmt` prints both decimal and hex.
macro_rules! impl_integer_approxeq {
    { $($type:ty),* } => {
        $(
            impl ApproxEq for $type {
                fn approxeq(&self, other: &Self, _ulps: i64) -> bool {
                    self == other
                }
                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                    write!(f, "{:?} ({:x})", self, self)
                }
            }
        )*
    };
}
impl_integer_approxeq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize }
// Implements `ApproxEq` for float types via `float_cmp::Ulps`: values are
// equal when within `ulps` units-in-the-last-place, and two NaNs are treated
// as equal. `fmt` prints the value together with its raw bit pattern.
macro_rules! impl_float_approxeq {
    { $($type:ty),* } => {
        $(
            impl ApproxEq for $type {
                fn approxeq(&self, other: &Self, ulps: i64) -> bool {
                    if self.is_nan() && other.is_nan() {
                        true
                    } else {
                        // `Ulps::ulps` returns a signed distance; compare its
                        // magnitude against the allowed tolerance.
                        (self.ulps(other) as i64).abs() <= ulps
                    }
                }
                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                    write!(f, "{:?} ({:x})", self, self.to_bits())
                }
            }
        )*
    };
}
impl_float_approxeq! { f32, f64 }
/// Arrays compare elementwise: every pair of corresponding elements must be
/// approximately equal under the same ULP tolerance.
impl<T: ApproxEq, const N: usize> ApproxEq for [T; N] {
    fn approxeq(&self, other: &Self, ulps: i64) -> bool {
        // `all` is the idiomatic form of the original `fold(true, ..)` and
        // short-circuits on the first mismatching element.
        self.iter()
            .zip(other.iter())
            .all(|(left, right)| left.approxeq(right, ulps))
    }
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // Adapter so `debug_list` routes through `ApproxEq::fmt` (which may
        // print bit patterns) instead of a `Debug` bound on `T`.
        #[repr(transparent)]
        struct Wrapper<'a, T: ApproxEq>(&'a T);
        impl<T: ApproxEq> core::fmt::Debug for Wrapper<'_, T> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                self.0.fmt(f)
            }
        }
        f.debug_list()
            .entries(self.iter().map(Wrapper))
            .finish()
    }
}
/// Pairs a value reference with a ULP tolerance so proptest's
/// `prop_assert_eq!` can perform approximate comparisons through
/// `PartialEq`/`Debug` (see the impls below).
#[doc(hidden)]
pub struct ApproxEqWrapper<'a, T>(pub &'a T, pub i64);
// Equality delegates to `ApproxEq` with the stored tolerance, letting
// `prop_assert_eq!(wrapper, value)` assert approximate equality.
impl<T: ApproxEq> PartialEq<T> for ApproxEqWrapper<'_, T> {
    fn eq(&self, other: &T) -> bool {
        self.0.approxeq(other, self.1)
    }
}
// Failure output delegates to `ApproxEq::fmt` so diagnostics include the
// extra detail (e.g. float bit patterns) that plain `Debug` would omit.
impl<T: ApproxEq> core::fmt::Debug for ApproxEqWrapper<'_, T> {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        self.0.fmt(f)
    }
}
/// Asserts (inside a proptest) that `$a` and `$b` are approximately equal,
/// allowing up to `$ulps` units-in-the-last-place of difference per element.
#[macro_export]
macro_rules! prop_assert_approxeq {
    { $a:expr, $b:expr, $ulps:expr $(,)? } => {
        {
            use $crate::approxeq::ApproxEqWrapper;
            // Bind once so each expression is evaluated exactly one time.
            let a = $a;
            let b = $b;
            proptest::prop_assert_eq!(ApproxEqWrapper(&a, $ulps), b);
        }
    };
}

View file

@ -12,6 +12,9 @@ pub mod wasm;
#[macro_use]
pub mod biteq;
#[macro_use]
pub mod approxeq;
pub mod subnormals;
use subnormals::FlushSubnormals;
@ -185,6 +188,41 @@ pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const
});
}
/// Test a unary vector function against a unary scalar function, applied elementwise.
///
/// Floats are checked approximately.
///
/// * `fv` — the vector function under test
/// * `fs` — the scalar reference implementation
/// * `check` — precondition on generated inputs; rejected inputs are discarded
/// * `ulps` — per-element tolerance in units-in-the-last-place
pub fn test_unary_elementwise_approx<
    Scalar,
    ScalarResult,
    Vector,
    VectorResult,
    const LANES: usize,
>(
    fv: &dyn Fn(Vector) -> VectorResult,
    fs: &dyn Fn(Scalar) -> ScalarResult,
    check: &dyn Fn([Scalar; LANES]) -> bool,
    ulps: i64,
) where
    Scalar: Copy + core::fmt::Debug + DefaultStrategy,
    ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy,
    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
{
    test_1(&|x: [Scalar; LANES]| {
        proptest::prop_assume!(check(x));
        let result_1: [ScalarResult; LANES] = fv(x.into()).into();
        // `array::map` produces the scalar results directly, avoiding the
        // intermediate `Vec` + fallible `try_into().unwrap()` round-trip.
        let result_2: [ScalarResult; LANES] = x.map(fs);
        crate::prop_assert_approxeq!(result_1, result_2, ulps);
        Ok(())
    });
}
/// Test a unary vector function against a unary scalar function, applied elementwise.
///
/// Where subnormals are flushed, use approximate equality.
@ -290,6 +328,44 @@ pub fn test_binary_elementwise<
});
}
/// Test a binary vector function against a binary scalar function, applied elementwise.
///
/// Floats are checked approximately.
///
/// * `fv` — the vector function under test
/// * `fs` — the scalar reference implementation
/// * `check` — precondition on generated inputs; rejected inputs are discarded
/// * `ulps` — per-element tolerance in units-in-the-last-place
pub fn test_binary_elementwise_approx<
    Scalar1,
    Scalar2,
    ScalarResult,
    Vector1,
    Vector2,
    VectorResult,
    const LANES: usize,
>(
    fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
    ulps: i64,
) where
    Scalar1: Copy + core::fmt::Debug + DefaultStrategy,
    Scalar2: Copy + core::fmt::Debug + DefaultStrategy,
    ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy,
    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
{
    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
        proptest::prop_assume!(check(x, y));
        let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
        // `array::from_fn` builds the scalar results in place, avoiding the
        // intermediate `Vec` + fallible `try_into().unwrap()` round-trip.
        let result_2: [ScalarResult; LANES] = core::array::from_fn(|i| fs(x[i], y[i]));
        crate::prop_assert_approxeq!(result_1, result_2, ulps);
        Ok(())
    });
}
/// Test a binary vector function against a binary scalar function, applied elementwise.
///
/// Where subnormals are flushed, use approximate equality.
@ -528,8 +604,6 @@ macro_rules! test_lanes {
use super::*;
fn implementation<const $lanes: usize>()
where
core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
$body
#[cfg(target_arch = "wasm32")]
@ -628,8 +702,6 @@ macro_rules! test_lanes_panic {
use super::*;
fn implementation<const $lanes: usize>()
where
core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
$body
// test some odd and even non-power-of-2 lengths on miri

View file

@ -1,3 +1,3 @@
[toolchain]
channel = "nightly-2025-01-16"
channel = "nightly-2026-01-26"
components = ["rustfmt", "clippy", "miri", "rust-src"]