From 34384b7a68b3a72fc96e5293de7c7486d2cceb92 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 28 Jul 2021 04:19:31 +0000 Subject: [PATCH 1/4] Add const_evaluatable_checked feature, change to_bitmask to use it, and fix existing std feature --- crates/core_simd/Cargo.toml | 3 +- crates/core_simd/examples/nbody.rs | 301 ++++++++++++++------------- crates/core_simd/src/intrinsics.rs | 1 + crates/core_simd/src/lib.rs | 3 +- crates/core_simd/src/round.rs | 5 + crates/core_simd/src/to_bytes.rs | 73 ++----- crates/core_simd/tests/ops_macros.rs | 19 +- crates/core_simd/tests/to_bytes.rs | 4 +- 8 files changed, 199 insertions(+), 210 deletions(-) diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 6044eabcd140..764b199d5bf9 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -10,8 +10,9 @@ categories = ["hardware-support", "no-std"] license = "MIT OR Apache-2.0" [features] -default = ["std"] +default = ["std", "const_evaluatable_checked"] std = [] +const_evaluatable_checked = [] [target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen] version = "0.2" diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 40e4e18b0264..779575985ed9 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -1,169 +1,173 @@ -#![feature(portable_simd)] +#![cfg_attr(feature = "std", feature(portable_simd))] /// Benchmarks game nbody code /// Taken from the `packed_simd` crate /// Run this benchmark with `cargo test --example nbody` -use core_simd::*; +#[cfg(feature = "std")] +mod nbody { + use core_simd::*; -use std::f64::consts::PI; -const SOLAR_MASS: f64 = 4.0 * PI * PI; -const DAYS_PER_YEAR: f64 = 365.24; + use std::f64::consts::PI; + const SOLAR_MASS: f64 = 4.0 * PI * PI; + const DAYS_PER_YEAR: f64 = 365.24; -#[derive(Debug, Clone, Copy)] -pub struct Body { - pub x: f64x4, - pub v: f64x4, - pub mass: f64, -} - -const N_BODIES: usize = 5; -const BODIES: [Body; N_BODIES] = [ - // sun: - Body { - x: f64x4::from_array([0., 0., 0., 0.]), - v: f64x4::from_array([0., 0., 0., 0.]), - mass: SOLAR_MASS, - }, - // jupiter: - Body { - x: f64x4::from_array([ - 4.84143144246472090e+00, - -1.16032004402742839e+00, - -1.03622044471123109e-01, - 0., - ]), - v: f64x4::from_array([ - 1.66007664274403694e-03 * DAYS_PER_YEAR, - 7.69901118419740425e-03 * DAYS_PER_YEAR, - -6.90460016972063023e-05 * DAYS_PER_YEAR, - 0., - ]), - mass: 9.54791938424326609e-04 * SOLAR_MASS, - }, - // saturn: - Body { - x: f64x4::from_array([ - 8.34336671824457987e+00, - 4.12479856412430479e+00, - -4.03523417114321381e-01, - 0., - ]), - v: f64x4::from_array([ - -2.76742510726862411e-03 * DAYS_PER_YEAR, - 4.99852801234917238e-03 * DAYS_PER_YEAR, - 2.30417297573763929e-05 * DAYS_PER_YEAR, - 0., - ]), - mass: 2.85885980666130812e-04 * SOLAR_MASS, - }, - // uranus: - Body { - x: f64x4::from_array([ - 1.28943695621391310e+01, - -1.51111514016986312e+01, - -2.23307578892655734e-01, - 0., - ]), - v: f64x4::from_array([ - 2.96460137564761618e-03 * DAYS_PER_YEAR, - 2.37847173959480950e-03 * DAYS_PER_YEAR, - -2.96589568540237556e-05 * DAYS_PER_YEAR, - 0., - ]), - mass: 4.36624404335156298e-05 * SOLAR_MASS, - }, - // neptune: - Body { - x: f64x4::from_array([ - 1.53796971148509165e+01, - -2.59193146099879641e+01, - 1.79258772950371181e-01, - 0., - ]), - v: f64x4::from_array([ - 2.68067772490389322e-03 * DAYS_PER_YEAR, - 1.62824170038242295e-03 * DAYS_PER_YEAR, - -9.51592254519715870e-05 * DAYS_PER_YEAR, - 0., - ]), - mass: 5.15138902046611451e-05 * SOLAR_MASS, - }, -]; - -pub fn offset_momentum(bodies: &mut [Body; N_BODIES]) { - let (sun, rest) = bodies.split_at_mut(1); - let sun = &mut sun[0]; - for body in rest { - let m_ratio = body.mass / SOLAR_MASS; - sun.v -= body.v * m_ratio; + #[derive(Debug, Clone, Copy)] + struct Body { + pub x: f64x4, + pub v: f64x4, + pub mass: f64, } -} -pub fn energy(bodies: &[Body; N_BODIES]) -> f64 { - let mut e = 0.; - for i in 0..N_BODIES { - let bi = &bodies[i]; - e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5; - for bj in bodies.iter().take(N_BODIES).skip(i + 1) { - let dx = bi.x - bj.x; - e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt() + const N_BODIES: usize = 5; + const BODIES: [Body; N_BODIES] = [ + // sun: + Body { + x: f64x4::from_array([0., 0., 0., 0.]), + v: f64x4::from_array([0., 0., 0., 0.]), + mass: SOLAR_MASS, + }, + // jupiter: + Body { + x: f64x4::from_array([ + 4.84143144246472090e+00, + -1.16032004402742839e+00, + -1.03622044471123109e-01, + 0., + ]), + v: f64x4::from_array([ + 1.66007664274403694e-03 * DAYS_PER_YEAR, + 7.69901118419740425e-03 * DAYS_PER_YEAR, + -6.90460016972063023e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 9.54791938424326609e-04 * SOLAR_MASS, + }, + // saturn: + Body { + x: f64x4::from_array([ + 8.34336671824457987e+00, + 4.12479856412430479e+00, + -4.03523417114321381e-01, + 0., + ]), + v: f64x4::from_array([ + -2.76742510726862411e-03 * DAYS_PER_YEAR, + 4.99852801234917238e-03 * DAYS_PER_YEAR, + 2.30417297573763929e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 2.85885980666130812e-04 * SOLAR_MASS, + }, + // uranus: + Body { + x: f64x4::from_array([ + 1.28943695621391310e+01, + -1.51111514016986312e+01, + -2.23307578892655734e-01, + 0., + ]), + v: f64x4::from_array([ + 2.96460137564761618e-03 * DAYS_PER_YEAR, + 2.37847173959480950e-03 * DAYS_PER_YEAR, + -2.96589568540237556e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 4.36624404335156298e-05 * SOLAR_MASS, + }, + // neptune: + Body { + x: f64x4::from_array([ + 1.53796971148509165e+01, + -2.59193146099879641e+01, + 1.79258772950371181e-01, + 0., + ]), + v: f64x4::from_array([ + 2.68067772490389322e-03 * DAYS_PER_YEAR, + 1.62824170038242295e-03 * DAYS_PER_YEAR, + -9.51592254519715870e-05 * DAYS_PER_YEAR, + 0., + ]), + mass: 5.15138902046611451e-05 * SOLAR_MASS, + }, + ]; + + fn offset_momentum(bodies: &mut [Body; N_BODIES]) { + let (sun, rest) = bodies.split_at_mut(1); + let sun = &mut sun[0]; + for body in rest { + let m_ratio = body.mass / SOLAR_MASS; + sun.v -= body.v * m_ratio; } } - e -} -pub fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { - const N: usize = N_BODIES * (N_BODIES - 1) / 2; + fn energy(bodies: &[Body; N_BODIES]) -> f64 { + let mut e = 0.; + for i in 0..N_BODIES { + let bi = &bodies[i]; + e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5; + for bj in bodies.iter().take(N_BODIES).skip(i + 1) { + let dx = bi.x - bj.x; + e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt() + } + } + e + } + + fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { + const N: usize = N_BODIES * (N_BODIES - 1) / 2; + + // compute distance between bodies: + let mut r = [f64x4::splat(0.); N]; + { + let mut i = 0; + for j in 0..N_BODIES { + for k in j + 1..N_BODIES { + r[i] = bodies[j].x - bodies[k].x; + i += 1; + } + } + } + + let mut mag = [0.0; N]; + for i in (0..N).step_by(2) { + let d2s = f64x2::from_array([ + (r[i] * r[i]).horizontal_sum(), + (r[i + 1] * r[i + 1]).horizontal_sum(), + ]); + let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt()); + mag[i] = dmags[0]; + mag[i + 1] = dmags[1]; + } - // compute distance between bodies: - let mut r = [f64x4::splat(0.); N]; - { let mut i = 0; for j in 0..N_BODIES { for k in j + 1..N_BODIES { - r[i] = bodies[j].x - bodies[k].x; - i += 1; + let f = r[i] * mag[i]; + bodies[j].v -= f * bodies[k].mass; + bodies[k].v += f * bodies[j].mass; + i += 1 } } - } - - let mut mag = [0.0; N]; - for i in (0..N).step_by(2) { - let d2s = f64x2::from_array([ - (r[i] * r[i]).horizontal_sum(), - (r[i + 1] * r[i + 1]).horizontal_sum(), - ]); - let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt()); - mag[i] = dmags[0]; - mag[i + 1] = dmags[1]; - } - - let mut i = 0; - for j in 0..N_BODIES { - for k in j + 1..N_BODIES { - let f = r[i] * mag[i]; - bodies[j].v -= f * bodies[k].mass; - bodies[k].v += f * bodies[j].mass; - i += 1 + for body in bodies { + body.x += dt * body.v } } - for body in bodies { - body.x += dt * body.v + + pub fn run(n: usize) -> (f64, f64) { + let mut bodies = BODIES; + offset_momentum(&mut bodies); + let energy_before = energy(&bodies); + for _ in 0..n { + advance(&mut bodies, 0.01); + } + let energy_after = energy(&bodies); + + (energy_before, energy_after) } } -pub fn run(n: usize) -> (f64, f64) { - let mut bodies = BODIES; - offset_momentum(&mut bodies); - let energy_before = energy(&bodies); - for _ in 0..n { - advance(&mut bodies, 0.01); - } - let energy_after = energy(&bodies); - - (energy_before, energy_after) -} - +#[cfg(feature = "std")] #[cfg(test)] mod tests { // Good enough for demonstration purposes, not going for strictness here. @@ -173,12 +177,17 @@ mod tests { #[test] fn test() { const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605]; - let (energy_before, energy_after) = super::run(1000); + let (energy_before, energy_after) = super::nbody::run(1000); assert!(approx_eq_f64(energy_before, OUTPUT[0])); assert!(approx_eq_f64(energy_after, OUTPUT[1])); } } fn main() { - // empty main to pass CI + #[cfg(feature = "std")] + { + let (energy_before, energy_after) = nbody::run(1000); + println!("Energy before: {}", energy_before); + println!("Energy after: {}", energy_after); + } } diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 944026c080a6..916c0dadf752 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -47,6 +47,7 @@ extern "platform-intrinsic" { pub(crate) fn simd_fabs(x: T) -> T; /// fsqrt + #[cfg(feature = "std")] pub(crate) fn simd_fsqrt(x: T) -> T; /// fma diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index d8149efe9c7f..5f88e3c63b5b 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,7 @@ #![no_std] #![allow(incomplete_features)] #![feature( + const_evaluatable_checked, const_generics, platform_intrinsics, repr_simd, @@ -20,8 +21,8 @@ mod reduction; mod select; pub use select::Select; +#[cfg(feature = "const_evaluatable_checked")] mod to_bytes; -pub use to_bytes::ToBytes; mod comparisons; mod fmt; diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 74cae0cf9898..c284ade463fc 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -41,7 +41,12 @@ macro_rules! implement { pub fn fract(self) -> Self { self - self.trunc() } + } + impl crate::$type + where + crate::LaneCount: crate::SupportedLaneCount, + { /// Rounds toward zero and converts to the same-width integer type, assuming that /// the value is finite and fits in that type. /// diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index 0823391049fd..31d7dfebe1a7 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -1,72 +1,39 @@ -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - -/// Supporting trait for byte conversion functions. -pub trait ToBytes: Sealed { - /// The bytes representation of this type. - type Bytes; - - #[doc(hidden)] - fn to_bytes_impl(self) -> Self::Bytes; - - #[doc(hidden)] - fn from_bytes_impl(bytes: Self::Bytes) -> Self; -} - macro_rules! impl_to_bytes { - { $name:ident, $($int_width:literal -> $byte_width:literal),* } => { - $( - impl Sealed for crate::$name<$int_width> - where - crate::LaneCount<$int_width>: crate::SupportedLaneCount, - {} - - impl ToBytes for crate::$name<$int_width> - where - crate::LaneCount<$int_width>: crate::SupportedLaneCount, - { - type Bytes = crate::SimdU8<$byte_width>; - fn to_bytes_impl(self) -> Self::Bytes { - unsafe { core::mem::transmute(self) } - } - fn from_bytes_impl(bytes: Self::Bytes) -> Self { - unsafe { core::mem::transmute(bytes) } - } - } - )* - + { $name:ident, $size:literal } => { impl crate::$name where crate::LaneCount: crate::SupportedLaneCount, - Self: ToBytes, + crate::LaneCount<{{ $size * LANES }}>: crate::SupportedLaneCount, { /// Return the memory representation of this integer as a byte array in native byte /// order. - pub fn to_ne_bytes(self) -> ::Bytes { self.to_bytes_impl() } + pub fn to_ne_bytes(self) -> crate::SimdU8<{{ $size * LANES }}> { + unsafe { core::mem::transmute_copy(&self) } + } /// Create a native endian integer value from its memory representation as a byte array /// in native endianness. - pub fn from_ne_bytes(bytes: ::Bytes) -> Self { Self::from_bytes_impl(bytes) } + pub fn from_ne_bytes(bytes: crate::SimdU8<{{ $size * LANES }}>) -> Self { + unsafe { core::mem::transmute_copy(&bytes) } + } } } } -impl_to_bytes! { SimdU8, 1 -> 1, 2 -> 2, 4 -> 4, 8 -> 8, 16 -> 16, 32 -> 32 } -impl_to_bytes! { SimdU16, 1 -> 2, 2 -> 4, 4 -> 8, 8 -> 16, 16 -> 32 } -impl_to_bytes! { SimdU32, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 } -impl_to_bytes! { SimdU64, 1 -> 8, 2 -> 16, 4 -> 32 } +impl_to_bytes! { SimdU8, 1 } +impl_to_bytes! { SimdU16, 2 } +impl_to_bytes! { SimdU32, 4 } +impl_to_bytes! { SimdU64, 8 } #[cfg(target_pointer_width = "32")] -impl_to_bytes! { SimdUsize, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 } +impl_to_bytes! { SimdUsize, 4 } #[cfg(target_pointer_width = "64")] -impl_to_bytes! { SimdUsize, 1 -> 8, 2 -> 16, 4 -> 32 } +impl_to_bytes! { SimdUsize, 8 } -impl_to_bytes! { SimdI8, 1 -> 1, 2 -> 2, 4 -> 4, 8 -> 8, 16 -> 16, 32 -> 32 } -impl_to_bytes! { SimdI16, 1 -> 2, 2 -> 4, 4 -> 8, 8 -> 16, 16 -> 32 } -impl_to_bytes! { SimdI32, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 } -impl_to_bytes! { SimdI64, 1 -> 8, 2 -> 16, 4 -> 32 } +impl_to_bytes! { SimdI8, 1 } +impl_to_bytes! { SimdI16, 2 } +impl_to_bytes! { SimdI32, 4 } +impl_to_bytes! { SimdI64, 8 } #[cfg(target_pointer_width = "32")] -impl_to_bytes! { SimdIsize, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 } +impl_to_bytes! { SimdIsize, 4 } #[cfg(target_pointer_width = "64")] -impl_to_bytes! { SimdIsize, 1 -> 8, 2 -> 16, 4 -> 32 } +impl_to_bytes! { SimdIsize, 8 } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index cb39e7377054..81553c34aa77 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -443,14 +443,6 @@ macro_rules! impl_float_tests { ) } - fn sqrt() { - test_helpers::test_unary_elementwise( - &Vector::::sqrt, - &Scalar::sqrt, - &|_| true, - ) - } - fn recip() { test_helpers::test_unary_elementwise( &Vector::::recip, @@ -605,6 +597,17 @@ macro_rules! impl_float_tests { }); } } + + #[cfg(feature = "std")] + test_helpers::test_lanes! { + fn sqrt() { + test_helpers::test_unary_elementwise( + &Vector::::sqrt, + &Scalar::sqrt, + &|_| true, + ) + } + } } } } diff --git a/crates/core_simd/tests/to_bytes.rs b/crates/core_simd/tests/to_bytes.rs index 8d662b3238c9..22c97c95d927 100644 --- a/crates/core_simd/tests/to_bytes.rs +++ b/crates/core_simd/tests/to_bytes.rs @@ -1,4 +1,6 @@ -#![feature(portable_simd)] +#![feature(portable_simd, const_generics, const_evaluatable_checked)] +#![allow(incomplete_features)] +#![cfg(feature = "const_evaluatable_checked")] use core_simd::SimdU32; From 1f69bc459a2d4d2a34009553e57ff93598e6a342 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 28 Jul 2021 04:26:55 +0000 Subject: [PATCH 2/4] Add CI for testing cargo features --- .github/workflows/ci.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c62a6d40aee..2f15dcc6c161 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -196,3 +196,28 @@ jobs: - name: Test (release) run: cross test --verbose --target=${{ matrix.target }} --release + features: + name: "Check cargo features (${{ matrix.features }} ${{ matrix.rustflags }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rustflags: + - "" + - "-Ctarget-feature=+avx512" # AVX-512 uses packed bit masks, so enable it to test more code paths + features: + - "" + - "--feature std" + - "--feature const_evaluatable_checked" + - "--feature std --feature const_evaluatable_checked" + + steps: + - uses: actions/checkout@v2 + - name: Setup Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + - name: Check build + run: cargo check --all-targets --no-default-features ${{ matrix.features }} + env: + RUSTFLAGS: ${{ matrix.rustflags }} From 9ab050796f24b3dcd5b56c303bc48024af027eb4 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 28 Jul 2021 04:33:57 +0000 Subject: [PATCH 3/4] Fix feature flag in CI --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f15dcc6c161..454bc3154751 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -207,9 +207,9 @@ jobs: - "-Ctarget-feature=+avx512" # AVX-512 uses packed bit masks, so enable it to test more code paths features: - "" - - "--feature std" - - "--feature const_evaluatable_checked" - - "--feature std --feature const_evaluatable_checked" + - "--features std" + - "--features const_evaluatable_checked" + - "--features std --features const_evaluatable_checked" steps: - uses: actions/checkout@v2 From cca91024298b92f5bff5fc7353155aff0eef38e5 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 29 Jul 2021 04:55:28 +0000 Subject: [PATCH 4/4] Change bitmasks to use less opaque type --- crates/core_simd/src/lane_count.rs | 7 ++++++- crates/core_simd/src/masks.rs | 4 ++-- crates/core_simd/src/masks/bitmask.rs | 10 ++++++---- crates/core_simd/src/masks/full_masks.rs | 10 +++++----- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs index 8fe204dff98e..b017e7d137e3 100644 --- a/crates/core_simd/src/lane_count.rs +++ b/crates/core_simd/src/lane_count.rs @@ -6,9 +6,14 @@ use sealed::Sealed; /// A type representing a vector lane count. pub struct LaneCount; +impl LaneCount { + /// The number of bytes in a bitmask with this many lanes. + pub const BITMASK_LEN: usize = (LANES + 7) / 8; +} + /// Helper trait for vector lane counts. pub trait SupportedLaneCount: Sealed { - /// The bitmask representation of a mask. + #[doc(hidden)] type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; #[doc(hidden)] diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index d3338a6d366e..ba7da704f61d 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -160,12 +160,12 @@ macro_rules! define_opaque_mask { } /// Convert this mask to a bitmask, with one bit set per lane. - pub fn to_bitmask(self) -> as crate::SupportedLaneCount>::BitMask { + pub fn to_bitmask(self) -> [u8; crate::LaneCount::::BITMASK_LEN] { self.0.to_bitmask() } /// Convert a bitmask to a mask. - pub fn from_bitmask(bitmask: as crate::SupportedLaneCount>::BitMask) -> Self { + pub fn from_bitmask(bitmask: [u8; crate::LaneCount::::BITMASK_LEN]) -> Self { Self(<$inner_ty>::from_bitmask(bitmask)) } } diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index b6897728988b..69edd5235872 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -128,13 +128,15 @@ where } #[inline] - pub fn to_bitmask(self) -> as SupportedLaneCount>::BitMask { - self.0 + pub fn to_bitmask(self) -> [u8; LaneCount::::BITMASK_LEN] { + // Safety: these are the same type and we are laundering the generic + unsafe { core::mem::transmute_copy(&self.0) } } #[inline] - pub fn from_bitmask(bitmask: as SupportedLaneCount>::BitMask) -> Self { - Self(bitmask) + pub fn from_bitmask(bitmask: [u8; LaneCount::::BITMASK_LEN]) -> Self { + // Safety: these are the same type and we are laundering the generic + Self(unsafe { core::mem::transmute_copy(&bitmask) }) } #[inline] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index af36571134ee..2923cf1964a0 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -103,15 +103,15 @@ macro_rules! define_mask { } #[inline] - pub fn to_bitmask(self) -> as crate::SupportedLaneCount>::BitMask { + pub fn to_bitmask(self) -> [u8; crate::LaneCount::::BITMASK_LEN] { unsafe { // TODO remove the transmute when rustc can use arrays of u8 as bitmasks assert_eq!( - core::mem::size_of::< as crate::SupportedLaneCount>::BitMask>(), core::mem::size_of::< as crate::SupportedLaneCount>::IntBitMask>(), + crate::LaneCount::::BITMASK_LEN, ); let bitmask: as crate::SupportedLaneCount>::IntBitMask = crate::intrinsics::simd_bitmask(self.0); - let mut bitmask: as crate::SupportedLaneCount>::BitMask = core::mem::transmute_copy(&bitmask); + let mut bitmask: [u8; crate::LaneCount::::BITMASK_LEN] = core::mem::transmute_copy(&bitmask); // There is a bug where LLVM appears to implement this operation with the wrong // bit order. @@ -127,7 +127,7 @@ macro_rules! define_mask { } #[inline] - pub fn from_bitmask(mut bitmask: as crate::SupportedLaneCount>::BitMask) -> Self { + pub fn from_bitmask(mut bitmask: [u8; crate::LaneCount::::BITMASK_LEN]) -> Self { unsafe { // There is a bug where LLVM appears to implement this operation with the wrong // bit order. @@ -140,8 +140,8 @@ macro_rules! define_mask { // TODO remove the transmute when rustc can use arrays of u8 as bitmasks assert_eq!( - core::mem::size_of::< as crate::SupportedLaneCount>::BitMask>(), core::mem::size_of::< as crate::SupportedLaneCount>::IntBitMask>(), + crate::LaneCount::::BITMASK_LEN, ); let bitmask: as crate::SupportedLaneCount>::IntBitMask = core::mem::transmute_copy(&bitmask);