Add const_evaluatable_checked feature, change to_bitmask to use it, and fix existing std feature
This commit is contained in:
parent
82e3405efe
commit
34384b7a68
8 changed files with 199 additions and 210 deletions
|
|
@ -10,8 +10,9 @@ categories = ["hardware-support", "no-std"]
|
|||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
default = ["std", "const_evaluatable_checked"]
|
||||
std = []
|
||||
const_evaluatable_checked = []
|
||||
|
||||
[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen]
|
||||
version = "0.2"
|
||||
|
|
|
|||
|
|
@ -1,169 +1,173 @@
|
|||
#![feature(portable_simd)]
|
||||
#![cfg_attr(feature = "std", feature(portable_simd))]
|
||||
|
||||
/// Benchmarks game nbody code
|
||||
/// Taken from the `packed_simd` crate
|
||||
/// Run this benchmark with `cargo test --example nbody`
|
||||
use core_simd::*;
|
||||
#[cfg(feature = "std")]
|
||||
mod nbody {
|
||||
use core_simd::*;
|
||||
|
||||
use std::f64::consts::PI;
|
||||
const SOLAR_MASS: f64 = 4.0 * PI * PI;
|
||||
const DAYS_PER_YEAR: f64 = 365.24;
|
||||
use std::f64::consts::PI;
|
||||
const SOLAR_MASS: f64 = 4.0 * PI * PI;
|
||||
const DAYS_PER_YEAR: f64 = 365.24;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Body {
|
||||
pub x: f64x4,
|
||||
pub v: f64x4,
|
||||
pub mass: f64,
|
||||
}
|
||||
|
||||
const N_BODIES: usize = 5;
|
||||
const BODIES: [Body; N_BODIES] = [
|
||||
// sun:
|
||||
Body {
|
||||
x: f64x4::from_array([0., 0., 0., 0.]),
|
||||
v: f64x4::from_array([0., 0., 0., 0.]),
|
||||
mass: SOLAR_MASS,
|
||||
},
|
||||
// jupiter:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
4.84143144246472090e+00,
|
||||
-1.16032004402742839e+00,
|
||||
-1.03622044471123109e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
1.66007664274403694e-03 * DAYS_PER_YEAR,
|
||||
7.69901118419740425e-03 * DAYS_PER_YEAR,
|
||||
-6.90460016972063023e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 9.54791938424326609e-04 * SOLAR_MASS,
|
||||
},
|
||||
// saturn:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
8.34336671824457987e+00,
|
||||
4.12479856412430479e+00,
|
||||
-4.03523417114321381e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
-2.76742510726862411e-03 * DAYS_PER_YEAR,
|
||||
4.99852801234917238e-03 * DAYS_PER_YEAR,
|
||||
2.30417297573763929e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 2.85885980666130812e-04 * SOLAR_MASS,
|
||||
},
|
||||
// uranus:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
1.28943695621391310e+01,
|
||||
-1.51111514016986312e+01,
|
||||
-2.23307578892655734e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
2.96460137564761618e-03 * DAYS_PER_YEAR,
|
||||
2.37847173959480950e-03 * DAYS_PER_YEAR,
|
||||
-2.96589568540237556e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 4.36624404335156298e-05 * SOLAR_MASS,
|
||||
},
|
||||
// neptune:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
1.53796971148509165e+01,
|
||||
-2.59193146099879641e+01,
|
||||
1.79258772950371181e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
2.68067772490389322e-03 * DAYS_PER_YEAR,
|
||||
1.62824170038242295e-03 * DAYS_PER_YEAR,
|
||||
-9.51592254519715870e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 5.15138902046611451e-05 * SOLAR_MASS,
|
||||
},
|
||||
];
|
||||
|
||||
pub fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
|
||||
let (sun, rest) = bodies.split_at_mut(1);
|
||||
let sun = &mut sun[0];
|
||||
for body in rest {
|
||||
let m_ratio = body.mass / SOLAR_MASS;
|
||||
sun.v -= body.v * m_ratio;
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct Body {
|
||||
pub x: f64x4,
|
||||
pub v: f64x4,
|
||||
pub mass: f64,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn energy(bodies: &[Body; N_BODIES]) -> f64 {
|
||||
let mut e = 0.;
|
||||
for i in 0..N_BODIES {
|
||||
let bi = &bodies[i];
|
||||
e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5;
|
||||
for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
|
||||
let dx = bi.x - bj.x;
|
||||
e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt()
|
||||
const N_BODIES: usize = 5;
|
||||
const BODIES: [Body; N_BODIES] = [
|
||||
// sun:
|
||||
Body {
|
||||
x: f64x4::from_array([0., 0., 0., 0.]),
|
||||
v: f64x4::from_array([0., 0., 0., 0.]),
|
||||
mass: SOLAR_MASS,
|
||||
},
|
||||
// jupiter:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
4.84143144246472090e+00,
|
||||
-1.16032004402742839e+00,
|
||||
-1.03622044471123109e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
1.66007664274403694e-03 * DAYS_PER_YEAR,
|
||||
7.69901118419740425e-03 * DAYS_PER_YEAR,
|
||||
-6.90460016972063023e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 9.54791938424326609e-04 * SOLAR_MASS,
|
||||
},
|
||||
// saturn:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
8.34336671824457987e+00,
|
||||
4.12479856412430479e+00,
|
||||
-4.03523417114321381e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
-2.76742510726862411e-03 * DAYS_PER_YEAR,
|
||||
4.99852801234917238e-03 * DAYS_PER_YEAR,
|
||||
2.30417297573763929e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 2.85885980666130812e-04 * SOLAR_MASS,
|
||||
},
|
||||
// uranus:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
1.28943695621391310e+01,
|
||||
-1.51111514016986312e+01,
|
||||
-2.23307578892655734e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
2.96460137564761618e-03 * DAYS_PER_YEAR,
|
||||
2.37847173959480950e-03 * DAYS_PER_YEAR,
|
||||
-2.96589568540237556e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 4.36624404335156298e-05 * SOLAR_MASS,
|
||||
},
|
||||
// neptune:
|
||||
Body {
|
||||
x: f64x4::from_array([
|
||||
1.53796971148509165e+01,
|
||||
-2.59193146099879641e+01,
|
||||
1.79258772950371181e-01,
|
||||
0.,
|
||||
]),
|
||||
v: f64x4::from_array([
|
||||
2.68067772490389322e-03 * DAYS_PER_YEAR,
|
||||
1.62824170038242295e-03 * DAYS_PER_YEAR,
|
||||
-9.51592254519715870e-05 * DAYS_PER_YEAR,
|
||||
0.,
|
||||
]),
|
||||
mass: 5.15138902046611451e-05 * SOLAR_MASS,
|
||||
},
|
||||
];
|
||||
|
||||
fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
|
||||
let (sun, rest) = bodies.split_at_mut(1);
|
||||
let sun = &mut sun[0];
|
||||
for body in rest {
|
||||
let m_ratio = body.mass / SOLAR_MASS;
|
||||
sun.v -= body.v * m_ratio;
|
||||
}
|
||||
}
|
||||
e
|
||||
}
|
||||
|
||||
pub fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
|
||||
const N: usize = N_BODIES * (N_BODIES - 1) / 2;
|
||||
fn energy(bodies: &[Body; N_BODIES]) -> f64 {
|
||||
let mut e = 0.;
|
||||
for i in 0..N_BODIES {
|
||||
let bi = &bodies[i];
|
||||
e += bi.mass * (bi.v * bi.v).horizontal_sum() * 0.5;
|
||||
for bj in bodies.iter().take(N_BODIES).skip(i + 1) {
|
||||
let dx = bi.x - bj.x;
|
||||
e -= bi.mass * bj.mass / (dx * dx).horizontal_sum().sqrt()
|
||||
}
|
||||
}
|
||||
e
|
||||
}
|
||||
|
||||
fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
|
||||
const N: usize = N_BODIES * (N_BODIES - 1) / 2;
|
||||
|
||||
// compute distance between bodies:
|
||||
let mut r = [f64x4::splat(0.); N];
|
||||
{
|
||||
let mut i = 0;
|
||||
for j in 0..N_BODIES {
|
||||
for k in j + 1..N_BODIES {
|
||||
r[i] = bodies[j].x - bodies[k].x;
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut mag = [0.0; N];
|
||||
for i in (0..N).step_by(2) {
|
||||
let d2s = f64x2::from_array([
|
||||
(r[i] * r[i]).horizontal_sum(),
|
||||
(r[i + 1] * r[i + 1]).horizontal_sum(),
|
||||
]);
|
||||
let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
|
||||
mag[i] = dmags[0];
|
||||
mag[i + 1] = dmags[1];
|
||||
}
|
||||
|
||||
// compute distance between bodies:
|
||||
let mut r = [f64x4::splat(0.); N];
|
||||
{
|
||||
let mut i = 0;
|
||||
for j in 0..N_BODIES {
|
||||
for k in j + 1..N_BODIES {
|
||||
r[i] = bodies[j].x - bodies[k].x;
|
||||
i += 1;
|
||||
let f = r[i] * mag[i];
|
||||
bodies[j].v -= f * bodies[k].mass;
|
||||
bodies[k].v += f * bodies[j].mass;
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut mag = [0.0; N];
|
||||
for i in (0..N).step_by(2) {
|
||||
let d2s = f64x2::from_array([
|
||||
(r[i] * r[i]).horizontal_sum(),
|
||||
(r[i + 1] * r[i + 1]).horizontal_sum(),
|
||||
]);
|
||||
let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
|
||||
mag[i] = dmags[0];
|
||||
mag[i + 1] = dmags[1];
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
for j in 0..N_BODIES {
|
||||
for k in j + 1..N_BODIES {
|
||||
let f = r[i] * mag[i];
|
||||
bodies[j].v -= f * bodies[k].mass;
|
||||
bodies[k].v += f * bodies[j].mass;
|
||||
i += 1
|
||||
for body in bodies {
|
||||
body.x += dt * body.v
|
||||
}
|
||||
}
|
||||
for body in bodies {
|
||||
body.x += dt * body.v
|
||||
|
||||
pub fn run(n: usize) -> (f64, f64) {
|
||||
let mut bodies = BODIES;
|
||||
offset_momentum(&mut bodies);
|
||||
let energy_before = energy(&bodies);
|
||||
for _ in 0..n {
|
||||
advance(&mut bodies, 0.01);
|
||||
}
|
||||
let energy_after = energy(&bodies);
|
||||
|
||||
(energy_before, energy_after)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(n: usize) -> (f64, f64) {
|
||||
let mut bodies = BODIES;
|
||||
offset_momentum(&mut bodies);
|
||||
let energy_before = energy(&bodies);
|
||||
for _ in 0..n {
|
||||
advance(&mut bodies, 0.01);
|
||||
}
|
||||
let energy_after = energy(&bodies);
|
||||
|
||||
(energy_before, energy_after)
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// Good enough for demonstration purposes, not going for strictness here.
|
||||
|
|
@ -173,12 +177,17 @@ mod tests {
|
|||
#[test]
|
||||
fn test() {
|
||||
const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605];
|
||||
let (energy_before, energy_after) = super::run(1000);
|
||||
let (energy_before, energy_after) = super::nbody::run(1000);
|
||||
assert!(approx_eq_f64(energy_before, OUTPUT[0]));
|
||||
assert!(approx_eq_f64(energy_after, OUTPUT[1]));
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// empty main to pass CI
|
||||
#[cfg(feature = "std")]
|
||||
{
|
||||
let (energy_before, energy_after) = nbody::run(1000);
|
||||
println!("Energy before: {}", energy_before);
|
||||
println!("Energy after: {}", energy_after);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ extern "platform-intrinsic" {
|
|||
pub(crate) fn simd_fabs<T>(x: T) -> T;
|
||||
|
||||
/// fsqrt
|
||||
#[cfg(feature = "std")]
|
||||
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
|
||||
|
||||
/// fma
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#![no_std]
|
||||
#![allow(incomplete_features)]
|
||||
#![feature(
|
||||
const_evaluatable_checked,
|
||||
const_generics,
|
||||
platform_intrinsics,
|
||||
repr_simd,
|
||||
|
|
@ -20,8 +21,8 @@ mod reduction;
|
|||
mod select;
|
||||
pub use select::Select;
|
||||
|
||||
#[cfg(feature = "const_evaluatable_checked")]
|
||||
mod to_bytes;
|
||||
pub use to_bytes::ToBytes;
|
||||
|
||||
mod comparisons;
|
||||
mod fmt;
|
||||
|
|
|
|||
|
|
@ -41,7 +41,12 @@ macro_rules! implement {
|
|||
pub fn fract(self) -> Self {
|
||||
self - self.trunc()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> crate::$type<LANES>
|
||||
where
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
/// Rounds toward zero and converts to the same-width integer type, assuming that
|
||||
/// the value is finite and fits in that type.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -1,72 +1,39 @@
|
|||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
use sealed::Sealed;
|
||||
|
||||
/// Supporting trait for byte conversion functions.
|
||||
pub trait ToBytes: Sealed {
|
||||
/// The bytes representation of this type.
|
||||
type Bytes;
|
||||
|
||||
#[doc(hidden)]
|
||||
fn to_bytes_impl(self) -> Self::Bytes;
|
||||
|
||||
#[doc(hidden)]
|
||||
fn from_bytes_impl(bytes: Self::Bytes) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_to_bytes {
|
||||
{ $name:ident, $($int_width:literal -> $byte_width:literal),* } => {
|
||||
$(
|
||||
impl Sealed for crate::$name<$int_width>
|
||||
where
|
||||
crate::LaneCount<$int_width>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl ToBytes for crate::$name<$int_width>
|
||||
where
|
||||
crate::LaneCount<$int_width>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Bytes = crate::SimdU8<$byte_width>;
|
||||
fn to_bytes_impl(self) -> Self::Bytes {
|
||||
unsafe { core::mem::transmute(self) }
|
||||
}
|
||||
fn from_bytes_impl(bytes: Self::Bytes) -> Self {
|
||||
unsafe { core::mem::transmute(bytes) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
||||
{ $name:ident, $size:literal } => {
|
||||
impl<const LANES: usize> crate::$name<LANES>
|
||||
where
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
Self: ToBytes,
|
||||
crate::LaneCount<{{ $size * LANES }}>: crate::SupportedLaneCount,
|
||||
{
|
||||
/// Return the memory representation of this integer as a byte array in native byte
|
||||
/// order.
|
||||
pub fn to_ne_bytes(self) -> <Self as ToBytes>::Bytes { self.to_bytes_impl() }
|
||||
pub fn to_ne_bytes(self) -> crate::SimdU8<{{ $size * LANES }}> {
|
||||
unsafe { core::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
/// Create a native endian integer value from its memory representation as a byte array
|
||||
/// in native endianness.
|
||||
pub fn from_ne_bytes(bytes: <Self as ToBytes>::Bytes) -> Self { Self::from_bytes_impl(bytes) }
|
||||
pub fn from_ne_bytes(bytes: crate::SimdU8<{{ $size * LANES }}>) -> Self {
|
||||
unsafe { core::mem::transmute_copy(&bytes) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_to_bytes! { SimdU8, 1 -> 1, 2 -> 2, 4 -> 4, 8 -> 8, 16 -> 16, 32 -> 32 }
|
||||
impl_to_bytes! { SimdU16, 1 -> 2, 2 -> 4, 4 -> 8, 8 -> 16, 16 -> 32 }
|
||||
impl_to_bytes! { SimdU32, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 }
|
||||
impl_to_bytes! { SimdU64, 1 -> 8, 2 -> 16, 4 -> 32 }
|
||||
impl_to_bytes! { SimdU8, 1 }
|
||||
impl_to_bytes! { SimdU16, 2 }
|
||||
impl_to_bytes! { SimdU32, 4 }
|
||||
impl_to_bytes! { SimdU64, 8 }
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
impl_to_bytes! { SimdUsize, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 }
|
||||
impl_to_bytes! { SimdUsize, 4 }
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
impl_to_bytes! { SimdUsize, 1 -> 8, 2 -> 16, 4 -> 32 }
|
||||
impl_to_bytes! { SimdUsize, 8 }
|
||||
|
||||
impl_to_bytes! { SimdI8, 1 -> 1, 2 -> 2, 4 -> 4, 8 -> 8, 16 -> 16, 32 -> 32 }
|
||||
impl_to_bytes! { SimdI16, 1 -> 2, 2 -> 4, 4 -> 8, 8 -> 16, 16 -> 32 }
|
||||
impl_to_bytes! { SimdI32, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 }
|
||||
impl_to_bytes! { SimdI64, 1 -> 8, 2 -> 16, 4 -> 32 }
|
||||
impl_to_bytes! { SimdI8, 1 }
|
||||
impl_to_bytes! { SimdI16, 2 }
|
||||
impl_to_bytes! { SimdI32, 4 }
|
||||
impl_to_bytes! { SimdI64, 8 }
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
impl_to_bytes! { SimdIsize, 1 -> 4, 2 -> 8, 4 -> 16, 8 -> 32 }
|
||||
impl_to_bytes! { SimdIsize, 4 }
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
impl_to_bytes! { SimdIsize, 1 -> 8, 2 -> 16, 4 -> 32 }
|
||||
impl_to_bytes! { SimdIsize, 8 }
|
||||
|
|
|
|||
|
|
@ -443,14 +443,6 @@ macro_rules! impl_float_tests {
|
|||
)
|
||||
}
|
||||
|
||||
fn sqrt<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::sqrt,
|
||||
&Scalar::sqrt,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn recip<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::recip,
|
||||
|
|
@ -605,6 +597,17 @@ macro_rules! impl_float_tests {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
test_helpers::test_lanes! {
|
||||
fn sqrt<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::sqrt,
|
||||
&Scalar::sqrt,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#![feature(portable_simd)]
|
||||
#![feature(portable_simd, const_generics, const_evaluatable_checked)]
|
||||
#![allow(incomplete_features)]
|
||||
#![cfg(feature = "const_evaluatable_checked")]
|
||||
|
||||
use core_simd::SimdU32;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue