diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs index d2f944d1e535..36e1fb59675c 100644 --- a/crates/core_simd/src/array.rs +++ b/crates/core_simd/src/array.rs @@ -1,4 +1,6 @@ +use crate::intrinsics; use crate::masks::*; +use crate::vector::ptr::SimdConstPtr; use crate::vector::*; /// A representation of a vector as an "array" with indices, implementing @@ -17,6 +19,70 @@ where /// Generates a SIMD vector with the same value in every lane. #[must_use] fn splat(val: Self::Scalar) -> Self; + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// If an index is out of bounds, that lane instead selects the value from the "or" vector. + /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// let alt = SimdI32::from_array([-5, -4, -3, -2]); + /// + /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds. + /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize, or: Self) -> Self { + Self::gather_select(slice, MaskSize::splat(true), idxs, or) + } + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// If an index is out of bounds, that lane instead holds the scalar type's default value (`0` in the example below). + /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// + /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds. 
+ /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize) -> Self + where + Self::Scalar: Default, + { + Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default())) + } + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// Lanes whose index is out of bounds, or whose mask lane is `false`, instead select the value from the "or" vector. + /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// let alt = SimdI32::from_array([-5, -4, -3, -2]); + /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane. + /// + /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds. + /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2])); + /// ``` + #[must_use] + #[inline] + fn gather_select( + slice: &[Self::Scalar], + mask: MaskSize, + idxs: SimdUsize, + or: Self, + ) -> Self { + let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int(); + let base_ptr = SimdConstPtr::splat(slice.as_ptr()); + // Offset the base pointer by each lane's index; the addresses are only computed here, not dereferenced. + let ptrs = base_ptr.wrapping_add(idxs); + // SAFETY: `mask` enables only lanes whose index is less than `slice.len()`, so every pointer the gather reads from points into `slice`; disabled lanes take their value from `or` instead. + unsafe { intrinsics::simd_gather(or, ptrs, mask) } + } } macro_rules! 
impl_simdarray_for { diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 7adf4c24e104..e69696e515b6 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -45,7 +45,7 @@ extern "platform-intrinsic" { /// fabs pub(crate) fn simd_fabs(x: T) -> T; - + /// fsqrt pub(crate) fn simd_fsqrt(x: T) -> T; @@ -63,6 +63,8 @@ extern "platform-intrinsic" { pub(crate) fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; pub(crate) fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; + pub(crate) fn simd_gather(val: T, ptr: U, mask: V) -> T; + // {s,u}add.sat pub(crate) fn simd_saturating_add(x: T, y: T) -> T;