Merge pull request #154 from rust-lang/feature/generic-element-type

Change vectors to be generic over element type.
2021-08-17 12:10:44 -07:00 · 2021-08-17 12:10:44 -07:00 · d42875302d
commit d42875302d
parent 50eb35eb3b 4aafd8e779
40 changed files with 1890 additions and 2090 deletions
--- a/crates/core_simd/src/comparisons.rs
+++ b/crates/core_simd/src/comparisons.rs
@@ -1,77 +1,49 @@
-use crate::{LaneCount, SupportedLaneCount};
+use crate::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};

-macro_rules! implement_mask_ops {
-    { $($vector:ident => $mask:ident ($inner_ty:ident),)* } => {
-        $(
-            impl<const LANES: usize> crate::$vector<LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                /// Test if each lane is equal to the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_eq(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_eq(self, other))
-                    }
-                }
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+    T: SimdElement + PartialEq,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    /// Test if each lane is equal to the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_eq(self, other)) }
+    }

-                /// Test if each lane is not equal to the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_ne(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_ne(self, other))
-                    }
-                }
-
-                /// Test if each lane is less than the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_lt(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_lt(self, other))
-                    }
-                }
-
-                /// Test if each lane is greater than the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_gt(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_gt(self, other))
-                    }
-                }
-
-                /// Test if each lane is less than or equal to the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_le(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_le(self, other))
-                    }
-                }
-
-                /// Test if each lane is greater than or equal to the corresponding lane in `other`.
-                #[inline]
-                pub fn lanes_ge(self, other: Self) -> crate::$mask<LANES> {
-                    unsafe {
-                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_ge(self, other))
-                    }
-                }
-            }
-        )*
+    /// Test if each lane is not equal to the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_ne(self, other)) }
    }
 }

-implement_mask_ops! {
-    SimdI8 => Mask8 (SimdI8),
-    SimdI16 => Mask16 (SimdI16),
-    SimdI32 => Mask32 (SimdI32),
-    SimdI64 => Mask64 (SimdI64),
-    SimdIsize => MaskSize (SimdIsize),
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+    T: SimdElement + PartialOrd,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    /// Test if each lane is less than the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_lt(self, other)) }
+    }

-    SimdU8 => Mask8 (SimdI8),
-    SimdU16 => Mask16 (SimdI16),
-    SimdU32 => Mask32 (SimdI32),
-    SimdU64 => Mask64 (SimdI64),
-    SimdUsize => MaskSize (SimdIsize),
+    /// Test if each lane is greater than the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_gt(self, other)) }
+    }

-    SimdF32 => Mask32 (SimdI32),
-    SimdF64 => Mask64 (SimdI64),
+    /// Test if each lane is less than or equal to the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_le(self, other)) }
+    }
+
+    /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+    #[inline]
+    pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
+        unsafe { Mask::from_int_unchecked(crate::intrinsics::simd_ge(self, other)) }
+    }
 }
--- a/crates/core_simd/src/fmt.rs
+++ b/crates/core_simd/src/fmt.rs
@@ -1,88 +1,36 @@
-macro_rules! debug_wrapper {
-    { $($trait:ident => $name:ident,)* } => {
+macro_rules! impl_fmt_trait {
+    { $($trait:ident,)* } => {
        $(
-            pub(crate) fn $name<T: core::fmt::$trait>(slice: &[T], f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                #[repr(transparent)]
-                struct Wrapper<'a, T: core::fmt::$trait>(&'a T);
+            impl<T, const LANES: usize> core::fmt::$trait for crate::Simd<T, LANES>
+            where
+                crate::LaneCount<LANES>: crate::SupportedLaneCount,
+                T: crate::SimdElement + core::fmt::$trait,
+            {
+                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                    #[repr(transparent)]
+                    struct Wrapper<'a, T: core::fmt::$trait>(&'a T);

-                impl<T: core::fmt::$trait> core::fmt::Debug for Wrapper<'_, T> {
-                    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                        self.0.fmt(f)
+                    impl<T: core::fmt::$trait> core::fmt::Debug for Wrapper<'_, T> {
+                        fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+                            self.0.fmt(f)
+                        }
                    }
-                }

-                f.debug_list()
-                    .entries(slice.iter().map(|x| Wrapper(x)))
-                    .finish()
+                    f.debug_list()
+                        .entries(self.as_array().iter().map(|x| Wrapper(x)))
+                        .finish()
+                }
            }
        )*
    }
 }

-debug_wrapper! {
-    Debug => format,
-    Binary => format_binary,
-    LowerExp => format_lower_exp,
-    UpperExp => format_upper_exp,
-    Octal => format_octal,
-    LowerHex => format_lower_hex,
-    UpperHex => format_upper_hex,
-}
-
-macro_rules! impl_fmt_trait {
-    { $($type:ident => $(($trait:ident, $format:ident)),*;)* } => {
-        $( // repeat type
-            $( // repeat trait
-                impl<const LANES: usize> core::fmt::$trait for crate::$type<LANES>
-                where
-                    crate::LaneCount<LANES>: crate::SupportedLaneCount,
-                {
-                    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                        $format(self.as_ref(), f)
-                    }
-                }
-            )*
-        )*
-    };
-    { integers: $($type:ident,)* } => {
-        impl_fmt_trait! {
-            $($type =>
-              (Debug, format),
-              (Binary, format_binary),
-              (LowerExp, format_lower_exp),
-              (UpperExp, format_upper_exp),
-              (Octal, format_octal),
-              (LowerHex, format_lower_hex),
-              (UpperHex, format_upper_hex);
-            )*
-        }
-    };
-    { floats: $($type:ident,)* } => {
-        impl_fmt_trait! {
-            $($type =>
-              (Debug, format),
-              (LowerExp, format_lower_exp),
-              (UpperExp, format_upper_exp);
-            )*
-        }
-    };
-    { masks: $($type:ident,)* } => {
-        impl_fmt_trait! {
-            $($type =>
-              (Debug, format);
-            )*
-        }
-    }
-}
-
 impl_fmt_trait! {
-    integers:
-        SimdU8, SimdU16, SimdU32, SimdU64,
-        SimdI8, SimdI16, SimdI32, SimdI64,
-        SimdUsize, SimdIsize,
-}
-
-impl_fmt_trait! {
-    floats:
-        SimdF32, SimdF64,
+    Debug,
+    Binary,
+    LowerExp,
+    UpperExp,
+    Octal,
+    LowerHex,
+    UpperHex,
 }
--- a/crates/core_simd/src/iter.rs
+++ b/crates/core_simd/src/iter.rs
@@ -1,54 +1,58 @@
-use crate::{LaneCount, SupportedLaneCount};
+use crate::{LaneCount, Simd, SupportedLaneCount};
+use core::{
+    iter::{Product, Sum},
+    ops::{Add, Mul},
+};

 macro_rules! impl_traits {
-    { $type:ident } => {
-        impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
+    { $type:ty } => {
+        impl<const LANES: usize> Sum<Self> for Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
-            fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
-                iter.fold(Default::default(), core::ops::Add::add)
+            fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Simd::splat(0 as $type), Add::add)
            }
        }

-        impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
+        impl<const LANES: usize> core::iter::Product<Self> for Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
-            fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
-                iter.fold(Default::default(), core::ops::Mul::mul)
+            fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Simd::splat(1 as $type), Mul::mul)
            }
        }

-        impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
+        impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
-            fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
-                iter.fold(Default::default(), core::ops::Add::add)
+            fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Simd::splat(0 as $type), Add::add)
            }
        }

-        impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
+        impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
-            fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
-                iter.fold(Default::default(), core::ops::Mul::mul)
+            fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Simd::splat(1 as $type), Mul::mul)
            }
        }
    }
 }

-impl_traits! { SimdF32 }
-impl_traits! { SimdF64 }
-impl_traits! { SimdU8 }
-impl_traits! { SimdU16 }
-impl_traits! { SimdU32 }
-impl_traits! { SimdU64 }
-impl_traits! { SimdUsize }
-impl_traits! { SimdI8 }
-impl_traits! { SimdI16 }
-impl_traits! { SimdI32 }
-impl_traits! { SimdI64 }
-impl_traits! { SimdIsize }
+impl_traits! { f32 }
+impl_traits! { f64 }
+impl_traits! { u8 }
+impl_traits! { u16 }
+impl_traits! { u32 }
+impl_traits! { u64 }
+impl_traits! { usize }
+impl_traits! { i8 }
+impl_traits! { i16 }
+impl_traits! { i32 }
+impl_traits! { i64 }
+impl_traits! { isize }
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -2,6 +2,7 @@
 #![allow(incomplete_features)]
 #![feature(
    const_evaluatable_checked,
+    const_fn_trait_bound,
    const_generics,
    platform_intrinsics,
    repr_simd,
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -12,521 +12,516 @@
 )]
 mod mask_impl;

-use crate::{SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
+use crate::{LaneCount, Simd, SimdElement, SupportedLaneCount};

-mod sealed {
-    pub trait Sealed {}
+/// Marker trait for types that may be used as SIMD mask elements.
+pub unsafe trait MaskElement: SimdElement {
+    #[doc(hidden)]
+    fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+    where
+        LaneCount<LANES>: SupportedLaneCount;
+
+    #[doc(hidden)]
+    fn eq(self, other: Self) -> bool;
+
+    #[doc(hidden)]
+    const TRUE: Self;
+
+    #[doc(hidden)]
+    const FALSE: Self;
 }

-/// Helper trait for mask types.
-pub trait Mask: sealed::Sealed {
-    /// The number of lanes for this mask.
-    const LANES: usize;
+macro_rules! impl_element {
+    { $ty:ty } => {
+        unsafe impl MaskElement for $ty {
+            fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+            where
+                LaneCount<LANES>: SupportedLaneCount,
+            {
+                (value.lanes_eq(Simd::splat(0)) | value.lanes_eq(Simd::splat(-1))).all()
+            }

-    /// Generates a mask with the same value in every lane.
-    #[must_use]
-    fn splat(val: bool) -> Self;
+            fn eq(self, other: Self) -> bool { self == other }
+
+            const TRUE: Self = -1;
+            const FALSE: Self = 0;
+        }
+    }
 }

-macro_rules! define_opaque_mask {
-    {
-        $(#[$attr:meta])*
-        struct $name:ident<const $lanes:ident: usize>($inner_ty:ty);
-        @bits $bits_ty:ident
-    } => {
-        $(#[$attr])*
-        #[allow(non_camel_case_types)]
-        pub struct $name<const LANES: usize>($inner_ty)
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount;
+impl_element! { i8 }
+impl_element! { i16 }
+impl_element! { i32 }
+impl_element! { i64 }
+impl_element! { isize }

-        impl<const LANES: usize> sealed::Sealed for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
+/// A SIMD vector mask for `LANES` elements of width specified by `T`.
+///
+/// The layout of this type is unspecified.
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount;

-        impl<const LANES: usize> Mask for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            const LANES: usize = LANES;
-
-            #[inline]
-            fn splat(value: bool) -> Self {
-                Self::splat(value)
-            }
-        }
-
-        impl_opaque_mask_reductions! { $name, $bits_ty }
-
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            /// Construct a mask by setting all lanes to the given value.
-            pub fn splat(value: bool) -> Self {
-                Self(<$inner_ty>::splat(value))
-            }
-
-            /// Converts an array to a SIMD vector.
-            pub fn from_array(array: [bool; LANES]) -> Self {
-                let mut vector = Self::splat(false);
-                let mut i = 0;
-                while i < $lanes {
-                    vector.set(i, array[i]);
-                    i += 1;
-                }
-                vector
-            }
-
-            /// Converts a SIMD vector to an array.
-            pub fn to_array(self) -> [bool; LANES] {
-                let mut array = [false; LANES];
-                let mut i = 0;
-                while i < $lanes {
-                    array[i] = self.test(i);
-                    i += 1;
-                }
-                array
-            }
-
-            /// Converts a vector of integers to a mask, where 0 represents `false` and -1
-            /// represents `true`.
-            ///
-            /// # Safety
-            /// All lanes must be either 0 or -1.
-            #[inline]
-            pub unsafe fn from_int_unchecked(value: $bits_ty<LANES>) -> Self {
-                Self(<$inner_ty>::from_int_unchecked(value))
-            }
-
-            /// Converts a vector of integers to a mask, where 0 represents `false` and -1
-            /// represents `true`.
-            ///
-            /// # Panics
-            /// Panics if any lane is not 0 or -1.
-            #[inline]
-            pub fn from_int(value: $bits_ty<LANES>) -> Self {
-                assert!(
-                    (value.lanes_eq($bits_ty::splat(0)) | value.lanes_eq($bits_ty::splat(-1))).all(),
-                    "all values must be either 0 or -1",
-                );
-                unsafe { Self::from_int_unchecked(value) }
-            }
-
-            /// Converts the mask to a vector of integers, where 0 represents `false` and -1
-            /// represents `true`.
-            #[inline]
-            pub fn to_int(self) -> $bits_ty<LANES> {
-                self.0.to_int()
-            }
-
-            /// Tests the value of the specified lane.
-            ///
-            /// # Safety
-            /// `lane` must be less than `LANES`.
-            #[inline]
-            pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
-                self.0.test_unchecked(lane)
-            }
-
-            /// Tests the value of the specified lane.
-            ///
-            /// # Panics
-            /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-            #[inline]
-            pub fn test(&self, lane: usize) -> bool {
-                assert!(lane < LANES, "lane index out of range");
-                unsafe { self.test_unchecked(lane) }
-            }
-
-            /// Sets the value of the specified lane.
-            ///
-            /// # Safety
-            /// `lane` must be less than `LANES`.
-            #[inline]
-            pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
-                self.0.set_unchecked(lane, value);
-            }
-
-            /// Sets the value of the specified lane.
-            ///
-            /// # Panics
-            /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-            #[inline]
-            pub fn set(&mut self, lane: usize, value: bool) {
-                assert!(lane < LANES, "lane index out of range");
-                unsafe { self.set_unchecked(lane, value); }
-            }
-
-            /// Convert this mask to a bitmask, with one bit set per lane.
-            pub fn to_bitmask(self) -> [u8; crate::LaneCount::<LANES>::BITMASK_LEN] {
-                self.0.to_bitmask()
-            }
-
-            /// Convert a bitmask to a mask.
-            pub fn from_bitmask(bitmask: [u8; crate::LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-                Self(<$inner_ty>::from_bitmask(bitmask))
-            }
-        }
-
-        // vector/array conversion
-        impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn from(array: [bool; LANES]) -> Self {
-                Self::from_array(array)
-            }
-        }
-
-        impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn from(vector: $name<LANES>) -> Self {
-                vector.to_array()
-            }
-        }
-
-        impl<const LANES: usize> Copy for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
-
-        impl<const LANES: usize> Clone for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn clone(&self) -> Self {
-                *self
-            }
-        }
-
-        impl<const LANES: usize> Default for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn default() -> Self {
-                Self::splat(false)
-            }
-        }
-
-        impl<const LANES: usize> PartialEq for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                self.0 == other.0
-            }
-        }
-
-        impl<const LANES: usize> PartialOrd for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-                self.0.partial_cmp(&other.0)
-            }
-        }
-
-        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                f.debug_list()
-                    .entries((0..LANES).map(|lane| self.test(lane)))
-                    .finish()
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitand(self, rhs: Self) -> Self {
-                Self(self.0 & rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitand(self, rhs: bool) -> Self {
-                self & Self::splat(rhs)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = $name<LANES>;
-            #[inline]
-            fn bitand(self, rhs: $name<LANES>) -> $name<LANES> {
-                $name::<LANES>::splat(self) & rhs
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOr for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitor(self, rhs: Self) -> Self {
-                Self(self.0 | rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitor(self, rhs: bool) -> Self {
-                self | Self::splat(rhs)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = $name<LANES>;
-            #[inline]
-            fn bitor(self, rhs: $name<LANES>) -> $name<LANES> {
-                $name::<LANES>::splat(self) | rhs
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXor for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitxor(self, rhs: Self) -> Self::Output {
-                Self(self.0 ^ rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitxor(self, rhs: bool) -> Self::Output {
-                self ^ Self::splat(rhs)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = $name<LANES>;
-            #[inline]
-            fn bitxor(self, rhs: $name<LANES>) -> Self::Output {
-                $name::<LANES>::splat(self) ^ rhs
-            }
-        }
-
-        impl<const LANES: usize> core::ops::Not for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = $name<LANES>;
-            #[inline]
-            fn not(self) -> Self::Output {
-                Self(!self.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitand_assign(&mut self, rhs: Self) {
-                self.0 = self.0 & rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitand_assign(&mut self, rhs: bool) {
-                *self &= Self::splat(rhs);
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitor_assign(&mut self, rhs: Self) {
-                self.0 = self.0 | rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitor_assign(&mut self, rhs: bool) {
-                *self |= Self::splat(rhs);
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitxor_assign(&mut self, rhs: Self) {
-                self.0 = self.0 ^ rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn bitxor_assign(&mut self, rhs: bool) {
-                *self ^= Self::splat(rhs);
-            }
-        }
-    };
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
 }

-define_opaque_mask! {
-    /// Mask for vectors with `LANES` 8-bit elements.
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    /// Construct a mask by setting all lanes to the given value.
+    pub fn splat(value: bool) -> Self {
+        Self(mask_impl::Mask::splat(value))
+    }
+
+    /// Converts an array of `bool`s to a SIMD mask.
+    pub fn from_array(array: [bool; LANES]) -> Self {
+        let mut vector = Self::splat(false);
+        for (i, v) in array.iter().enumerate() {
+            vector.set(i, *v);
+        }
+        vector
+    }
+
+    /// Converts a SIMD mask to an array of `bool`s.
+    pub fn to_array(self) -> [bool; LANES] {
+        let mut array = [false; LANES];
+        for (i, v) in array.iter_mut().enumerate() {
+            *v = self.test(i);
+        }
+        array
+    }
+
+    /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+    /// represents `true`.
    ///
-    /// The layout of this type is unspecified.
-    struct Mask8<const LANES: usize>(mask_impl::Mask8<LANES>);
-    @bits SimdI8
+    /// # Safety
+    /// All lanes must be either 0 or -1.
+    #[inline]
+    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        Self(mask_impl::Mask::from_int_unchecked(value))
+    }
+
+    /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+    /// represents `true`.
+    ///
+    /// # Panics
+    /// Panics if any lane is not 0 or -1.
+    #[inline]
+    pub fn from_int(value: Simd<T, LANES>) -> Self {
+        assert!(T::valid(value), "all values must be either 0 or -1",);
+        unsafe { Self::from_int_unchecked(value) }
+    }
+
+    /// Converts the mask to a vector of integers, where 0 represents `false` and -1
+    /// represents `true`.
+    #[inline]
+    pub fn to_int(self) -> Simd<T, LANES> {
+        self.0.to_int()
+    }
+
+    /// Tests the value of the specified lane.
+    ///
+    /// # Safety
+    /// `lane` must be less than `LANES`.
+    #[inline]
+    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        self.0.test_unchecked(lane)
+    }
+
+    /// Tests the value of the specified lane.
+    ///
+    /// # Panics
+    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+    #[inline]
+    pub fn test(&self, lane: usize) -> bool {
+        assert!(lane < LANES, "lane index out of range");
+        unsafe { self.test_unchecked(lane) }
+    }
+
+    /// Sets the value of the specified lane.
+    ///
+    /// # Safety
+    /// `lane` must be less than `LANES`.
+    #[inline]
+    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        self.0.set_unchecked(lane, value);
+    }
+
+    /// Sets the value of the specified lane.
+    ///
+    /// # Panics
+    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+    #[inline]
+    pub fn set(&mut self, lane: usize, value: bool) {
+        assert!(lane < LANES, "lane index out of range");
+        unsafe {
+            self.set_unchecked(lane, value);
+        }
+    }
+
+    /// Convert this mask to a bitmask, with one bit set per lane.
+    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
+        self.0.to_bitmask()
+    }
+
+    /// Convert a bitmask to a mask.
+    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
+        Self(mask_impl::Mask::from_bitmask(bitmask))
+    }
+
+    /// Returns true if any lane is set, or false otherwise.
+    #[inline]
+    pub fn any(self) -> bool {
+        self.0.any()
+    }
+
+    /// Returns true if all lanes are set, or false otherwise.
+    #[inline]
+    pub fn all(self) -> bool {
+        self.0.all()
+    }
 }

-define_opaque_mask! {
-    /// Mask for vectors with `LANES` 16-bit elements.
-    ///
-    /// The layout of this type is unspecified.
-    struct Mask16<const LANES: usize>(mask_impl::Mask16<LANES>);
-    @bits SimdI16
+// vector/array conversion
+impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn from(array: [bool; LANES]) -> Self {
+        Self::from_array(array)
+    }
 }

-define_opaque_mask! {
-    /// Mask for vectors with `LANES` 32-bit elements.
-    ///
-    /// The layout of this type is unspecified.
-    struct Mask32<const LANES: usize>(mask_impl::Mask32<LANES>);
-    @bits SimdI32
+impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn from(vector: Mask<T, LANES>) -> Self {
+        vector.to_array()
+    }
 }

-define_opaque_mask! {
-    /// Mask for vectors with `LANES` 64-bit elements.
-    ///
-    /// The layout of this type is unspecified.
-    struct Mask64<const LANES: usize>(mask_impl::Mask64<LANES>);
-    @bits SimdI64
+impl<T, const LANES: usize> Default for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn default() -> Self {
+        Self::splat(false)
+    }
 }

-define_opaque_mask! {
-    /// Mask for vectors with `LANES` pointer-width elements.
-    ///
-    /// The layout of this type is unspecified.
-    struct MaskSize<const LANES: usize>(mask_impl::MaskSize<LANES>);
-    @bits SimdIsize
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+    T: MaskElement + PartialEq,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.0 == other.0
+    }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+    T: MaskElement + PartialOrd,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        self.0.partial_cmp(&other.0)
+    }
+}
+
+impl<T, const LANES: usize> core::fmt::Debug for Mask<T, LANES>
+where
+    T: MaskElement + core::fmt::Debug,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_list()
+            .entries((0..LANES).map(|lane| self.test(lane)))
+            .finish()
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitand(self, rhs: Self) -> Self {
+        Self(self.0 & rhs.0)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitand(self, rhs: bool) -> Self {
+        self & Self::splat(rhs)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Mask<T, LANES>;
+    #[inline]
+    fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+        Mask::splat(self) & rhs
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitor(self, rhs: Self) -> Self {
+        Self(self.0 | rhs.0)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitor(self, rhs: bool) -> Self {
+        self | Self::splat(rhs)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Mask<T, LANES>;
+    #[inline]
+    fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+        Mask::splat(self) | rhs
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitxor(self, rhs: Self) -> Self::Output {
+        Self(self.0 ^ rhs.0)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitxor(self, rhs: bool) -> Self::Output {
+        self ^ Self::splat(rhs)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Mask<T, LANES>;
+    #[inline]
+    fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+        Mask::splat(self) ^ rhs
+    }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Mask<T, LANES>;
+    #[inline]
+    fn not(self) -> Self::Output {
+        Self(!self.0)
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitand_assign(&mut self, rhs: Self) {
+        self.0 = self.0 & rhs.0;
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitand_assign(&mut self, rhs: bool) {
+        *self &= Self::splat(rhs);
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitor_assign(&mut self, rhs: Self) {
+        self.0 = self.0 | rhs.0;
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitor_assign(&mut self, rhs: bool) {
+        *self |= Self::splat(rhs);
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitxor_assign(&mut self, rhs: Self) {
+        self.0 = self.0 ^ rhs.0;
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn bitxor_assign(&mut self, rhs: bool) {
+        *self ^= Self::splat(rhs);
+    }
 }

 /// Vector of eight 8-bit masks
-pub type mask8x8 = Mask8<8>;
+pub type mask8x8 = Mask<i8, 8>;

 /// Vector of 16 8-bit masks
-pub type mask8x16 = Mask8<16>;
+pub type mask8x16 = Mask<i8, 16>;

 /// Vector of 32 8-bit masks
-pub type mask8x32 = Mask8<32>;
+pub type mask8x32 = Mask<i8, 32>;

 /// Vector of 64 8-bit masks
-pub type mask8x64 = Mask8<64>;
+pub type mask8x64 = Mask<i8, 64>;

 /// Vector of four 16-bit masks
-pub type mask16x4 = Mask16<4>;
+pub type mask16x4 = Mask<i16, 4>;

 /// Vector of eight 16-bit masks
-pub type mask16x8 = Mask16<8>;
+pub type mask16x8 = Mask<i16, 8>;

 /// Vector of 16 16-bit masks
-pub type mask16x16 = Mask16<16>;
+pub type mask16x16 = Mask<i16, 16>;

 /// Vector of 32 16-bit masks
-pub type mask16x32 = Mask32<32>;
+pub type mask16x32 = Mask<i16, 32>;

 /// Vector of two 32-bit masks
-pub type mask32x2 = Mask32<2>;
+pub type mask32x2 = Mask<i32, 2>;

 /// Vector of four 32-bit masks
-pub type mask32x4 = Mask32<4>;
+pub type mask32x4 = Mask<i32, 4>;

 /// Vector of eight 32-bit masks
-pub type mask32x8 = Mask32<8>;
+pub type mask32x8 = Mask<i32, 8>;

 /// Vector of 16 32-bit masks
-pub type mask32x16 = Mask32<16>;
+pub type mask32x16 = Mask<i32, 16>;

 /// Vector of two 64-bit masks
-pub type mask64x2 = Mask64<2>;
+pub type mask64x2 = Mask<i64, 2>;

 /// Vector of four 64-bit masks
-pub type mask64x4 = Mask64<4>;
+pub type mask64x4 = Mask<i64, 4>;

 /// Vector of eight 64-bit masks
-pub type mask64x8 = Mask64<8>;
+pub type mask64x8 = Mask<i64, 8>;

 /// Vector of two pointer-width masks
-pub type masksizex2 = MaskSize<2>;
+pub type masksizex2 = Mask<isize, 2>;

 /// Vector of four pointer-width masks
-pub type masksizex4 = MaskSize<4>;
+pub type masksizex4 = Mask<isize, 4>;

 /// Vector of eight pointer-width masks
-pub type masksizex8 = MaskSize<8>;
+pub type masksizex8 = Mask<isize, 8>;

 macro_rules! impl_from {
-    { $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
+    { $from:ty  => $($to:ty),* } => {
        $(
-        impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
+        impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            LaneCount<LANES>: SupportedLaneCount,
        {
-            fn from(value: $from<LANES>) -> Self {
-                Self(value.0.into())
+            fn from(value: Mask<$from, LANES>) -> Self {
+                Self(value.0.convert())
            }
        }
        )*
    }
 }
-impl_from! { Mask8 (SimdI8) => Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize) }
-impl_from! { Mask16 (SimdI16) => Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8) }
-impl_from! { Mask32 (SimdI32) => Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16) }
-impl_from! { Mask64 (SimdI64) => MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32) }
-impl_from! { MaskSize (SimdIsize) => Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64) }
+impl_from! { i8 => i16, i32, i64, isize }
+impl_from! { i16 => i32, i64, isize, i8 }
+impl_from! { i32 => i64, isize, i8, i16 }
+impl_from! { i64 => isize, i8, i16, i32 }
+impl_from! { isize => i8, i16, i32, i64 }
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@ -1,38 +1,26 @@
-use crate::{LaneCount, SupportedLaneCount};
-
-/// Helper trait for limiting int conversion types
-pub trait ConvertToInt {}
-impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where
-    LaneCount<LANES>: SupportedLaneCount
-{
-}
-impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where
-    LaneCount<LANES>: SupportedLaneCount
-{
-}
-impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where
-    LaneCount<LANES>: SupportedLaneCount
-{
-}
-impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where
-    LaneCount<LANES>: SupportedLaneCount
-{
-}
-impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where
-    LaneCount<LANES>: SupportedLaneCount
-{
-}
+use crate::{LaneCount, MaskElement, Simd, SupportedLaneCount};
+use core::marker::PhantomData;

 /// A mask where each lane is represented by a single bit.
 #[repr(transparent)]
-pub struct BitMask<const LANES: usize>(<LaneCount<LANES> as SupportedLaneCount>::BitMask)
+pub struct Mask<T, const LANES: usize>(
+    <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+    PhantomData<T>,
+)
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount;

-impl<const LANES: usize> Copy for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
-
-impl<const LANES: usize> Clone for BitMask<LANES>
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
 where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    fn clone(&self) -> Self {
@ -40,8 +28,9 @@ where
    }
 }

-impl<const LANES: usize> PartialEq for BitMask<LANES>
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    fn eq(&self, other: &Self) -> bool {
@ -49,8 +38,9 @@ where
    }
 }

-impl<const LANES: usize> PartialOrd for BitMask<LANES>
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@ -58,10 +48,16 @@ where
    }
 }

-impl<const LANES: usize> Eq for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
-
-impl<const LANES: usize> Ord for BitMask<LANES>
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
 where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
@ -69,8 +65,9 @@ where
    }
 }

-impl<const LANES: usize> BitMask<LANES>
+impl<T, const LANES: usize> Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    #[inline]
@ -84,7 +81,7 @@ where
        if LANES % 8 > 0 {
            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
        }
-        Self(mask)
+        Self(mask, PhantomData)
    }

    #[inline]
@ -98,33 +95,28 @@ where
    }

    #[inline]
-    pub fn to_int<V>(self) -> V
-    where
-        V: ConvertToInt + Default + core::ops::Not<Output = V>,
-    {
+    pub fn to_int(self) -> Simd<T, LANES> {
        unsafe {
            let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
                core::mem::transmute_copy(&self);
-            crate::intrinsics::simd_select_bitmask(mask, !V::default(), V::default())
+            crate::intrinsics::simd_select_bitmask(
+                mask,
+                Simd::splat(T::TRUE),
+                Simd::splat(T::FALSE),
+            )
        }
    }

    #[inline]
-    pub unsafe fn from_int_unchecked<V>(value: V) -> Self
-    where
-        V: crate::Vector,
-    {
+    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
        // TODO remove the transmute when rustc is more flexible
        assert_eq!(
-            core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(
-            ),
-            core::mem::size_of::<
-                <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask,
-            >(),
+            core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::BitMask>(),
+            core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
        );
        let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
            crate::intrinsics::simd_bitmask(value);
-        Self(core::mem::transmute_copy(&mask))
+        Self(core::mem::transmute_copy(&mask), PhantomData)
    }

    #[inline]
@ -136,7 +128,15 @@ where
    #[inline]
    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
        // Safety: these are the same type and we are laundering the generic
-        Self(unsafe { core::mem::transmute_copy(&bitmask) })
+        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+    }
+
+    #[inline]
+    pub fn convert<U>(self) -> Mask<U, LANES>
+    where
+        U: MaskElement,
+    {
+        unsafe { core::mem::transmute_copy(&self) }
    }

    #[inline]
@ -150,10 +150,11 @@ where
    }
 }

-impl<const LANES: usize> core::ops::BitAnd for BitMask<LANES>
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
-    <LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
+    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
 {
    type Output = Self;
    #[inline]
@ -165,10 +166,11 @@ where
    }
 }

-impl<const LANES: usize> core::ops::BitOr for BitMask<LANES>
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
-    <LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
+    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
 {
    type Output = Self;
    #[inline]
@ -180,8 +182,9 @@ where
    }
 }

-impl<const LANES: usize> core::ops::BitXor for BitMask<LANES>
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    type Output = Self;
@ -194,8 +197,9 @@ where
    }
 }

-impl<const LANES: usize> core::ops::Not for BitMask<LANES>
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
 where
+    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
 {
    type Output = Self;
@ -210,9 +214,3 @@ where
        self
    }
 }
-
-pub type Mask8<const LANES: usize> = BitMask<LANES>;
-pub type Mask16<const LANES: usize> = BitMask<LANES>;
-pub type Mask32<const LANES: usize> = BitMask<LANES>;
-pub type Mask64<const LANES: usize> = BitMask<LANES>;
-pub type MaskSize<const LANES: usize> = BitMask<LANES>;
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@ -1,264 +1,225 @@
 //! Masks that take up full SIMD vector registers.

-macro_rules! define_mask {
+use super::MaskElement;
+use crate::{LaneCount, Simd, SupportedLaneCount};
+
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+    T: MaskElement + PartialEq,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.0.eq(&other.0)
+    }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+    T: MaskElement + PartialOrd,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        self.0.partial_cmp(&other.0)
+    }
+}
+
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
+where
+    T: MaskElement + Eq,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+    T: MaskElement + Ord,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    pub fn splat(value: bool) -> Self {
+        Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
+    }
+
+    #[inline]
+    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        T::eq(self.0[lane], T::TRUE)
+    }
+
+    #[inline]
+    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        self.0[lane] = if value { T::TRUE } else { T::FALSE }
+    }
+
+    #[inline]
+    pub fn to_int(self) -> Simd<T, LANES> {
+        self.0
+    }
+
+    #[inline]
+    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        Self(value)
+    }
+
+    #[inline]
+    pub fn convert<U>(self) -> Mask<U, LANES>
+    where
+        U: MaskElement,
    {
-        $(#[$attr:meta])*
-        struct $name:ident<const $lanes:ident: usize>(
-            crate::$type:ident<$lanes2:ident>
-        );
-    } => {
-        $(#[$attr])*
-        #[repr(transparent)]
-        pub struct $name<const $lanes: usize>(crate::$type<$lanes>)
-        where
-            crate::LaneCount<$lanes>: crate::SupportedLaneCount;
+        unsafe { Mask(crate::intrinsics::simd_cast(self.0)) }
+    }

-        impl_full_mask_reductions! { $name, $type }
+    #[inline]
+    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
+        unsafe {
+            // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
+            assert_eq!(
+                core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
+                LaneCount::<LANES>::BITMASK_LEN,
+            );
+            let bitmask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
+                crate::intrinsics::simd_bitmask(self.0);
+            let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
+                core::mem::transmute_copy(&bitmask);

-        impl<const LANES: usize> Copy for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
-
-        impl<const LANES: usize> Clone for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            fn clone(&self) -> Self {
-                *self
-            }
-        }
-
-        impl<const LANES: usize> PartialEq for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn eq(&self, other: &Self) -> bool {
-                self.0 == other.0
-            }
-        }
-
-        impl<const LANES: usize> PartialOrd for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-                self.0.partial_cmp(&other.0)
-            }
-        }
-
-        impl<const LANES: usize> Eq for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
-
-        impl<const LANES: usize> Ord for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
-                self.0.cmp(&other.0)
-            }
-        }
-
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            pub fn splat(value: bool) -> Self {
-                Self(
-                    <crate::$type<LANES>>::splat(
-                        if value {
-                            -1
-                        } else {
-                            0
-                        }
-                    ),
-                )
-            }
-
-            #[inline]
-            pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
-                self.0[lane] == -1
-            }
-
-            #[inline]
-            pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
-                self.0[lane] = if value {
-                    -1
-                } else {
-                    0
+            // There is a bug where LLVM appears to implement this operation with the wrong
+            // bit order.
+            // TODO fix this in a better way
+            if cfg!(any(target_arch = "mips", target_arch = "mips64")) {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
                }
            }

-            #[inline]
-            pub fn to_int(self) -> crate::$type<LANES> {
-                self.0
-            }
+            bitmask
+        }
+    }

-            #[inline]
-            pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
-                Self(value)
-            }
-
-            #[inline]
-            pub fn to_bitmask(self) -> [u8; crate::LaneCount::<LANES>::BITMASK_LEN] {
-                unsafe {
-                    // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
-                    assert_eq!(
-                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
-                        crate::LaneCount::<LANES>::BITMASK_LEN,
-                    );
-                    let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
-                    let mut bitmask: [u8; crate::LaneCount::<LANES>::BITMASK_LEN] = core::mem::transmute_copy(&bitmask);
-
-                    // There is a bug where LLVM appears to implement this operation with the wrong
-                    // bit order.
-                    // TODO fix this in a better way
-                    if cfg!(any(target_arch = "mips", target_arch = "mips64")) {
-                        for x in bitmask.as_mut() {
-                            *x = x.reverse_bits();
-                        }
-                    }
-
-                    bitmask
+    #[inline]
+    pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
+        unsafe {
+            // There is a bug where LLVM appears to implement this operation with the wrong
+            // bit order.
+            // TODO fix this in a better way
+            if cfg!(any(target_arch = "mips", target_arch = "mips64")) {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
                }
            }

-            #[inline]
-            pub fn from_bitmask(mut bitmask: [u8; crate::LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-                unsafe {
-                    // There is a bug where LLVM appears to implement this operation with the wrong
-                    // bit order.
-                    // TODO fix this in a better way
-                    if cfg!(any(target_arch = "mips", target_arch = "mips64")) {
-                        for x in bitmask.as_mut() {
-                            *x = x.reverse_bits();
-                        }
-                    }
+            // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
+            assert_eq!(
+                core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
+                LaneCount::<LANES>::BITMASK_LEN,
+            );
+            let bitmask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
+                core::mem::transmute_copy(&bitmask);

-                    // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
-                    assert_eq!(
-                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
-                        crate::LaneCount::<LANES>::BITMASK_LEN,
-                    );
-                    let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = core::mem::transmute_copy(&bitmask);
-
-                    Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
-                        bitmask,
-                        Self::splat(true).to_int(),
-                        Self::splat(false).to_int(),
-                    ))
-                }
-            }
+            Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
+                bitmask,
+                Self::splat(true).to_int(),
+                Self::splat(false).to_int(),
+            ))
        }
+    }

-        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn from(value: $name<LANES>) -> Self {
-                value.0
-            }
-        }
+    #[inline]
+    pub fn any(self) -> bool {
+        unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) }
+    }

-        impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitand(self, rhs: Self) -> Self {
-                Self(self.0 & rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOr for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitor(self, rhs: Self) -> Self {
-                Self(self.0 | rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXor for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn bitxor(self, rhs: Self) -> Self::Output {
-                Self(self.0 ^ rhs.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::Not for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Output = Self;
-            #[inline]
-            fn not(self) -> Self::Output {
-                Self(!self.0)
-            }
-        }
+    #[inline]
+    pub fn all(self) -> bool {
+        unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) }
    }
 }

-define_mask! {
-    /// A mask equivalent to [SimdI8](crate::SimdI8), where all bits in the lane must be either set
-    /// or unset.
-    struct Mask8<const LANES: usize>(crate::SimdI8<LANES>);
-}
-
-define_mask! {
-    /// A mask equivalent to [SimdI16](crate::SimdI16), where all bits in the lane must be either set
-    /// or unset.
-    struct Mask16<const LANES: usize>(crate::SimdI16<LANES>);
-}
-
-define_mask! {
-    /// A mask equivalent to [SimdI32](crate::SimdI32), where all bits in the lane must be either set
-    /// or unset.
-    struct Mask32<const LANES: usize>(crate::SimdI32<LANES>);
-}
-
-define_mask! {
-    /// A mask equivalent to [SimdI64](crate::SimdI64), where all bits in the lane must be either set
-    /// or unset.
-    struct Mask64<const LANES: usize>(crate::SimdI64<LANES>);
-}
-
-define_mask! {
-    /// A mask equivalent to [SimdIsize](crate::SimdIsize), where all bits in the lane must be either set
-    /// or unset.
-    struct MaskSize<const LANES: usize>(crate::SimdIsize<LANES>);
-}
-
-macro_rules! impl_from {
-    { $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
-        $(
-        impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            fn from(value: $from<LANES>) -> Self {
-                let mut new = Self::splat(false);
-                for i in 0..LANES {
-                    unsafe { new.set_unchecked(i, value.test_unchecked(i)) }
-                }
-                new
-            }
-        }
-        )*
+impl<T, const LANES: usize> core::convert::From<Mask<T, LANES>> for Simd<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn from(value: Mask<T, LANES>) -> Self {
+        value.0
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitand(self, rhs: Self) -> Self {
+        unsafe { Self(crate::intrinsics::simd_and(self.0, rhs.0)) }
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitor(self, rhs: Self) -> Self {
+        unsafe { Self(crate::intrinsics::simd_or(self.0, rhs.0)) }
+    }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn bitxor(self, rhs: Self) -> Self {
+        unsafe { Self(crate::intrinsics::simd_xor(self.0, rhs.0)) }
+    }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    type Output = Self;
+    #[inline]
+    fn not(self) -> Self::Output {
+        Self::splat(true) ^ self
    }
 }
-impl_from! { Mask8 (SimdI8) => Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize) }
-impl_from! { Mask16 (SimdI16) => Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8) }
-impl_from! { Mask32 (SimdI32) => Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16) }
-impl_from! { Mask64 (SimdI64) => MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32) }
-impl_from! { MaskSize (SimdIsize) => Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64) }
--- a/crates/core_simd/src/math.rs
+++ b/crates/core_simd/src/math.rs
@ -1,6 +1,8 @@
+use crate::{LaneCount, Simd, SupportedLaneCount};
+
 macro_rules! impl_uint_arith {
-    ($(($name:ident, $n:ident)),+) => {
-        $( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+    ($($ty:ty),+) => {
+        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {

            /// Lanewise saturating add.
            ///
@ -8,9 +10,9 @@ macro_rules! impl_uint_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::MAX;")]
-            #[doc = concat!("let x = ", stringify!($name), "::from_array([2, 1, 0, MAX]);")]
-            #[doc = concat!("let max = ", stringify!($name), "::splat(MAX);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+            /// let x = Simd::from_array([2, 1, 0, MAX]);
+            /// let max = Simd::splat(MAX);
            /// let unsat = x + max;
            /// let sat = x.saturating_add(max);
            /// assert_eq!(x - 1, unsat);
@ -27,13 +29,13 @@ macro_rules! impl_uint_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::MAX;")]
-            #[doc = concat!("let x = ", stringify!($name), "::from_array([2, 1, 0, MAX]);")]
-            #[doc = concat!("let max = ", stringify!($name), "::splat(MAX);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+            /// let x = Simd::from_array([2, 1, 0, MAX]);
+            /// let max = Simd::splat(MAX);
            /// let unsat = x - max;
            /// let sat = x.saturating_sub(max);
            /// assert_eq!(unsat, x + 1);
-            #[doc = concat!("assert_eq!(sat, ", stringify!($name), "::splat(0));")]
+            /// assert_eq!(sat, Simd::splat(0));
            #[inline]
            pub fn saturating_sub(self, second: Self) -> Self {
                unsafe { crate::intrinsics::simd_saturating_sub(self, second) }
@ -43,8 +45,8 @@ macro_rules! impl_uint_arith {
 }

 macro_rules! impl_int_arith {
-    ($(($name:ident, $n:ident)),+) => {
-        $( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+    ($($ty:ty),+) => {
+        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {

            /// Lanewise saturating add.
            ///
@ -52,13 +54,13 @@ macro_rules! impl_int_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::{MIN, MAX};")]
-            #[doc = concat!("let x = ", stringify!($name), "::from_array([MIN, 0, 1, MAX]);")]
-            #[doc = concat!("let max = ", stringify!($name), "::splat(MAX);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+            /// let x = Simd::from_array([MIN, 0, 1, MAX]);
+            /// let max = Simd::splat(MAX);
            /// let unsat = x + max;
            /// let sat = x.saturating_add(max);
-            #[doc = concat!("assert_eq!(unsat, ", stringify!($name), "::from_array([-1, MAX, MIN, -2]));")]
-            #[doc = concat!("assert_eq!(sat, ", stringify!($name), "::from_array([-1, MAX, MAX, MAX]));")]
+            /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
+            /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
            /// ```
            #[inline]
            pub fn saturating_add(self, second: Self) -> Self {
@ -71,13 +73,13 @@ macro_rules! impl_int_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::{MIN, MAX};")]
-            #[doc = concat!("let x = ", stringify!($name), "::from_array([MIN, -2, -1, MAX]);")]
-            #[doc = concat!("let max = ", stringify!($name), "::splat(MAX);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+            /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+            /// let max = Simd::splat(MAX);
            /// let unsat = x - max;
            /// let sat = x.saturating_sub(max);
-            #[doc = concat!("assert_eq!(unsat, ", stringify!($name), "::from_array([1, MAX, MIN, 0]));")]
-            #[doc = concat!("assert_eq!(sat, ", stringify!($name), "::from_array([MIN, MIN, MIN, 0]));")]
+            /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+            /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
            #[inline]
            pub fn saturating_sub(self, second: Self) -> Self {
                unsafe { crate::intrinsics::simd_saturating_sub(self, second) }
@ -90,13 +92,13 @@ macro_rules! impl_int_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::{MIN, MAX};")]
-            #[doc = concat!("let xs = ", stringify!($name), "::from_array([MIN, MIN +1, -5, 0]);")]
-            #[doc = concat!("assert_eq!(xs.abs(), ", stringify!($name), "::from_array([MIN, MAX, 5, 0]));")]
+            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+            /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
+            /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
            /// ```
            #[inline]
            pub fn abs(self) -> Self {
-                const SHR: $n = <$n>::BITS as $n - 1;
+                const SHR: $ty = <$ty>::BITS as $ty - 1;
                let m = self >> SHR;
                (self^m) - m
            }
@ -108,17 +110,17 @@ macro_rules! impl_int_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::{MIN, MAX};")]
-            #[doc = concat!("let xs = ", stringify!($name), "::from_array([MIN, -2, 0, 3]);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+            /// let xs = Simd::from_array([MIN, -2, 0, 3]);
            /// let unsat = xs.abs();
            /// let sat = xs.saturating_abs();
-            #[doc = concat!("assert_eq!(unsat, ", stringify!($name), "::from_array([MIN, 2, 0, 3]));")]
-            #[doc = concat!("assert_eq!(sat, ", stringify!($name), "::from_array([MAX, 2, 0, 3]));")]
+            /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+            /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
            /// ```
            #[inline]
            pub fn saturating_abs(self) -> Self {
                // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
-                const SHR: $n = <$n>::BITS as $n - 1;
+                const SHR: $ty = <$ty>::BITS as $ty - 1;
                let m = self >> SHR;
                (self^m).saturating_sub(m)
            }
@ -130,12 +132,12 @@ macro_rules! impl_int_arith {
            /// ```
            /// # #![feature(portable_simd)]
            /// # use core_simd::*;
-            #[doc = concat!("# use core::", stringify!($n), "::{MIN, MAX};")]
-            #[doc = concat!("let x = ", stringify!($name), "::from_array([MIN, -2, 3, MAX]);")]
+            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+            /// let x = Simd::from_array([MIN, -2, 3, MAX]);
            /// let unsat = -x;
            /// let sat = x.saturating_neg();
-            #[doc = concat!("assert_eq!(unsat, ", stringify!($name), "::from_array([MIN, 2, -3, MIN + 1]));")]
-            #[doc = concat!("assert_eq!(sat, ", stringify!($name), "::from_array([MAX, 2, -3, MIN + 1]));")]
+            /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+            /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
            /// ```
            #[inline]
            pub fn saturating_neg(self) -> Self {
@ -145,7 +147,5 @@ macro_rules! impl_int_arith {
    }
 }

-use crate::vector::*;
-
-impl_uint_arith! { (SimdU8, u8), (SimdU16, u16), (SimdU32, u32), (SimdU64, u64), (SimdUsize, usize) }
-impl_int_arith! { (SimdI8, i8), (SimdI16, i16), (SimdI32, i32), (SimdI64, i64), (SimdIsize, isize) }
+impl_uint_arith! { u8, u16, u32, u64, usize }
+impl_int_arith! { i8, i16, i32, i64, isize }
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@ -1,4 +1,27 @@
-use crate::{LaneCount, SupportedLaneCount};
+use crate::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+
+impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES> // one generic impl covering every element type `T`
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+    I: core::slice::SliceIndex<[T]>, // any index type usable on a `[T]` slice is accepted
+{
+    type Output = I::Output; // output type is dictated by the index type
+    fn index(&self, index: I) -> &Self::Output {
+        &self.as_array()[index] // delegates to indexing the value returned by `as_array()`, so bounds behavior is whatever that indexing provides
+    }
+}
+
+impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES> // mutable counterpart of the `Index` impl, with the same bounds
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+    I: core::slice::SliceIndex<[T]>, // any index type usable on a `[T]` slice is accepted
+{
+    fn index_mut(&mut self, index: I) -> &mut Self::Output {
+        &mut self.as_mut_array()[index] // delegates to indexing the value returned by `as_mut_array()`
+    }
+}

 /// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
 fn invalid_shift_rhs<T>(rhs: T) -> bool
@ -132,40 +155,40 @@ macro_rules! impl_ref_ops {

 /// Automatically implements operators over vectors and scalars for a particular vector.
 macro_rules! impl_op {
-    { impl Add for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Add::add, AddAssign::add_assign, simd_add }
+    { impl Add for $scalar:ty } => {
+        impl_op! { @binary $scalar, Add::add, AddAssign::add_assign, simd_add }
    };
-    { impl Sub for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Sub::sub, SubAssign::sub_assign, simd_sub }
+    { impl Sub for $scalar:ty } => {
+        impl_op! { @binary $scalar, Sub::sub, SubAssign::sub_assign, simd_sub }
    };
-    { impl Mul for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Mul::mul, MulAssign::mul_assign, simd_mul }
+    { impl Mul for $scalar:ty } => {
+        impl_op! { @binary $scalar, Mul::mul, MulAssign::mul_assign, simd_mul }
    };
-    { impl Div for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Div::div, DivAssign::div_assign, simd_div }
+    { impl Div for $scalar:ty } => {
+        impl_op! { @binary $scalar, Div::div, DivAssign::div_assign, simd_div }
    };
-    { impl Rem for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Rem::rem, RemAssign::rem_assign, simd_rem }
+    { impl Rem for $scalar:ty } => {
+        impl_op! { @binary $scalar, Rem::rem, RemAssign::rem_assign, simd_rem }
    };
-    { impl Shl for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Shl::shl, ShlAssign::shl_assign, simd_shl }
+    { impl Shl for $scalar:ty } => {
+        impl_op! { @binary $scalar, Shl::shl, ShlAssign::shl_assign, simd_shl }
    };
-    { impl Shr for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, Shr::shr, ShrAssign::shr_assign, simd_shr }
+    { impl Shr for $scalar:ty } => {
+        impl_op! { @binary $scalar, Shr::shr, ShrAssign::shr_assign, simd_shr }
    };
-    { impl BitAnd for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, BitAnd::bitand, BitAndAssign::bitand_assign, simd_and }
+    { impl BitAnd for $scalar:ty } => {
+        impl_op! { @binary $scalar, BitAnd::bitand, BitAndAssign::bitand_assign, simd_and }
    };
-    { impl BitOr for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, BitOr::bitor, BitOrAssign::bitor_assign, simd_or }
+    { impl BitOr for $scalar:ty } => {
+        impl_op! { @binary $scalar, BitOr::bitor, BitOrAssign::bitor_assign, simd_or }
    };
-    { impl BitXor for $type:ident, $scalar:ty } => {
-        impl_op! { @binary $type, $scalar, BitXor::bitxor, BitXorAssign::bitxor_assign, simd_xor }
+    { impl BitXor for $scalar:ty } => {
+        impl_op! { @binary $scalar, BitXor::bitxor, BitXorAssign::bitxor_assign, simd_xor }
    };

-    { impl Not for $type:ident, $scalar:ty } => {
+    { impl Not for $scalar:ty } => {
        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::Not for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::Not for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -177,9 +200,9 @@ macro_rules! impl_op {
        }
    };

-    { impl Neg for $type:ident, $scalar:ty } => {
+    { impl Neg for $scalar:ty } => {
        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::Neg for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::Neg for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -191,35 +214,10 @@ macro_rules! impl_op {
        }
    };

-    { impl Index for $type:ident, $scalar:ty } => {
-        impl<I, const LANES: usize> core::ops::Index<I> for crate::$type<LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-            I: core::slice::SliceIndex<[$scalar]>,
-        {
-            type Output = I::Output;
-            fn index(&self, index: I) -> &Self::Output {
-                let slice: &[_] = self.as_ref();
-                &slice[index]
-            }
-        }
-
-        impl<I, const LANES: usize> core::ops::IndexMut<I> for crate::$type<LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-            I: core::slice::SliceIndex<[$scalar]>,
-        {
-            fn index_mut(&mut self, index: I) -> &mut Self::Output {
-                let slice: &mut [_] = self.as_mut();
-                &mut slice[index]
-            }
-        }
-    };
-
    // generic binary op with assignment when output is `Self`
-    { @binary $type:ident, $scalar:ty, $trait:ident :: $trait_fn:ident, $assign_trait:ident :: $assign_trait_fn:ident, $intrinsic:ident } => {
+    { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $assign_trait:ident :: $assign_trait_fn:ident, $intrinsic:ident } => {
        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<Self> for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::$trait<Self> for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -235,7 +233,7 @@ macro_rules! impl_op {
        }

        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<$scalar> for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::$trait<$scalar> for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -249,21 +247,21 @@ macro_rules! impl_op {
        }

        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<crate::$type<LANES>> for $scalar
+            impl<const LANES: usize> core::ops::$trait<Simd<$scalar, LANES>> for $scalar
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
-                type Output = crate::$type<LANES>;
+                type Output = Simd<$scalar, LANES>;

                #[inline]
-                fn $trait_fn(self, rhs: crate::$type<LANES>) -> Self::Output {
-                    core::ops::$trait::$trait_fn(crate::$type::splat(self), rhs)
+                fn $trait_fn(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
+                    core::ops::$trait::$trait_fn(Simd::splat(self), rhs)
                }
            }
        }

        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$assign_trait<Self> for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::$assign_trait<Self> for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -277,7 +275,7 @@ macro_rules! impl_op {
        }

        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$assign_trait<$scalar> for crate::$type<LANES>
+            impl<const LANES: usize> core::ops::$assign_trait<$scalar> for Simd<$scalar, LANES>
            where
                LaneCount<LANES>: SupportedLaneCount,
            {
@ -292,379 +290,354 @@ macro_rules! impl_op {

 /// Implements floating-point operators for the provided types.
 macro_rules! impl_float_ops {
-    { $($scalar:ty => $($vector:ident),*;)* } => {
-        $( // scalar
-            $( // vector
-                impl_op! { impl Add for $vector, $scalar }
-                impl_op! { impl Sub for $vector, $scalar }
-                impl_op! { impl Mul for $vector, $scalar }
-                impl_op! { impl Div for $vector, $scalar }
-                impl_op! { impl Rem for $vector, $scalar }
-                impl_op! { impl Neg for $vector, $scalar }
-                impl_op! { impl Index for $vector, $scalar }
-            )*
+    { $($scalar:ty),* } => { // input is a comma-separated list of scalar types; no vector names are passed any more
+        $( // expands once per listed scalar type
+            impl_op! { impl Add for $scalar } // each line forwards to the matching `impl_op!` arm for this scalar
+            impl_op! { impl Sub for $scalar }
+            impl_op! { impl Mul for $scalar }
+            impl_op! { impl Div for $scalar }
+            impl_op! { impl Rem for $scalar }
+            impl_op! { impl Neg for $scalar } // unary negation is the last op requested; `Index` is no longer requested here
         )*
     };
 }

 /// Implements unsigned integer operators for the provided types.
 macro_rules! impl_unsigned_int_ops {
-    { $($scalar:ty => $($vector:ident),*;)* } => {
-        $( // scalar
-            $( // vector
-                impl_op! { impl Add for $vector, $scalar }
-                impl_op! { impl Sub for $vector, $scalar }
-                impl_op! { impl Mul for $vector, $scalar }
-                impl_op! { impl BitAnd for $vector, $scalar }
-                impl_op! { impl BitOr  for $vector, $scalar }
-                impl_op! { impl BitXor for $vector, $scalar }
-                impl_op! { impl Not for $vector, $scalar }
-                impl_op! { impl Index for $vector, $scalar }
+    { $($scalar:ty),* } => {
+        $(
+            impl_op! { impl Add for $scalar }
+            impl_op! { impl Sub for $scalar }
+            impl_op! { impl Mul for $scalar }
+            impl_op! { impl BitAnd for $scalar }
+            impl_op! { impl BitOr  for $scalar }
+            impl_op! { impl BitXor for $scalar }
+            impl_op! { impl Not for $scalar }

-                // Integers panic on divide by 0
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Div<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            // Integers panic on divide by 0
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Div<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;

-                        #[inline]
-                        fn div(self, rhs: Self) -> Self::Output {
-                            if rhs.as_array()
-                                .iter()
-                                .any(|x| *x == 0)
-                            {
-                                panic!("attempt to divide by zero");
-                            }
+                    #[inline]
+                    fn div(self, rhs: Self) -> Self::Output {
+                        if rhs.as_array()
+                            .iter()
+                            .any(|x| *x == 0)
+                        {
+                            panic!("attempt to divide by zero");
+                        }

-                            // Guards for div(MIN, -1),
-                            // this check only applies to signed ints
-                            if <$scalar>::MIN != 0 && self.as_array().iter()
-                                    .zip(rhs.as_array().iter())
-                                    .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
+                        // Guards for div(MIN, -1),
+                        // this check only applies to signed ints
+                        if <$scalar>::MIN != 0 && self.as_array().iter()
+                                .zip(rhs.as_array().iter())
+                                .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
+                            panic!("attempt to divide with overflow");
+                        }
+                        unsafe { crate::intrinsics::simd_div(self, rhs) }
+                    }
+                }
+            }
+
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Div<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;
+
+                    #[inline]
+                    fn div(self, rhs: $scalar) -> Self::Output {
+                        if rhs == 0 {
+                            panic!("attempt to divide by zero");
+                        }
+                        if <$scalar>::MIN != 0 &&
+                            self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
+                            rhs == -1 as _ {
                                panic!("attempt to divide with overflow");
-                            }
-                            unsafe { crate::intrinsics::simd_div(self, rhs) }
                        }
+                        let rhs = Self::splat(rhs);
+                        unsafe { crate::intrinsics::simd_div(self, rhs) }
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Div<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Div<Simd<$scalar, LANES>> for $scalar
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Simd<$scalar, LANES>;

-                        #[inline]
-                        fn div(self, rhs: $scalar) -> Self::Output {
-                            if rhs == 0 {
-                                panic!("attempt to divide by zero");
-                            }
-                            if <$scalar>::MIN != 0 &&
-                                self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
-                                rhs == -1 as _ {
-                                    panic!("attempt to divide with overflow");
-                            }
-                            let rhs = Self::splat(rhs);
-                            unsafe { crate::intrinsics::simd_div(self, rhs) }
-                        }
+                    #[inline]
+                    fn div(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
+                        Simd::splat(self) / rhs
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Div<crate::$vector<LANES>> for $scalar
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = crate::$vector<LANES>;
-
-                        #[inline]
-                        fn div(self, rhs: crate::$vector<LANES>) -> Self::Output {
-                            crate::$vector::splat(self) / rhs
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::DivAssign<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn div_assign(&mut self, rhs: Self) {
+                        *self = *self / rhs;
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::DivAssign<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn div_assign(&mut self, rhs: Self) {
-                            *self = *self / rhs;
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::DivAssign<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn div_assign(&mut self, rhs: $scalar) {
+                        *self = *self / rhs;
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::DivAssign<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn div_assign(&mut self, rhs: $scalar) {
-                            *self = *self / rhs;
+            // remainder panics on zero divisor
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Rem<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;
+
+                    #[inline]
+                    fn rem(self, rhs: Self) -> Self::Output {
+                        if rhs.as_array()
+                            .iter()
+                            .any(|x| *x == 0)
+                        {
+                            panic!("attempt to calculate the remainder with a divisor of zero");
                        }
+
+                        // Guards for rem(MIN, -1)
+                        // this branch applies the check only to signed ints
+                        if <$scalar>::MIN != 0 && self.as_array().iter()
+                                .zip(rhs.as_array().iter())
+                                .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
+                            panic!("attempt to calculate the remainder with overflow");
+                        }
+                        unsafe { crate::intrinsics::simd_rem(self, rhs) }
                    }
                }
+            }

-                // remainder panics on zero divisor
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Rem<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Rem<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;

-                        #[inline]
-                        fn rem(self, rhs: Self) -> Self::Output {
-                            if rhs.as_array()
-                                .iter()
-                                .any(|x| *x == 0)
-                            {
-                                panic!("attempt to calculate the remainder with a divisor of zero");
-                            }
-
-                            // Guards for rem(MIN, -1)
-                            // this branch applies the check only to signed ints
-                            if <$scalar>::MIN != 0 && self.as_array().iter()
-                                    .zip(rhs.as_array().iter())
-                                    .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
+                    #[inline]
+                    fn rem(self, rhs: $scalar) -> Self::Output {
+                        if rhs == 0 {
+                            panic!("attempt to calculate the remainder with a divisor of zero");
+                        }
+                        if <$scalar>::MIN != 0 &&
+                            self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
+                            rhs == -1 as _ {
                                panic!("attempt to calculate the remainder with overflow");
-                            }
-                            unsafe { crate::intrinsics::simd_rem(self, rhs) }
                        }
+                        let rhs = Self::splat(rhs);
+                        unsafe { crate::intrinsics::simd_rem(self, rhs) }
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Rem<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Rem<Simd<$scalar, LANES>> for $scalar
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Simd<$scalar, LANES>;

-                        #[inline]
-                        fn rem(self, rhs: $scalar) -> Self::Output {
-                            if rhs == 0 {
-                                panic!("attempt to calculate the remainder with a divisor of zero");
-                            }
-                            if <$scalar>::MIN != 0 &&
-                                self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
-                                rhs == -1 as _ {
-                                    panic!("attempt to calculate the remainder with overflow");
-                            }
-                            let rhs = Self::splat(rhs);
-                            unsafe { crate::intrinsics::simd_rem(self, rhs) }
-                        }
+                    #[inline]
+                    fn rem(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
+                        Simd::splat(self) % rhs
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Rem<crate::$vector<LANES>> for $scalar
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = crate::$vector<LANES>;
-
-                        #[inline]
-                        fn rem(self, rhs: crate::$vector<LANES>) -> Self::Output {
-                            crate::$vector::splat(self) % rhs
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::RemAssign<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn rem_assign(&mut self, rhs: Self) {
+                        *self = *self % rhs;
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::RemAssign<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn rem_assign(&mut self, rhs: Self) {
-                            *self = *self % rhs;
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::RemAssign<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn rem_assign(&mut self, rhs: $scalar) {
+                        *self = *self % rhs;
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::RemAssign<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn rem_assign(&mut self, rhs: $scalar) {
-                            *self = *self % rhs;
+            // shifts panic on overflow
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Shl<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;
+
+                    #[inline]
+                    fn shl(self, rhs: Self) -> Self::Output {
+                        // TODO there is probably a better way of doing this
+                        if rhs.as_array()
+                            .iter()
+                            .copied()
+                            .any(invalid_shift_rhs)
+                        {
+                            panic!("attempt to shift left with overflow");
                        }
+                        unsafe { crate::intrinsics::simd_shl(self, rhs) }
                    }
                }
+            }

-                // shifts panic on overflow
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Shl<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Shl<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;

-                        #[inline]
-                        fn shl(self, rhs: Self) -> Self::Output {
-                            // TODO there is probably a better way of doing this
-                            if rhs.as_array()
-                                .iter()
-                                .copied()
-                                .any(invalid_shift_rhs)
-                            {
-                                panic!("attempt to shift left with overflow");
-                            }
-                            unsafe { crate::intrinsics::simd_shl(self, rhs) }
+                    #[inline]
+                    fn shl(self, rhs: $scalar) -> Self::Output {
+                        if invalid_shift_rhs(rhs) {
+                            panic!("attempt to shift left with overflow");
                        }
+                        let rhs = Self::splat(rhs);
+                        unsafe { crate::intrinsics::simd_shl(self, rhs) }
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Shl<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;

-                        #[inline]
-                        fn shl(self, rhs: $scalar) -> Self::Output {
-                            if invalid_shift_rhs(rhs) {
-                                panic!("attempt to shift left with overflow");
-                            }
-                            let rhs = Self::splat(rhs);
-                            unsafe { crate::intrinsics::simd_shl(self, rhs) }
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::ShlAssign<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn shl_assign(&mut self, rhs: Self) {
+                        *self = *self << rhs;
                    }
                }
+            }

-
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::ShlAssign<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn shl_assign(&mut self, rhs: Self) {
-                            *self = *self << rhs;
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::ShlAssign<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn shl_assign(&mut self, rhs: $scalar) {
+                        *self = *self << rhs;
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::ShlAssign<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn shl_assign(&mut self, rhs: $scalar) {
-                            *self = *self << rhs;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Shr<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;
+
+                    #[inline]
+                    fn shr(self, rhs: Self) -> Self::Output {
+                        // TODO there is probably a better way of doing this
+                        if rhs.as_array()
+                            .iter()
+                            .copied()
+                            .any(invalid_shift_rhs)
+                        {
+                            panic!("attempt to shift with overflow");
                        }
+                        unsafe { crate::intrinsics::simd_shr(self, rhs) }
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Shr<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::Shr<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    type Output = Self;

-                        #[inline]
-                        fn shr(self, rhs: Self) -> Self::Output {
-                            // TODO there is probably a better way of doing this
-                            if rhs.as_array()
-                                .iter()
-                                .copied()
-                                .any(invalid_shift_rhs)
-                            {
-                                panic!("attempt to shift with overflow");
-                            }
-                            unsafe { crate::intrinsics::simd_shr(self, rhs) }
+                    #[inline]
+                    fn shr(self, rhs: $scalar) -> Self::Output {
+                        if invalid_shift_rhs(rhs) {
+                            panic!("attempt to shift with overflow");
                        }
+                        let rhs = Self::splat(rhs);
+                        unsafe { crate::intrinsics::simd_shr(self, rhs) }
                    }
                }
+            }

-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::Shr<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        type Output = Self;

-                        #[inline]
-                        fn shr(self, rhs: $scalar) -> Self::Output {
-                            if invalid_shift_rhs(rhs) {
-                                panic!("attempt to shift with overflow");
-                            }
-                            let rhs = Self::splat(rhs);
-                            unsafe { crate::intrinsics::simd_shr(self, rhs) }
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::ShrAssign<Self> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn shr_assign(&mut self, rhs: Self) {
+                        *self = *self >> rhs;
                    }
                }
+            }

-
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::ShrAssign<Self> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn shr_assign(&mut self, rhs: Self) {
-                            *self = *self >> rhs;
-                        }
+            impl_ref_ops! {
+                impl<const LANES: usize> core::ops::ShrAssign<$scalar> for Simd<$scalar, LANES>
+                where
+                    LaneCount<LANES>: SupportedLaneCount,
+                {
+                    #[inline]
+                    fn shr_assign(&mut self, rhs: $scalar) {
+                        *self = *self >> rhs;
                    }
                }
-
-                impl_ref_ops! {
-                    impl<const LANES: usize> core::ops::ShrAssign<$scalar> for crate::$vector<LANES>
-                    where
-                        LaneCount<LANES>: SupportedLaneCount,
-                    {
-                        #[inline]
-                        fn shr_assign(&mut self, rhs: $scalar) {
-                            *self = *self >> rhs;
-                        }
-                    }
-                }
-            )*
+            }
        )*
    };
 }

 /// Implements signed integer operators (the unsigned set plus `Neg`) for the provided types.
 macro_rules! impl_signed_int_ops {
-    { $($scalar:ty => $($vector:ident),*;)* } => {
-        impl_unsigned_int_ops! { $($scalar => $($vector),*;)* }
+    { $($scalar:ty),* } => {
+        impl_unsigned_int_ops! { $($scalar),* }
        $( // scalar
-            $( // vector
-                impl_op! { impl Neg for $vector, $scalar }
-            )*
+            impl_op! { impl Neg for $scalar }
        )*
    };
 }

-impl_unsigned_int_ops! {
-    u8 => SimdU8;
-    u16 => SimdU16;
-    u32 => SimdU32;
-    u64 => SimdU64;
-    usize => SimdUsize;
-}
-
-impl_signed_int_ops! {
-    i8 => SimdI8;
-    i16 => SimdI16;
-    i32 => SimdI32;
-    i64 => SimdI64;
-    isize => SimdIsize;
-}
-
-impl_float_ops! {
-    f32 => SimdF32;
-    f64 => SimdF64;
-}
+impl_unsigned_int_ops! { u8, u16, u32, u64, usize }
+impl_signed_int_ops! { i8, i16, i32, i64, isize }
+impl_float_ops! { f32, f64 }
--- a/crates/core_simd/src/permute.rs
+++ b/crates/core_simd/src/permute.rs
@ -1,6 +1,9 @@
 macro_rules! impl_shuffle_lane {
-    { $name:ident, $fn:ident, $n:literal } => {
-        impl $name<$n> {
+    { $fn:ident, $n:literal } => {
+        impl<T> crate::Simd<T, $n>
+        where
+            T: crate::SimdElement,
+        {
            /// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using
            /// the indices in the const parameter. The first or "self" vector will have its lanes
            /// indexed from 0, and the second vector will have its first lane indexed at $n.
@ -12,12 +15,12 @@ macro_rules! impl_shuffle_lane {
            ///
            /// ```
            /// #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let a = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
-            /// let b = f32x4::from_array([5.0, 6.0, 7.0, 8.0]);
+            /// # use core_simd::Simd;
+            /// let a = Simd::from_array([1.0, 2.0, 3.0, 4.0]);
+            /// let b = Simd::from_array([5.0, 6.0, 7.0, 8.0]);
            /// const IDXS: [u32; 4] = [4,0,3,7];
-            /// let c = f32x4::shuffle::<IDXS>(a,b);
-            /// assert_eq!(f32x4::from_array([5.0, 1.0, 4.0, 8.0]), c);
+            /// let c = Simd::<_, 4>::shuffle::<IDXS>(a,b);
+            /// assert_eq!(Simd::from_array([5.0, 1.0, 4.0, 8.0]), c);
            /// ```
            #[inline]
            pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self {
@ -53,9 +56,9 @@ macro_rules! impl_shuffle_lane {
            ///
            /// ```
            /// #![feature(portable_simd)]
-            /// # use core_simd::SimdU32;
-            /// let a = SimdU32::from_array([0, 1, 2, 3]);
-            /// let b = SimdU32::from_array([4, 5, 6, 7]);
+            /// # use core_simd::Simd;
+            /// let a = Simd::from_array([0, 1, 2, 3]);
+            /// let b = Simd::from_array([4, 5, 6, 7]);
            /// let (x, y) = a.interleave(b);
            /// assert_eq!(x.to_array(), [0, 4, 1, 5]);
            /// assert_eq!(y.to_array(), [2, 6, 3, 7]);
@ -105,9 +108,9 @@ macro_rules! impl_shuffle_lane {
            ///
            /// ```
            /// #![feature(portable_simd)]
-            /// # use core_simd::SimdU32;
-            /// let a = SimdU32::from_array([0, 4, 1, 5]);
-            /// let b = SimdU32::from_array([2, 6, 3, 7]);
+            /// # use core_simd::Simd;
+            /// let a = Simd::from_array([0, 4, 1, 5]);
+            /// let b = Simd::from_array([2, 6, 3, 7]);
            /// let (x, y) = a.deinterleave(b);
            /// assert_eq!(x.to_array(), [0, 1, 2, 3]);
            /// assert_eq!(y.to_array(), [4, 5, 6, 7]);
@ -138,12 +141,8 @@ macro_rules! impl_shuffle_lane {
    }
 }

-macro_rules! impl_shuffle_2pow_lanes {
-    { $name:ident } => {
-        impl_shuffle_lane!{ $name, simd_shuffle2, 2 }
-        impl_shuffle_lane!{ $name, simd_shuffle4, 4 }
-        impl_shuffle_lane!{ $name, simd_shuffle8, 8 }
-        impl_shuffle_lane!{ $name, simd_shuffle16, 16 }
-        impl_shuffle_lane!{ $name, simd_shuffle32, 32 }
-    }
-}
+impl_shuffle_lane! { simd_shuffle2, 2 }
+impl_shuffle_lane! { simd_shuffle4, 4 }
+impl_shuffle_lane! { simd_shuffle8, 8 }
+impl_shuffle_lane! { simd_shuffle16, 16 }
+impl_shuffle_lane! { simd_shuffle32, 32 }
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@ -1,8 +1,10 @@
+use crate::{LaneCount, Simd, SupportedLaneCount};
+
 macro_rules! impl_integer_reductions {
-    { $name:ident, $scalar:ty } => {
-        impl<const LANES: usize> crate::$name<LANES>
+    { $scalar:ty } => {
+        impl<const LANES: usize> Simd<$scalar, LANES>
        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
            #[inline]
@ -52,11 +54,22 @@ macro_rules! impl_integer_reductions {
    }
 }

+impl_integer_reductions! { i8 }
+impl_integer_reductions! { i16 }
+impl_integer_reductions! { i32 }
+impl_integer_reductions! { i64 }
+impl_integer_reductions! { isize }
+impl_integer_reductions! { u8 }
+impl_integer_reductions! { u16 }
+impl_integer_reductions! { u32 }
+impl_integer_reductions! { u64 }
+impl_integer_reductions! { usize }
+
 macro_rules! impl_float_reductions {
-    { $name:ident, $scalar:ty } => {
-        impl<const LANES: usize> crate::$name<LANES>
+    { $scalar:ty } => {
+        impl<const LANES: usize> Simd<$scalar, LANES>
        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            LaneCount<LANES>: SupportedLaneCount,
        {

            /// Horizontal add.  Returns the sum of the lanes of the vector.
@ -102,42 +115,5 @@ macro_rules! impl_float_reductions {
    }
 }

-macro_rules! impl_full_mask_reductions {
-    { $name:ident, $bits_ty:ident } => {
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[inline]
-            pub fn any(self) -> bool {
-                unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) }
-            }
-
-            #[inline]
-            pub fn all(self) -> bool {
-                unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) }
-            }
-        }
-    }
-}
-
-macro_rules! impl_opaque_mask_reductions {
-    { $name:ident, $bits_ty:ident } => {
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            /// Returns true if any lane is set, or false otherwise.
-            #[inline]
-            pub fn any(self) -> bool {
-                self.0.any()
-            }
-
-            /// Returns true if all lanes are set, or false otherwise.
-            #[inline]
-            pub fn all(self) -> bool {
-                self.0.all()
-            }
-        }
-    }
-}
+impl_float_reductions! { f32 }
+impl_float_reductions! { f64 }
--- a/crates/core_simd/src/round.rs
+++ b/crates/core_simd/src/round.rs
@ -1,11 +1,13 @@
+use crate::{LaneCount, Simd, SupportedLaneCount};
+
 macro_rules! implement {
    {
-        $type:ident, $int_type:ident
+        $type:ty, $int_type:ty
    } => {
        #[cfg(feature = "std")]
-        impl<const LANES: usize> crate::$type<LANES>
+        impl<const LANES: usize> Simd<$type, LANES>
        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Returns the smallest integer greater than or equal to each lane.
            #[must_use = "method returns a new vector and does not mutate the original value"]
@ -43,9 +45,9 @@ macro_rules! implement {
            }
        }

-        impl<const LANES: usize> crate::$type<LANES>
+        impl<const LANES: usize> Simd<$type, LANES>
        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Rounds toward zero and converts to the same-width integer type, assuming that
            /// the value is finite and fits in that type.
@ -57,19 +59,19 @@ macro_rules! implement {
            /// * Not be infinite
            /// * Be representable in the return type, after truncating off its fractional part
            #[inline]
-            pub unsafe fn to_int_unchecked(self) -> crate::$int_type<LANES> {
+            pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
                crate::intrinsics::simd_cast(self)
            }

            /// Creates a floating-point vector from an integer vector.  Rounds values that are
            /// not exactly representable.
            #[inline]
-            pub fn round_from_int(value: crate::$int_type<LANES>) -> Self {
+            pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
                unsafe { crate::intrinsics::simd_cast(value) }
            }
        }
    }
 }

-implement! { SimdF32, SimdI32 }
-implement! { SimdF64, SimdI64 }
+implement! { f32, i32 }
+implement! { f64, i64 }
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@ -1,3 +1,5 @@
+use crate::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+
 mod sealed {
    pub trait Sealed {}
 }
@ -9,79 +11,75 @@ pub trait Select<Mask>: Sealed {
    fn select(mask: Mask, true_values: Self, false_values: Self) -> Self;
 }

-macro_rules! impl_select {
-    {
-        $mask:ident ($bits_ty:ident): $($type:ident),*
-    } => {
-        $(
-        impl<const LANES: usize> Sealed for crate::$type<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
-        impl<const LANES: usize> Select<crate::$mask<LANES>> for crate::$type<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[doc(hidden)]
-            #[inline]
-            fn select(mask: crate::$mask<LANES>, true_values: Self, false_values: Self) -> Self {
-                unsafe { crate::intrinsics::simd_select(mask.to_int(), true_values, false_values) }
-            }
-        }
-        )*
+impl<T, const LANES: usize> Sealed for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}

-        impl<const LANES: usize> Sealed for crate::$mask<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
-
-        impl<const LANES: usize> Select<Self> for crate::$mask<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            #[doc(hidden)]
-            #[inline]
-            fn select(mask: Self, true_values: Self, false_values: Self) -> Self {
-                mask & true_values | !mask & false_values
-            }
-        }
-
-        impl<const LANES: usize> crate::$mask<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            /// Choose lanes from two vectors.
-            ///
-            /// For each lane in the mask, choose the corresponding lane from `true_values` if
-            /// that lane mask is true, and `false_values` if that lane mask is false.
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::{Mask32, SimdI32};
-            /// let a = SimdI32::from_array([0, 1, 2, 3]);
-            /// let b = SimdI32::from_array([4, 5, 6, 7]);
-            /// let mask = Mask32::from_array([true, false, false, true]);
-            /// let c = mask.select(a, b);
-            /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
-            /// ```
-            ///
-            /// `select` can also be used on masks:
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::Mask32;
-            /// let a = Mask32::from_array([true, true, false, false]);
-            /// let b = Mask32::from_array([false, false, true, true]);
-            /// let mask = Mask32::from_array([true, false, false, true]);
-            /// let c = mask.select(a, b);
-            /// assert_eq!(c.to_array(), [true, false, true, false]);
-            /// ```
-            #[inline]
-            pub fn select<S: Select<Self>>(self, true_values: S, false_values: S) -> S {
-                S::select(self, true_values, false_values)
-            }
-        }
+impl<T, const LANES: usize> Select<Mask<T::Mask, LANES>> for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[inline]
+    fn select(mask: Mask<T::Mask, LANES>, true_values: Self, false_values: Self) -> Self {
+        unsafe { crate::intrinsics::simd_select(mask.to_int(), true_values, false_values) }
    }
 }

-impl_select! { Mask8 (SimdI8): SimdU8, SimdI8 }
-impl_select! { Mask16 (SimdI16): SimdU16, SimdI16 }
-impl_select! { Mask32 (SimdI32): SimdU32, SimdI32, SimdF32}
-impl_select! { Mask64 (SimdI64): SimdU64, SimdI64, SimdF64}
-impl_select! { MaskSize (SimdIsize): SimdUsize, SimdIsize }
+impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Select<Self> for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    #[doc(hidden)]
+    #[inline]
+    fn select(mask: Self, true_values: Self, false_values: Self) -> Self {
+        mask & true_values | !mask & false_values
+    }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    /// Choose lanes from two vectors.
+    ///
+    /// For each lane in the mask, choose the corresponding lane from `true_values` if
+    /// that lane mask is true, and `false_values` if that lane mask is false.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::{Mask, Simd};
+    /// let a = Simd::from_array([0, 1, 2, 3]);
+    /// let b = Simd::from_array([4, 5, 6, 7]);
+    /// let mask = Mask::from_array([true, false, false, true]);
+    /// let c = mask.select(a, b);
+    /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
+    /// ```
+    ///
+    /// `select` can also be used on masks:
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::Mask;
+    /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+    /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+    /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+    /// let c = mask.select(a, b);
+    /// assert_eq!(c.to_array(), [true, false, true, false]);
+    /// ```
+    #[inline]
+    pub fn select<S: Select<Self>>(self, true_values: S, false_values: S) -> S {
+        S::select(self, true_values, false_values)
+    }
+}
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@ -1,39 +1,39 @@
 macro_rules! impl_to_bytes {
-    { $name:ident, $size:literal } => {
-        impl<const LANES: usize> crate::$name<LANES>
+    { $ty:ty, $size:literal } => {
+        impl<const LANES: usize> crate::Simd<$ty, LANES>
        where
            crate::LaneCount<LANES>: crate::SupportedLaneCount,
            crate::LaneCount<{{ $size * LANES }}>: crate::SupportedLaneCount,
        {
             /// Return the memory representation of this integer vector as a byte vector in native
             /// byte order.
-            pub fn to_ne_bytes(self) -> crate::SimdU8<{{ $size * LANES }}> {
+            pub fn to_ne_bytes(self) -> crate::Simd<u8, {{ $size * LANES }}> {
                unsafe { core::mem::transmute_copy(&self) }
            }

             /// Create an integer vector from its memory representation as a byte vector in native
             /// byte order.
-            pub fn from_ne_bytes(bytes: crate::SimdU8<{{ $size * LANES }}>) -> Self {
+            pub fn from_ne_bytes(bytes: crate::Simd<u8, {{ $size * LANES }}>) -> Self {
                unsafe { core::mem::transmute_copy(&bytes) }
            }
        }
    }
 }

-impl_to_bytes! { SimdU8, 1 }
-impl_to_bytes! { SimdU16, 2 }
-impl_to_bytes! { SimdU32, 4 }
-impl_to_bytes! { SimdU64, 8 }
+impl_to_bytes! { u8, 1 }
+impl_to_bytes! { u16, 2 }
+impl_to_bytes! { u32, 4 }
+impl_to_bytes! { u64, 8 }
 #[cfg(target_pointer_width = "32")]
-impl_to_bytes! { SimdUsize, 4 }
+impl_to_bytes! { usize, 4 }
 #[cfg(target_pointer_width = "64")]
-impl_to_bytes! { SimdUsize, 8 }
+impl_to_bytes! { usize, 8 }

-impl_to_bytes! { SimdI8, 1 }
-impl_to_bytes! { SimdI16, 2 }
-impl_to_bytes! { SimdI32, 4 }
-impl_to_bytes! { SimdI64, 8 }
+impl_to_bytes! { i8, 1 }
+impl_to_bytes! { i16, 2 }
+impl_to_bytes! { i32, 4 }
+impl_to_bytes! { i64, 8 }
 #[cfg(target_pointer_width = "32")]
-impl_to_bytes! { SimdIsize, 4 }
+impl_to_bytes! { isize, 4 }
 #[cfg(target_pointer_width = "64")]
-impl_to_bytes! { SimdIsize, 8 }
+impl_to_bytes! { isize, 8 }
--- a/crates/core_simd/src/transmute.rs
+++ b/crates/core_simd/src/transmute.rs
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@ -1,6 +1,3 @@
-#[macro_use]
-mod vector_impl;
-
 mod float;
 mod int;
 mod uint;
@ -12,21 +9,399 @@ pub use uint::*;
 // Vectors of pointers are not for public use at the current time.
 pub(crate) mod ptr;

+use crate::{LaneCount, Mask, MaskElement, SupportedLaneCount};
+
+/// A SIMD vector of `LANES` elements of type `T`.
+#[repr(simd)]
+pub struct Simd<T, const LANES: usize>([T; LANES])
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    /// Construct a SIMD vector by setting all lanes to the given value.
+    pub const fn splat(value: T) -> Self {
+        Self([value; LANES])
+    }
+
+    /// Returns an array reference containing the entire SIMD vector.
+    pub const fn as_array(&self) -> &[T; LANES] {
+        &self.0
+    }
+
+    /// Returns a mutable array reference containing the entire SIMD vector.
+    pub fn as_mut_array(&mut self) -> &mut [T; LANES] {
+        &mut self.0
+    }
+
+    /// Converts an array to a SIMD vector.
+    pub const fn from_array(array: [T; LANES]) -> Self {
+        Self(array)
+    }
+
+    /// Converts a SIMD vector to an array.
+    pub const fn to_array(self) -> [T; LANES] {
+        self.0
+    }
+
+    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+    /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+    /// let alt = Simd::from_array([-5, -4, -3, -2]);
+    ///
+    /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+    /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn gather_or(slice: &[T], idxs: Simd<usize, LANES>, or: Self) -> Self {
+        Self::gather_select(slice, Mask::splat(true), idxs, or)
+    }
+
+    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+    /// Out-of-bounds indices instead use the element type's default value (`T::default()`, e.g. `0` for `i32`) for that lane.
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+    ///
+    /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+    /// assert_eq!(result, Simd::from_array([0, 13, 10, 15]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn gather_or_default(slice: &[T], idxs: Simd<usize, LANES>) -> Self
+    where
+        T: Default,
+    {
+        Self::gather_or(slice, idxs, Self::splat(T::default()))
+    }
+
+    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+    /// Out-of-bounds or masked indices instead select the value from the "or" vector.
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+    /// let alt = Simd::from_array([-5, -4, -3, -2]);
+    /// let mask = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+    ///
+    /// let result = Simd::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
+    /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+    /// ```
+    #[must_use]
+    #[inline]
+    pub fn gather_select(
+        slice: &[T],
+        mask: Mask<isize, LANES>,
+        idxs: Simd<usize, LANES>,
+        or: Self,
+    ) -> Self {
+        let mask = (mask & idxs.lanes_lt(Simd::splat(slice.len()))).to_int();
+        let base_ptr = crate::vector::ptr::SimdConstPtr::splat(slice.as_ptr());
+        // Per-lane pointer arithmetic: offset the splatted base pointer by each lane's index (in elements).
+        let ptrs = base_ptr.wrapping_add(idxs);
+        // SAFETY: `mask` was ANDed with `idxs < slice.len()`, so every lane still enabled points inside `slice`.
+        unsafe { crate::intrinsics::simd_gather(or, ptrs, mask) }
+    }
+
+    /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+    /// Out-of-bounds indices are not written.
+    /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::*;
+    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = Simd::from_array([9, 3, 0, 0]);
+    /// let vals = Simd::from_array([-27, 82, -41, 124]);
+    ///
+    /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+    /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+    /// ```
+    #[inline]
+    pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, LANES>) {
+        self.scatter_select(slice, Mask::splat(true), idxs)
+    }
+
+    /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+    /// Out-of-bounds or masked indices are not written.
+    /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core_simd::*;
+    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = Simd::from_array([9, 3, 0, 0]);
+    /// let vals = Simd::from_array([-27, 82, -41, 124]);
+    /// let mask = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+    ///
+    /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
+    /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+    /// ```
+    #[inline]
+    pub fn scatter_select(
+        self,
+        slice: &mut [T],
+        mask: Mask<isize, LANES>,
+        idxs: Simd<usize, LANES>,
+    ) {
+        // We must construct our scatter mask before we derive a pointer!
+        let mask = (mask & idxs.lanes_lt(Simd::splat(slice.len()))).to_int();
+        // SAFETY: This block works with *mut T derived from &mut 'a [T],
+        // which means it is delicate in Rust's borrowing model, circa 2021:
+        // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+        // Even though this block is largely safe methods, it must be almost exactly this way
+        // to prevent invalidating the raw ptrs while they're live.
+        // Thus, every value used inside this block must be ready before entering it:
+        // 0. idxs we want to write to, which are used to construct the mask.
+        // 1. mask, which depends on an initial &'a [T] and the idxs.
+        // 2. actual values to scatter (self).
+        // 3. &mut [T] which will become our base ptr.
+        unsafe {
+            // Now Entering ☢️ *mut T Zone
+            let base_ptr = crate::vector::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+            // Per-lane pointer arithmetic: offset the splatted base pointer by each lane's index (in elements).
+            let ptrs = base_ptr.wrapping_add(idxs);
+            // `mask` was ANDed with `idxs < slice.len()`, so every lane still enabled points inside `slice`.
+            crate::intrinsics::simd_scatter(self, ptrs, mask)
+            // Cleared ☢️ *mut T Zone
+        }
+    }
+}
+
+impl<T, const LANES: usize> Copy for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Simd<T, LANES>
+where
+    T: SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T, const LANES: usize> Default for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + Default,
+{
+    #[inline]
+    fn default() -> Self {
+        Self::splat(T::default())
+    }
+}
+
+impl<T, const LANES: usize> PartialEq for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + PartialEq,
+{
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        // TODO use SIMD equality
+        self.to_array() == other.to_array()
+    }
+}
+
+impl<T, const LANES: usize> PartialOrd for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + PartialOrd,
+{
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        // TODO use SIMD comparison instead of comparing the lane arrays
+        self.to_array().partial_cmp(other.as_ref())
+    }
+}
+
+impl<T, const LANES: usize> Eq for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + Eq,
+{
+}
+
+impl<T, const LANES: usize> Ord for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + Ord,
+{
+    #[inline]
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        // TODO use SIMD comparison instead of comparing the lane arrays
+        self.to_array().cmp(other.as_ref())
+    }
+}
+
+impl<T, const LANES: usize> core::hash::Hash for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement + core::hash::Hash,
+{
+    #[inline]
+    fn hash<H>(&self, state: &mut H)
+    where
+        H: core::hash::Hasher,
+    {
+        self.as_array().hash(state)
+    }
+}
+
+// array references
+impl<T, const LANES: usize> AsRef<[T; LANES]> for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    #[inline]
+    fn as_ref(&self) -> &[T; LANES] {
+        &self.0
+    }
+}
+
+impl<T, const LANES: usize> AsMut<[T; LANES]> for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    #[inline]
+    fn as_mut(&mut self) -> &mut [T; LANES] {
+        &mut self.0
+    }
+}
+
+// slice references
+impl<T, const LANES: usize> AsRef<[T]> for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    #[inline]
+    fn as_ref(&self) -> &[T] {
+        &self.0
+    }
+}
+
+impl<T, const LANES: usize> AsMut<[T]> for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    #[inline]
+    fn as_mut(&mut self) -> &mut [T] {
+        &mut self.0
+    }
+}
+
+// vector/array conversion
+impl<T, const LANES: usize> From<[T; LANES]> for Simd<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    fn from(array: [T; LANES]) -> Self {
+        Self(array)
+    }
+}
+
+impl<T, const LANES: usize> From<Simd<T, LANES>> for [T; LANES]
+where
+    LaneCount<LANES>: SupportedLaneCount,
+    T: SimdElement,
+{
+    fn from(vector: Simd<T, LANES>) -> Self {
+        vector.to_array()
+    }
+}
+
 mod sealed {
    pub trait Sealed {}
 }
+use sealed::Sealed;

-/// A representation of a vector as an "array" with indices, implementing
-/// operations applicable to any vector type based solely on "having lanes",
-/// and describing relationships between vector and scalar types.
-pub trait Vector: sealed::Sealed {
-    /// The scalar type in every lane of this vector type.
-    type Scalar: Copy + Sized;
-
-    /// The number of lanes for this vector.
-    const LANES: usize;
-
-    /// Generates a SIMD vector with the same value in every lane.
-    #[must_use]
-    fn splat(val: Self::Scalar) -> Self;
+/// Marker trait for types that may be used as SIMD vector elements.
+/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
+/// `#[repr(simd)]` structs with the marked type as an element.
+/// Strictly, it is valid to impl if the vector will not be miscompiled.
+/// Practically, it is user-unfriendly to impl it if the vector won't compile,
+/// even when no soundness guarantees are broken by allowing the user to try.
+pub unsafe trait SimdElement: Sealed + Copy {
+    /// The mask element type corresponding to this element type.
+    type Mask: MaskElement;
+}
+
+impl Sealed for u8 {}
+unsafe impl SimdElement for u8 {
+    type Mask = i8;
+}
+
+impl Sealed for u16 {}
+unsafe impl SimdElement for u16 {
+    type Mask = i16;
+}
+
+impl Sealed for u32 {}
+unsafe impl SimdElement for u32 {
+    type Mask = i32;
+}
+
+impl Sealed for u64 {}
+unsafe impl SimdElement for u64 {
+    type Mask = i64;
+}
+
+impl Sealed for usize {}
+unsafe impl SimdElement for usize {
+    type Mask = isize;
+}
+
+impl Sealed for i8 {}
+unsafe impl SimdElement for i8 {
+    type Mask = i8;
+}
+
+impl Sealed for i16 {}
+unsafe impl SimdElement for i16 {
+    type Mask = i16;
+}
+
+impl Sealed for i32 {}
+unsafe impl SimdElement for i32 {
+    type Mask = i32;
+}
+
+impl Sealed for i64 {}
+unsafe impl SimdElement for i64 {
+    type Mask = i64;
+}
+
+impl Sealed for isize {}
+unsafe impl SimdElement for isize {
+    type Mask = isize;
+}
+
+impl Sealed for f32 {}
+unsafe impl SimdElement for f32 {
+    type Mask = i32;
+}
+
+impl Sealed for f64 {}
+unsafe impl SimdElement for f64 {
+    type Mask = i64;
 }
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@ -1,32 +1,29 @@
 #![allow(non_camel_case_types)]

-use crate::{LaneCount, SupportedLaneCount};
+use crate::{LaneCount, Mask, Simd, SupportedLaneCount};

-/// Implements inherent methods for a float vector `$name` containing multiple
+/// Implements inherent methods for a float vector containing multiple
 /// lanes of float `$type`, which uses `$bits_ty` as its binary
-/// representation. Called from `define_float_vector!`.
+/// representation.
 macro_rules! impl_float_vector {
-    { $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
-        impl_vector! { $name, $type }
-        impl_float_reductions! { $name, $type }
-
-        impl<const LANES: usize> $name<LANES>
+    { $type:ty, $bits_ty:ty, $mask_ty:ty } => {
+        impl<const LANES: usize> Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Raw transmutation to an unsigned integer vector type with the
            /// same size and number of lanes.
            #[inline]
-            pub fn to_bits(self) -> crate::$bits_ty<LANES> {
-                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<crate::$bits_ty<LANES>>());
+            pub fn to_bits(self) -> Simd<$bits_ty, LANES> {
+                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Simd<$bits_ty, LANES>>());
                unsafe { core::mem::transmute_copy(&self) }
            }

            /// Raw transmutation from an unsigned integer vector type with the
            /// same size and number of lanes.
            #[inline]
-            pub fn from_bits(bits: crate::$bits_ty<LANES>) -> Self {
-                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<crate::$bits_ty<LANES>>());
+            pub fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
+                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Simd<$bits_ty, LANES>>());
                unsafe { core::mem::transmute_copy(&bits) }
            }

@ -67,58 +64,58 @@ macro_rules! impl_float_vector {
            #[inline]
            pub fn to_degrees(self) -> Self {
                // to_degrees uses a special constant for better precision, so extract that constant
-                self * Self::splat($type::to_degrees(1.))
+                self * Self::splat(<$type>::to_degrees(1.))
            }

            /// Converts each lane from degrees to radians.
            #[inline]
            pub fn to_radians(self) -> Self {
-                self * Self::splat($type::to_radians(1.))
+                self * Self::splat(<$type>::to_radians(1.))
            }

            /// Returns true for each lane if it has a positive sign, including
            /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
            #[inline]
-            pub fn is_sign_positive(self) -> crate::$mask_ty<LANES> {
+            pub fn is_sign_positive(self) -> Mask<$mask_ty, LANES> {
                !self.is_sign_negative()
            }

            /// Returns true for each lane if it has a negative sign, including
            /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
            #[inline]
-            pub fn is_sign_negative(self) -> crate::$mask_ty<LANES> {
-                let sign_bits = self.to_bits() & crate::$bits_ty::splat((!0 >> 1) + 1);
-                sign_bits.lanes_gt(crate::$bits_ty::splat(0))
+            pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> {
+                let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
+                sign_bits.lanes_gt(Simd::splat(0))
            }

            /// Returns true for each lane if its value is `NaN`.
            #[inline]
-            pub fn is_nan(self) -> crate::$mask_ty<LANES> {
+            pub fn is_nan(self) -> Mask<$mask_ty, LANES> {
                self.lanes_ne(self)
            }

            /// Returns true for each lane if its value is positive infinity or negative infinity.
            #[inline]
-            pub fn is_infinite(self) -> crate::$mask_ty<LANES> {
+            pub fn is_infinite(self) -> Mask<$mask_ty, LANES> {
                self.abs().lanes_eq(Self::splat(<$type>::INFINITY))
            }

            /// Returns true for each lane if its value is neither infinite nor `NaN`.
            #[inline]
-            pub fn is_finite(self) -> crate::$mask_ty<LANES> {
+            pub fn is_finite(self) -> Mask<$mask_ty, LANES> {
                self.abs().lanes_lt(Self::splat(<$type>::INFINITY))
            }

            /// Returns true for each lane if its value is subnormal.
            #[inline]
-            pub fn is_subnormal(self) -> crate::$mask_ty<LANES> {
-                self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(crate::$bits_ty::splat(0))
+            pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> {
+                self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0))
            }

            /// Returns true for each lane if its value is neither zero, infinite,
            /// subnormal, nor `NaN`.
            #[inline]
-            pub fn is_normal(self) -> crate::$mask_ty<LANES> {
+            pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
                !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
            }

@ -129,7 +126,7 @@ macro_rules! impl_float_vector {
            /// * `NAN` if the number is `NAN`
            #[inline]
            pub fn signum(self) -> Self {
-                self.is_nan().select(Self::splat($type::NAN), Self::splat(1.0).copysign(self))
+                self.is_nan().select(Self::splat(<$type>::NAN), Self::splat(1.0).copysign(self))
            }

            /// Returns each lane with the magnitude of `self` and the sign of `sign`.
@ -186,39 +183,26 @@ macro_rules! impl_float_vector {
    };
 }

-/// A SIMD vector of containing `LANES` `f32` values.
-#[repr(simd)]
-pub struct SimdF32<const LANES: usize>([f32; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
-
-/// A SIMD vector of containing `LANES` `f64` values.
-#[repr(simd)]
-pub struct SimdF64<const LANES: usize>([f64; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_float_vector! { SimdF64, f64, SimdU64, Mask64, SimdI64 }
+impl_float_vector! { f32, u32, i32 }
+impl_float_vector! { f64, u64, i64 }

 /// Vector of two `f32` values
-pub type f32x2 = SimdF32<2>;
+pub type f32x2 = Simd<f32, 2>;

 /// Vector of four `f32` values
-pub type f32x4 = SimdF32<4>;
+pub type f32x4 = Simd<f32, 4>;

 /// Vector of eight `f32` values
-pub type f32x8 = SimdF32<8>;
+pub type f32x8 = Simd<f32, 8>;

 /// Vector of 16 `f32` values
-pub type f32x16 = SimdF32<16>;
+pub type f32x16 = Simd<f32, 16>;

 /// Vector of two `f64` values
-pub type f64x2 = SimdF64<2>;
+pub type f64x2 = Simd<f64, 2>;

 /// Vector of four `f64` values
-pub type f64x4 = SimdF64<4>;
+pub type f64x4 = Simd<f64, 4>;

 /// Vector of eight `f64` values
-pub type f64x8 = SimdF64<8>;
+pub type f64x8 = Simd<f64, 8>;
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@ -1,49 +1,23 @@
 #![allow(non_camel_case_types)]

-use crate::{LaneCount, SupportedLaneCount};
+use crate::{LaneCount, Mask, Simd, SupportedLaneCount};

 /// Implements inherent methods (such as `is_positive` and `is_negative`) on `Simd<$type, LANES>` for the signed integer `$type`.
 macro_rules! impl_integer_vector {
-    { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
-        impl_vector! { $name, $type }
-        impl_integer_reductions! { $name, $type }
-
-        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}
-
-        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
-            #[inline]
-            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
-                // TODO use SIMD cmp
-                self.as_array().cmp(other.as_ref())
-            }
-        }
-
-        impl<const LANES: usize> core::hash::Hash for $name<LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            #[inline]
-            fn hash<H>(&self, state: &mut H)
-            where
-                H: core::hash::Hasher
-            {
-                self.as_array().hash(state)
-            }
-        }
-
-        impl<const LANES: usize> $name<LANES>
+    { $type:ty } => {
+        impl<const LANES: usize> Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Returns true for each positive lane and false if it is zero or negative.
            #[inline]
-            pub fn is_positive(self) -> crate::$mask_ty<LANES> {
+            pub fn is_positive(self) -> Mask<$type, LANES> {
                self.lanes_gt(Self::splat(0))
            }

            /// Returns true for each negative lane and false if it is zero or positive.
            #[inline]
-            pub fn is_negative(self) -> crate::$mask_ty<LANES> {
+            pub fn is_negative(self) -> Mask<$type, LANES> {
                self.lanes_lt(Self::splat(0))
            }

@ -62,102 +36,68 @@ macro_rules! impl_integer_vector {
    }
 }

-/// A SIMD vector of containing `LANES` `isize` values.
-#[repr(simd)]
-pub struct SimdIsize<const LANES: usize>([isize; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
-
-/// A SIMD vector of containing `LANES` `i16` values.
-#[repr(simd)]
-pub struct SimdI16<const LANES: usize>([i16; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
-
-/// A SIMD vector of containing `LANES` `i32` values.
-#[repr(simd)]
-pub struct SimdI32<const LANES: usize>([i32; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
-
-/// A SIMD vector of containing `LANES` `i64` values.
-#[repr(simd)]
-pub struct SimdI64<const LANES: usize>([i64; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
-
-/// A SIMD vector of containing `LANES` `i8` values.
-#[repr(simd)]
-pub struct SimdI8<const LANES: usize>([i8; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_integer_vector! { SimdI8, i8, Mask8, SimdI8 }
+impl_integer_vector! { isize }
+impl_integer_vector! { i16 }
+impl_integer_vector! { i32 }
+impl_integer_vector! { i64 }
+impl_integer_vector! { i8 }

 /// Vector of two `isize` values
-pub type isizex2 = SimdIsize<2>;
+pub type isizex2 = Simd<isize, 2>;

 /// Vector of four `isize` values
-pub type isizex4 = SimdIsize<4>;
+pub type isizex4 = Simd<isize, 4>;

 /// Vector of eight `isize` values
-pub type isizex8 = SimdIsize<8>;
+pub type isizex8 = Simd<isize, 8>;

 /// Vector of two `i16` values
-pub type i16x2 = SimdI16<2>;
+pub type i16x2 = Simd<i16, 2>;

 /// Vector of four `i16` values
-pub type i16x4 = SimdI16<4>;
+pub type i16x4 = Simd<i16, 4>;

 /// Vector of eight `i16` values
-pub type i16x8 = SimdI16<8>;
+pub type i16x8 = Simd<i16, 8>;

 /// Vector of 16 `i16` values
-pub type i16x16 = SimdI16<16>;
+pub type i16x16 = Simd<i16, 16>;

 /// Vector of 32 `i16` values
-pub type i16x32 = SimdI16<32>;
+pub type i16x32 = Simd<i16, 32>;

 /// Vector of two `i32` values
-pub type i32x2 = SimdI32<2>;
+pub type i32x2 = Simd<i32, 2>;

 /// Vector of four `i32` values
-pub type i32x4 = SimdI32<4>;
+pub type i32x4 = Simd<i32, 4>;

 /// Vector of eight `i32` values
-pub type i32x8 = SimdI32<8>;
+pub type i32x8 = Simd<i32, 8>;

 /// Vector of 16 `i32` values
-pub type i32x16 = SimdI32<16>;
+pub type i32x16 = Simd<i32, 16>;

 /// Vector of two `i64` values
-pub type i64x2 = SimdI64<2>;
+pub type i64x2 = Simd<i64, 2>;

 /// Vector of four `i64` values
-pub type i64x4 = SimdI64<4>;
+pub type i64x4 = Simd<i64, 4>;

 /// Vector of eight `i64` values
-pub type i64x8 = SimdI64<8>;
+pub type i64x8 = Simd<i64, 8>;

 /// Vector of four `i8` values
-pub type i8x4 = SimdI8<4>;
+pub type i8x4 = Simd<i8, 4>;

 /// Vector of eight `i8` values
-pub type i8x8 = SimdI8<8>;
+pub type i8x8 = Simd<i8, 8>;

 /// Vector of 16 `i8` values
-pub type i8x16 = SimdI8<16>;
+pub type i8x16 = Simd<i8, 16>;

 /// Vector of 32 `i8` values
-pub type i8x32 = SimdI8<32>;
+pub type i8x32 = Simd<i8, 32>;

 /// Vector of 64 `i8` values
-pub type i8x64 = SimdI8<64>;
+pub type i8x64 = Simd<i8, 64>;
--- a/crates/core_simd/src/vector/ptr.rs
+++ b/crates/core_simd/src/vector/ptr.rs
@ -1,5 +1,5 @@
 //! Private implementation details of public gather/scatter APIs.
-use crate::{LaneCount, SimdUsize, SupportedLaneCount};
+use crate::{LaneCount, Simd, SupportedLaneCount};
 use core::mem;

 /// A vector of *const T.
@ -20,9 +20,9 @@ where

    #[inline]
    #[must_use]
-    pub fn wrapping_add(self, addend: SimdUsize<LANES>) -> Self {
+    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
        unsafe {
-            let x: SimdUsize<LANES> = mem::transmute_copy(&self);
+            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) })
        }
    }
@ -46,9 +46,9 @@ where

    #[inline]
    #[must_use]
-    pub fn wrapping_add(self, addend: SimdUsize<LANES>) -> Self {
+    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
        unsafe {
-            let x: SimdUsize<LANES> = mem::transmute_copy(&self);
+            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) })
        }
    }
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@ -1,134 +1,63 @@
 #![allow(non_camel_case_types)]

-use crate::{LaneCount, SupportedLaneCount};
-
-/// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
-macro_rules! impl_unsigned_vector {
-    { $name:ident, $type:ty } => {
-        impl_vector! { $name, $type }
-        impl_integer_reductions! { $name, $type }
-
-        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}
-
-        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
-            #[inline]
-            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
-                // TODO use SIMD cmp
-                self.as_array().cmp(other.as_ref())
-            }
-        }
-
-        impl<const LANES: usize> core::hash::Hash for $name<LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            #[inline]
-            fn hash<H>(&self, state: &mut H)
-            where
-                H: core::hash::Hasher
-            {
-                self.as_array().hash(state)
-            }
-        }
-    }
-}
-
-/// A SIMD vector of containing `LANES` `usize` values.
-#[repr(simd)]
-pub struct SimdUsize<const LANES: usize>([usize; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_unsigned_vector! { SimdUsize, usize }
-
-/// A SIMD vector of containing `LANES` `u16` values.
-#[repr(simd)]
-pub struct SimdU16<const LANES: usize>([u16; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_unsigned_vector! { SimdU16, u16 }
-
-/// A SIMD vector of containing `LANES` `u32` values.
-#[repr(simd)]
-pub struct SimdU32<const LANES: usize>([u32; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_unsigned_vector! { SimdU32, u32 }
-
-/// A SIMD vector of containing `LANES` `u64` values.
-#[repr(simd)]
-pub struct SimdU64<const LANES: usize>([u64; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_unsigned_vector! { SimdU64, u64 }
-
-/// A SIMD vector of containing `LANES` `u8` values.
-#[repr(simd)]
-pub struct SimdU8<const LANES: usize>([u8; LANES])
-where
-    LaneCount<LANES>: SupportedLaneCount;
-
-impl_unsigned_vector! { SimdU8, u8 }
+use crate::Simd;

 /// Vector of two `usize` values
-pub type usizex2 = SimdUsize<2>;
+pub type usizex2 = Simd<usize, 2>;

 /// Vector of four `usize` values
-pub type usizex4 = SimdUsize<4>;
+pub type usizex4 = Simd<usize, 4>;

 /// Vector of eight `usize` values
-pub type usizex8 = SimdUsize<8>;
+pub type usizex8 = Simd<usize, 8>;

 /// Vector of two `u16` values
-pub type u16x2 = SimdU16<2>;
+pub type u16x2 = Simd<u16, 2>;

 /// Vector of four `u16` values
-pub type u16x4 = SimdU16<4>;
+pub type u16x4 = Simd<u16, 4>;

 /// Vector of eight `u16` values
-pub type u16x8 = SimdU16<8>;
+pub type u16x8 = Simd<u16, 8>;

 /// Vector of 16 `u16` values
-pub type u16x16 = SimdU16<16>;
+pub type u16x16 = Simd<u16, 16>;

 /// Vector of 32 `u16` values
-pub type u16x32 = SimdU16<32>;
+pub type u16x32 = Simd<u16, 32>;

 /// Vector of two `u32` values
-pub type u32x2 = SimdU32<2>;
+pub type u32x2 = Simd<u32, 2>;

 /// Vector of four `u32` values
-pub type u32x4 = SimdU32<4>;
+pub type u32x4 = Simd<u32, 4>;

 /// Vector of eight `u32` values
-pub type u32x8 = SimdU32<8>;
+pub type u32x8 = Simd<u32, 8>;

 /// Vector of 16 `u32` values
-pub type u32x16 = SimdU32<16>;
+pub type u32x16 = Simd<u32, 16>;

 /// Vector of two `u64` values
-pub type u64x2 = SimdU64<2>;
+pub type u64x2 = Simd<u64, 2>;

 /// Vector of four `u64` values
-pub type u64x4 = SimdU64<4>;
+pub type u64x4 = Simd<u64, 4>;

 /// Vector of eight `u64` values
-pub type u64x8 = SimdU64<8>;
+pub type u64x8 = Simd<u64, 8>;

 /// Vector of four `u8` values
-pub type u8x4 = SimdU8<4>;
+pub type u8x4 = Simd<u8, 4>;

 /// Vector of eight `u8` values
-pub type u8x8 = SimdU8<8>;
+pub type u8x8 = Simd<u8, 8>;

 /// Vector of 16 `u8` values
-pub type u8x16 = SimdU8<16>;
+pub type u8x16 = Simd<u8, 16>;

 /// Vector of 32 `u8` values
-pub type u8x32 = SimdU8<32>;
+pub type u8x32 = Simd<u8, 32>;

 /// Vector of 64 `u8` values
-pub type u8x64 = SimdU8<64>;
+pub type u8x64 = Simd<u8, 64>;
--- a/crates/core_simd/src/vector/vector_impl.rs
+++ b/crates/core_simd/src/vector/vector_impl.rs
@ -1,257 +0,0 @@
-/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
-macro_rules! impl_vector {
-    { $name:ident, $type:ty } => {
-        impl<const LANES: usize> crate::vector::sealed::Sealed for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {}
-
-        impl<const LANES: usize> crate::vector::Vector for $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            type Scalar = $type;
-            const LANES: usize = LANES;
-
-            #[inline]
-            fn splat(val: Self::Scalar) -> Self {
-                Self::splat(val)
-            }
-        }
-
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::LaneCount<LANES>: crate::SupportedLaneCount,
-        {
-            /// Construct a SIMD vector by setting all lanes to the given value.
-            pub const fn splat(value: $type) -> Self {
-                Self([value; LANES])
-            }
-
-            /// Returns an array reference containing the entire SIMD vector.
-            pub const fn as_array(&self) -> &[$type; LANES] {
-                &self.0
-            }
-
-            /// Returns a mutable array reference containing the entire SIMD vector.
-            pub fn as_mut_array(&mut self) -> &mut [$type; LANES] {
-                &mut self.0
-            }
-
-            /// Converts an array to a SIMD vector.
-            pub const fn from_array(array: [$type; LANES]) -> Self {
-                Self(array)
-            }
-
-            /// Converts a SIMD vector to an array.
-            pub const fn to_array(self) -> [$type; LANES] {
-                self.0
-            }
-
-            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-            /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
-            ///
-            /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
-            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
-            /// ```
-            #[must_use]
-            #[inline]
-            pub fn gather_or(slice: &[$type], idxs: crate::SimdUsize<LANES>, or: Self) -> Self {
-                Self::gather_select(slice, crate::MaskSize::splat(true), idxs, or)
-            }
-
-            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-            /// Out-of-bounds indices instead use the default value for that lane (0).
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-            ///
-            /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
-            /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
-            /// ```
-            #[must_use]
-            #[inline]
-            pub fn gather_or_default(slice: &[$type], idxs: crate::SimdUsize<LANES>) -> Self {
-                Self::gather_or(slice, idxs, Self::splat(<$type>::default()))
-            }
-
-            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-            /// Out-of-bounds or masked indices instead select the value from the "or" vector.
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
-            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
-            ///
-            /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
-            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
-            /// ```
-            #[must_use]
-            #[inline]
-            pub fn gather_select(
-                slice: &[$type],
-                mask: crate::MaskSize<LANES>,
-                idxs: crate::SimdUsize<LANES>,
-                or: Self,
-            ) -> Self
-            {
-                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
-                let base_ptr = crate::vector::ptr::SimdConstPtr::splat(slice.as_ptr());
-                // Ferris forgive me, I have done pointer arithmetic here.
-                let ptrs = base_ptr.wrapping_add(idxs);
-                // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
-                unsafe { crate::intrinsics::simd_gather(or, ptrs, mask) }
-            }
-
-            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
-            /// Out-of-bounds indices are not written.
-            /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
-            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
-            ///
-            /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
-            /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
-            /// ```
-            #[inline]
-            pub fn scatter(self, slice: &mut [$type], idxs: crate::SimdUsize<LANES>) {
-                self.scatter_select(slice, crate::MaskSize::splat(true), idxs)
-            }
-
-            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
-            /// Out-of-bounds or masked indices are not written.
-            /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core_simd::*;
-            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
-            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
-            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
-            ///
-            /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
-            /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
-            /// ```
-            #[inline]
-            pub fn scatter_select(
-                self,
-                slice: &mut [$type],
-                mask: crate::MaskSize<LANES>,
-                idxs: crate::SimdUsize<LANES>,
-            )
-            {
-                // We must construct our scatter mask before we derive a pointer!
-                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
-                // SAFETY: This block works with *mut T derived from &mut 'a [T],
-                // which means it is delicate in Rust's borrowing model, circa 2021:
-                // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
-                // Even though this block is largely safe methods, it must be almost exactly this way
-                // to prevent invalidating the raw ptrs while they're live.
-                // Thus, entering this block requires all values to use being already ready:
-                // 0. idxs we want to write to, which are used to construct the mask.
-                // 1. mask, which depends on an initial &'a [T] and the idxs.
-                // 2. actual values to scatter (self).
-                // 3. &mut [T] which will become our base ptr.
-                unsafe {
-                    // Now Entering ☢️ *mut T Zone
-                    let base_ptr = crate::vector::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
-                    // Ferris forgive me, I have done pointer arithmetic here.
-                    let ptrs = base_ptr.wrapping_add(idxs);
-                    // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
-                    crate::intrinsics::simd_scatter(self, ptrs, mask)
-                    // Cleared ☢️ *mut T Zone
-                }
-            }
-        }
-
-        impl<const LANES: usize> Copy for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
-
-        impl<const LANES: usize> Clone for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn clone(&self) -> Self {
-                *self
-            }
-        }
-
-        impl<const LANES: usize> Default for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn default() -> Self {
-                Self::splat(<$type>::default())
-            }
-        }
-
-        impl<const LANES: usize> PartialEq for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                // TODO use SIMD equality
-                self.to_array() == other.to_array()
-            }
-        }
-
-        impl<const LANES: usize> PartialOrd for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-                // TODO use SIMD equalitya
-                self.to_array().partial_cmp(other.as_ref())
-            }
-        }
-
-        // array references
-        impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn as_ref(&self) -> &[$type; LANES] {
-                &self.0
-            }
-        }
-
-        impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn as_mut(&mut self) -> &mut [$type; LANES] {
-                &mut self.0
-            }
-        }
-
-        // slice references
-        impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn as_ref(&self) -> &[$type] {
-                &self.0
-            }
-        }
-
-        impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            #[inline]
-            fn as_mut(&mut self) -> &mut [$type] {
-                &mut self.0
-            }
-        }
-
-        // vector/array conversion
-        impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            fn from(array: [$type; LANES]) -> Self {
-                Self(array)
-            }
-        }
-
-        impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where crate::LaneCount<LANES>: crate::SupportedLaneCount {
-            fn from(vector: $name<LANES>) -> Self {
-                vector.to_array()
-            }
-        }
-
-        impl_shuffle_2pow_lanes!{ $name }
-    }
-}
--- a/crates/core_simd/src/vendor/arm.rs
+++ b/crates/core_simd/src/vendor/arm.rs
@ -28,26 +28,26 @@ from_transmute! { unsafe u32x4 => uint32x4_t }
 from_transmute! { unsafe i32x2 => int32x2_t }
 from_transmute! { unsafe i32x4 => int32x4_t }

-from_transmute! { unsafe SimdU64<1> => uint64x1_t }
+from_transmute! { unsafe Simd<u64, 1> => uint64x1_t }
 from_transmute! { unsafe u64x2 => uint64x2_t }
-from_transmute! { unsafe SimdI64<1> => int64x1_t }
+from_transmute! { unsafe Simd<i64, 1> => int64x1_t }
 from_transmute! { unsafe i64x2 => int64x2_t }
-from_transmute! { unsafe SimdU64<1> => poly64x1_t }
+from_transmute! { unsafe Simd<u64, 1> => poly64x1_t }
 from_transmute! { unsafe u64x2 => poly64x2_t }

 #[cfg(target_arch = "arm")]
 mod arm {
    use super::*;
-    from_transmute! { unsafe SimdU8<4> => uint8x4_t }
-    from_transmute! { unsafe SimdI8<4> => int8x4_t }
+    from_transmute! { unsafe Simd<u8, 4> => uint8x4_t }
+    from_transmute! { unsafe Simd<i8, 4> => int8x4_t }

-    from_transmute! { unsafe SimdU16<2> => uint16x2_t }
-    from_transmute! { unsafe SimdI16<2> => int16x2_t }
+    from_transmute! { unsafe Simd<u16, 2> => uint16x2_t }
+    from_transmute! { unsafe Simd<i16, 2> => int16x2_t }
 }

 #[cfg(target_arch = "aarch64")]
 mod aarch64 {
    use super::*;
-    from_transmute! { unsafe SimdF64<1> => float64x1_t }
+    from_transmute! { unsafe Simd<f64, 1> => float64x1_t }
    from_transmute! { unsafe f64x2 => float64x2_t }
 }
--- a/crates/core_simd/src/vendor/x86.rs
+++ b/crates/core_simd/src/vendor/x86.rs
@ -45,10 +45,10 @@ mod p32 {
    use super::*;
    from_transmute! { unsafe usizex4 => __m128i }
    from_transmute! { unsafe usizex8 => __m256i }
-    from_transmute! { unsafe SimdUsize<16> => __m512i }
+    from_transmute! { unsafe Simd<usize, 16> => __m512i }
    from_transmute! { unsafe isizex4 => __m128i }
    from_transmute! { unsafe isizex8 => __m256i }
-    from_transmute! { unsafe SimdIsize<16> => __m512i }
+    from_transmute! { unsafe Simd<isize, 16> => __m512i }
 }

 #[cfg(target_pointer_width = "64")]
--- a/crates/core_simd/tests/f32_ops.rs
+++ b/crates/core_simd/tests/f32_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_float_tests! { SimdF32, f32, i32 }
+impl_float_tests! { f32, i32 }
--- a/crates/core_simd/tests/f64_ops.rs
+++ b/crates/core_simd/tests/f64_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_float_tests! { SimdF64, f64, i64 }
+impl_float_tests! { f64, i64 }
--- a/crates/core_simd/tests/i16_ops.rs
+++ b/crates/core_simd/tests/i16_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_signed_tests! { SimdI16, i16 }
+impl_signed_tests! { i16 }
--- a/crates/core_simd/tests/i32_ops.rs
+++ b/crates/core_simd/tests/i32_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_signed_tests! { SimdI32, i32 }
+impl_signed_tests! { i32 }
--- a/crates/core_simd/tests/i64_ops.rs
+++ b/crates/core_simd/tests/i64_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_signed_tests! { SimdI64, i64 }
+impl_signed_tests! { i64 }
--- a/crates/core_simd/tests/i8_ops.rs
+++ b/crates/core_simd/tests/i8_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_signed_tests! { SimdI8, i8 }
+impl_signed_tests! { i8 }
--- a/crates/core_simd/tests/isize_ops.rs
+++ b/crates/core_simd/tests/isize_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_signed_tests! { SimdIsize, isize }
+impl_signed_tests! { isize }
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@ -7,9 +7,9 @@ use wasm_bindgen_test::*;
 wasm_bindgen_test_configure!(run_in_browser);

 macro_rules! test_mask_api {
-    { $name:ident } => {
+    { $type:ident } => {
        #[allow(non_snake_case)]
-        mod $name {
+        mod $type {
            #[cfg(target_arch = "wasm32")]
            use wasm_bindgen_test::*;

@ -17,7 +17,7 @@ macro_rules! test_mask_api {
            #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
            fn set_and_test() {
                let values = [true, false, false, true, false, false, true, false];
-                let mut mask = core_simd::$name::<8>::splat(false);
+                let mut mask = core_simd::Mask::<$type, 8>::splat(false);
                for (lane, value) in values.iter().copied().enumerate() {
                    mask.set(lane, value);
                }
@ -29,7 +29,7 @@ macro_rules! test_mask_api {
            #[test]
            #[should_panic]
            fn set_invalid_lane() {
-                let mut mask = core_simd::$name::<8>::splat(false);
+                let mut mask = core_simd::Mask::<$type, 8>::splat(false);
                mask.set(8, true);
                let _ = mask;
            }
@ -37,24 +37,24 @@ macro_rules! test_mask_api {
            #[test]
            #[should_panic]
            fn test_invalid_lane() {
-                let mask = core_simd::$name::<8>::splat(false);
+                let mask = core_simd::Mask::<$type, 8>::splat(false);
                let _ = mask.test(8);
            }

            #[test]
            fn any() {
-                assert!(!core_simd::$name::<8>::splat(false).any());
-                assert!(core_simd::$name::<8>::splat(true).any());
-                let mut v = core_simd::$name::<8>::splat(false);
+                assert!(!core_simd::Mask::<$type, 8>::splat(false).any());
+                assert!(core_simd::Mask::<$type, 8>::splat(true).any());
+                let mut v = core_simd::Mask::<$type, 8>::splat(false);
                v.set(2, true);
                assert!(v.any());
            }

            #[test]
            fn all() {
-                assert!(!core_simd::$name::<8>::splat(false).all());
-                assert!(core_simd::$name::<8>::splat(true).all());
-                let mut v = core_simd::$name::<8>::splat(false);
+                assert!(!core_simd::Mask::<$type, 8>::splat(false).all());
+                assert!(core_simd::Mask::<$type, 8>::splat(true).all());
+                let mut v = core_simd::Mask::<$type, 8>::splat(false);
                v.set(2, true);
                assert!(!v.all());
            }
@ -62,10 +62,10 @@ macro_rules! test_mask_api {
            #[test]
            fn roundtrip_int_conversion() {
                let values = [true, false, false, true, false, false, true, false];
-                let mask = core_simd::$name::<8>::from_array(values);
+                let mask = core_simd::Mask::<$type, 8>::from_array(values);
                let int = mask.to_int();
                assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
-                assert_eq!(core_simd::$name::<8>::from_int(int), mask);
+                assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
            }

            #[test]
@ -74,24 +74,24 @@ macro_rules! test_mask_api {
                    true, false, false, true, false, false, true, false,
                    true, true, false, false, false, false, false, true,
                ];
-                let mask = core_simd::$name::<16>::from_array(values);
+                let mask = core_simd::Mask::<$type, 16>::from_array(values);
                let bitmask = mask.to_bitmask();
                assert_eq!(bitmask, [0b01001001, 0b10000011]);
-                assert_eq!(core_simd::$name::<16>::from_bitmask(bitmask), mask);
+                assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
            }
        }
    }
 }

 mod mask_api {
-    test_mask_api! { Mask8 }
+    test_mask_api! { i8 }
 }

 #[test]
 fn convert() {
    let values = [true, false, false, true, false, false, true, false];
    assert_eq!(
-        core_simd::Mask8::from_array(values),
-        core_simd::Mask32::from_array(values).into()
+        core_simd::Mask::<i8, 8>::from_array(values),
+        core_simd::Mask::<i32, 8>::from_array(values).into()
    );
 }
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@ -3,19 +3,19 @@
 /// Compares the vector operation to the equivalent scalar operation.
 #[macro_export]
 macro_rules! impl_unary_op_test {
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
+    { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
        test_helpers::test_lanes! {
            fn $fn<const LANES: usize>() {
                test_helpers::test_unary_elementwise(
-                    &<$vector as core::ops::$trait>::$fn,
+                    &<core_simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                    &$scalar_fn,
                    &|_| true,
                );
            }
        }
    };
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident } => {
-        impl_unary_op_test! { $vector, $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn }
+    { $scalar:ty, $trait:ident :: $fn:ident } => {
+        impl_unary_op_test! { $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn }
    };
 }

@ -24,14 +24,15 @@ macro_rules! impl_unary_op_test {
 /// Compares the vector operation to the equivalent scalar operation.
 #[macro_export]
 macro_rules! impl_binary_op_test {
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => {
+    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => {
        mod $fn {
            use super::*;
+            use core_simd::Simd;

            test_helpers::test_lanes! {
                fn normal<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
-                        &<$vector as core::ops::$trait>::$fn,
+                        &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                        &$scalar_fn,
                        &|_, _| true,
                    );
@ -39,7 +40,7 @@ macro_rules! impl_binary_op_test {

                fn scalar_rhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &<$vector as core::ops::$trait<$scalar>>::$fn,
+                        &<Simd<$scalar, LANES> as core::ops::$trait<$scalar>>::$fn,
                        &$scalar_fn,
                        &|_, _| true,
                    );
@ -47,7 +48,7 @@ macro_rules! impl_binary_op_test {

                fn scalar_lhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_lhs_elementwise(
-                        &<$scalar as core::ops::$trait<$vector>>::$fn,
+                        &<$scalar as core::ops::$trait<Simd<$scalar, LANES>>>::$fn,
                        &$scalar_fn,
                        &|_, _| true,
                    );
@ -55,7 +56,7 @@ macro_rules! impl_binary_op_test {

                fn assign<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
-                        &|mut a, b| { <$vector as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                        &$scalar_fn,
                        &|_, _| true,
                    );
@ -63,7 +64,7 @@ macro_rules! impl_binary_op_test {

                fn assign_scalar_rhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &|mut a, b| { <$vector as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
+                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
                        &$scalar_fn,
                        &|_, _| true,
                    );
@ -71,8 +72,8 @@ macro_rules! impl_binary_op_test {
            }
        }
    };
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => {
-        impl_binary_op_test! { $vector, $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn }
+    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => {
+        impl_binary_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn }
    };
 }

@ -84,14 +85,15 @@ macro_rules! impl_binary_op_test {
 /// Compares the vector operation to the equivalent scalar operation.
 #[macro_export]
 macro_rules! impl_binary_checked_op_test {
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => {
+    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => {
        mod $fn {
            use super::*;
+            use core_simd::Simd;

            test_helpers::test_lanes! {
                fn normal<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
-                        &<$vector as core::ops::$trait>::$fn,
+                        &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                        &$scalar_fn,
                        &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
                    );
@ -99,7 +101,7 @@ macro_rules! impl_binary_checked_op_test {

                fn scalar_rhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &<$vector as core::ops::$trait<$scalar>>::$fn,
+                        &<Simd<$scalar, LANES> as core::ops::$trait<$scalar>>::$fn,
                        &$scalar_fn,
                        &|x, y| x.iter().all(|x| $check_fn(*x, y)),
                    );
@ -107,7 +109,7 @@ macro_rules! impl_binary_checked_op_test {

                fn scalar_lhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_lhs_elementwise(
-                        &<$scalar as core::ops::$trait<$vector>>::$fn,
+                        &<$scalar as core::ops::$trait<Simd<$scalar, LANES>>>::$fn,
                        &$scalar_fn,
                        &|x, y| y.iter().all(|y| $check_fn(x, *y)),
                    );
@ -115,7 +117,7 @@ macro_rules! impl_binary_checked_op_test {

                fn assign<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
-                        &|mut a, b| { <$vector as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                        &$scalar_fn,
                        &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
                    )
@ -123,7 +125,7 @@ macro_rules! impl_binary_checked_op_test {

                fn assign_scalar_rhs<const LANES: usize>() {
                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &|mut a, b| { <$vector as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
+                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
                        &$scalar_fn,
                        &|x, y| x.iter().all(|x| $check_fn(*x, y)),
                    )
@ -131,8 +133,8 @@ macro_rules! impl_binary_checked_op_test {
            }
        }
    };
-    { $vector:ty, $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => {
-        impl_binary_nonzero_rhs_op_test! { $vector, $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn }
+    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => {
+        impl_binary_checked_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn }
    };
 }

@ -216,9 +218,9 @@ macro_rules! impl_common_integer_tests {
 /// Implement tests for signed integers.
 #[macro_export]
 macro_rules! impl_signed_tests {
-    { $vector:ident, $scalar:tt } => {
+    { $scalar:tt } => {
        mod $scalar {
-            type Vector<const LANES: usize> = core_simd::$vector<LANES>;
+            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
            type Scalar = $scalar;

            impl_common_integer_tests! { Vector, Scalar }
@ -305,18 +307,18 @@ macro_rules! impl_signed_tests {
                }
            }

-            impl_binary_op_test!(Vector<LANES>, Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);

            // Exclude Div and Rem panicking cases
-            impl_binary_checked_op_test!(Vector<LANES>, Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
-            impl_binary_checked_op_test!(Vector<LANES>, Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+            impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+            impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));

-            impl_unary_op_test!(Vector<LANES>, Scalar, Not::not);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+            impl_unary_op_test!(Scalar, Not::not);
+            impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+            impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+            impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
        }
    }
 }
@ -324,9 +326,9 @@ macro_rules! impl_signed_tests {
 /// Implement tests for unsigned integers.
 #[macro_export]
 macro_rules! impl_unsigned_tests {
-    { $vector:ident, $scalar:tt } => {
+    { $scalar:tt } => {
        mod $scalar {
-            type Vector<const LANES: usize> = core_simd::$vector<LANES>;
+            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
            type Scalar = $scalar;

            impl_common_integer_tests! { Vector, Scalar }
@ -339,18 +341,18 @@ macro_rules! impl_unsigned_tests {
                }
            }

-            impl_binary_op_test!(Vector<LANES>, Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);

            // Exclude Div and Rem panicking cases
-            impl_binary_checked_op_test!(Vector<LANES>, Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0);
-            impl_binary_checked_op_test!(Vector<LANES>, Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0);
+            impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0);
+            impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0);

-            impl_unary_op_test!(Vector<LANES>, Scalar, Not::not);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+            impl_unary_op_test!(Scalar, Not::not);
+            impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+            impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+            impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
        }
    }
 }
@ -358,17 +360,17 @@ macro_rules! impl_unsigned_tests {
 /// Implement tests for floating point numbers.
 #[macro_export]
 macro_rules! impl_float_tests {
-    { $vector:ident, $scalar:tt, $int_scalar:tt } => {
+    { $scalar:tt, $int_scalar:tt } => {
        mod $scalar {
-            type Vector<const LANES: usize> = core_simd::$vector<LANES>;
+            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
            type Scalar = $scalar;

-            impl_unary_op_test!(Vector<LANES>, Scalar, Neg::neg);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Add::add, AddAssign::add_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Sub::sub, SubAssign::sub_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Mul::mul, MulAssign::mul_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Div::div, DivAssign::div_assign);
-            impl_binary_op_test!(Vector<LANES>, Scalar, Rem::rem, RemAssign::rem_assign);
+            impl_unary_op_test!(Scalar, Neg::neg);
+            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign);
+            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign);
+            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign);
+            impl_binary_op_test!(Scalar, Div::div, DivAssign::div_assign);
+            impl_binary_op_test!(Scalar, Rem::rem, RemAssign::rem_assign);

            test_helpers::test_lanes! {
                fn is_sign_positive<const LANES: usize>() {
--- a/crates/core_simd/tests/permute.rs
+++ b/crates/core_simd/tests/permute.rs
@ -1,6 +1,6 @@
 #![feature(portable_simd)]

-use core_simd::SimdU32;
+use core_simd::Simd;

 #[cfg(target_arch = "wasm32")]
 use wasm_bindgen_test::*;
@ -11,7 +11,7 @@ wasm_bindgen_test_configure!(run_in_browser);
 #[test]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn simple_shuffle() {
-    let a = SimdU32::from_array([2, 4, 1, 9]);
+    let a = Simd::from_array([2, 4, 1, 9]);
    let b = a;
    assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]);
 }
@ -19,15 +19,15 @@ fn simple_shuffle() {
 #[test]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn reverse() {
-    let a = SimdU32::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
+    let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
    assert_eq!(a.reverse().to_array(), [7, 6, 5, 4, 3, 2, 1, 0]);
 }

 #[test]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn interleave() {
-    let a = SimdU32::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
-    let b = SimdU32::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
+    let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
+    let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
    let (lo, hi) = a.interleave(b);
    assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
    assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);
--- a/crates/core_simd/tests/round.rs
+++ b/crates/core_simd/tests/round.rs
@ -1,9 +1,9 @@
 #![feature(portable_simd)]

 macro_rules! float_rounding_test {
-    { $vector:ident, $scalar:tt, $int_scalar:tt } => {
+    { $scalar:tt, $int_scalar:tt } => {
        mod $scalar {
-            type Vector<const LANES: usize> = core_simd::$vector<LANES>;
+            type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
            type Scalar = $scalar;
            type IntScalar = $int_scalar;

@ -88,5 +88,5 @@ macro_rules! float_rounding_test {
    }
 }

-float_rounding_test! { SimdF32, f32, i32 }
-float_rounding_test! { SimdF64, f64, i64 }
+float_rounding_test! { f32, i32 }
+float_rounding_test! { f64, i64 }
--- a/crates/core_simd/tests/to_bytes.rs
+++ b/crates/core_simd/tests/to_bytes.rs
@ -2,13 +2,13 @@
 #![allow(incomplete_features)]
 #![cfg(feature = "const_evaluatable_checked")]

-use core_simd::SimdU32;
+use core_simd::Simd;

 #[test]
 fn byte_convert() {
-    let int = SimdU32::from_array([0xdeadbeef, 0x8badf00d]);
+    let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]);
    let bytes = int.to_ne_bytes();
    assert_eq!(int[0].to_ne_bytes(), bytes[..4]);
    assert_eq!(int[1].to_ne_bytes(), bytes[4..]);
-    assert_eq!(SimdU32::from_ne_bytes(bytes), int);
+    assert_eq!(Simd::<u32, 2>::from_ne_bytes(bytes), int);
 }
--- a/crates/core_simd/tests/u16_ops.rs
+++ b/crates/core_simd/tests/u16_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_unsigned_tests! { SimdU16, u16 }
+impl_unsigned_tests! { u16 }
--- a/crates/core_simd/tests/u32_ops.rs
+++ b/crates/core_simd/tests/u32_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_unsigned_tests! { SimdU32, u32 }
+impl_unsigned_tests! { u32 }
--- a/crates/core_simd/tests/u64_ops.rs
+++ b/crates/core_simd/tests/u64_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_unsigned_tests! { SimdU64, u64 }
+impl_unsigned_tests! { u64 }
--- a/crates/core_simd/tests/u8_ops.rs
+++ b/crates/core_simd/tests/u8_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_unsigned_tests! { SimdU8, u8 }
+impl_unsigned_tests! { u8 }
--- a/crates/core_simd/tests/usize_ops.rs
+++ b/crates/core_simd/tests/usize_ops.rs
@ -2,4 +2,4 @@

 #[macro_use]
 mod ops_macros;
-impl_unsigned_tests! { SimdUsize, usize }
+impl_unsigned_tests! { usize }