Merge portable-simd#195 - portable-simd:trait-ops

Generic `core::ops` for `Simd<T, _>` In order to maintain type soundness, we need to be sure we only implement an operation for `Simd<T, _> where T: SimdElement`... and also valid for that operation in general. While we could do this purely parametrically, it is more sound to implement the operators directly for the base scalar type arguments and then use type parameters to extend the operators to the "higher order" operations. This implements that strategy and cleans up `simd::ops` into a few submodules: - assign.rs: `core::ops::*Assign` - deref.rs: `core::ops` impls which "deref" borrowed versions of the arguments - unary.rs: encloses the logic for unary operators on `Simd`, as unary ops are much simpler This is possible since everything need not be nested in a single maze of macros anymore. The result simplifies the logic and allows reasoning about what operators are valid based on the expressed trait bounds, and also reduces the size of the trait implementation output in rustdoc, for a huge win of 4 MB off the size of `struct.Simd.html`! This addresses a common user complaint, as the original was over 5.5 MB and capable of crashing browsers! This also carries a fix for a type-inference-related breakage, by removing the autosplatting (vector + scalar binop) impls, as unfortunately the presence of autosplatting was capable of busting type inference. We will likely need to see results from a Crater run before we can understand how to re-land autosplatting.
2021-12-02 17:41:30 -08:00 · 2021-12-02 17:41:30 -08:00 · a8385522ad
commit a8385522ad
parent b2dac7124b 8003b04323
9 changed files with 377 additions and 459 deletions
--- a/crates/core_simd/examples/nbody.rs
+++ b/crates/core_simd/examples/nbody.rs
@ -97,7 +97,7 @@ mod nbody {
        let sun = &mut sun[0];
        for body in rest {
            let m_ratio = body.mass / SOLAR_MASS;
-            sun.v -= body.v * m_ratio;
+            sun.v -= body.v * Simd::splat(m_ratio);
        }
    }

@ -143,14 +143,14 @@ mod nbody {
        let mut i = 0;
        for j in 0..N_BODIES {
            for k in j + 1..N_BODIES {
-                let f = r[i] * mag[i];
-                bodies[j].v -= f * bodies[k].mass;
-                bodies[k].v += f * bodies[j].mass;
+                let f = r[i] * Simd::splat(mag[i]);
+                bodies[j].v -= f * Simd::splat(bodies[k].mass);
+                bodies[k].v += f * Simd::splat(bodies[j].mass);
                i += 1
            }
        }
        for body in bodies {
-            body.x += dt * body.v
+            body.x += Simd::splat(dt) * body.v
        }
    }

--- a/crates/core_simd/src/math.rs
+++ b/crates/core_simd/src/math.rs
@ -17,7 +17,7 @@ macro_rules! impl_uint_arith {
            /// let max = Simd::splat(MAX);
            /// let unsat = x + max;
            /// let sat = x.saturating_add(max);
-            /// assert_eq!(x - 1, unsat);
+            /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
            /// assert_eq!(sat, max);
            /// ```
            #[inline]
@ -37,7 +37,7 @@ macro_rules! impl_uint_arith {
            /// let max = Simd::splat(MAX);
            /// let unsat = x - max;
            /// let sat = x.saturating_sub(max);
-            /// assert_eq!(unsat, x + 1);
+            /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
            /// assert_eq!(sat, Simd::splat(0));
            #[inline]
            pub fn saturating_sub(self, second: Self) -> Self {
@ -105,7 +105,7 @@ macro_rules! impl_int_arith {
            #[inline]
            pub fn abs(self) -> Self {
                const SHR: $ty = <$ty>::BITS as $ty - 1;
-                let m = self >> SHR;
+                let m = self >> Simd::splat(SHR);
                (self^m) - m
            }

@ -128,7 +128,7 @@ macro_rules! impl_int_arith {
            pub fn saturating_abs(self) -> Self {
                // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
                const SHR: $ty = <$ty>::BITS as $ty - 1;
-                let m = self >> SHR;
+                let m = self >> Simd::splat(SHR);
                (self^m).saturating_sub(m)
            }

--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@ -1,5 +1,13 @@
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::ops::{Add, Mul};
+use core::ops::{BitAnd, BitOr, BitXor};
+use core::ops::{Div, Rem, Sub};
+use core::ops::{Shl, Shr};
+
+mod assign;
+mod deref;
+mod unary;

 impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
 where
@ -57,166 +65,44 @@ macro_rules! impl_ref_ops {
            $(#[$attrs])*
            fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body
        }
-
-        impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = <$type as core::ops::$trait<$rhs>>::Output;
-
-            $(#[$attrs])*
-            fn $fn($self_tok, $rhs_arg: &$rhs) -> Self::Output {
-                core::ops::$trait::$fn($self_tok, *$rhs_arg)
-            }
-        }
-
-        impl<const $lanes: usize> core::ops::$trait<$rhs> for &'_ $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = <$type as core::ops::$trait<$rhs>>::Output;
-
-            $(#[$attrs])*
-            fn $fn($self_tok, $rhs_arg: $rhs) -> Self::Output {
-                core::ops::$trait::$fn(*$self_tok, $rhs_arg)
-            }
-        }
-
-        impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for &'_ $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = <$type as core::ops::$trait<$rhs>>::Output;
-
-            $(#[$attrs])*
-            fn $fn($self_tok, $rhs_arg: &$rhs) -> Self::Output {
-                core::ops::$trait::$fn(*$self_tok, *$rhs_arg)
-            }
-        }
    };
-
-    // binary assignment op
-    {
-        impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
-        where
-            LaneCount<$lanes2:ident>: SupportedLaneCount,
-        {
-            $(#[$attrs:meta])*
-            fn $fn:ident(&mut $self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) $body:tt
-        }
-    } => {
-        impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            $(#[$attrs])*
-            fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body
-        }
-
-        impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            $(#[$attrs])*
-            fn $fn(&mut $self_tok, $rhs_arg: &$rhs_arg_ty) {
-                core::ops::$trait::$fn($self_tok, *$rhs_arg)
-            }
-        }
-    };
-
-    // unary op
-    {
-        impl<const $lanes:ident: usize> core::ops::$trait:ident for $type:ty
-        where
-            LaneCount<$lanes2:ident>: SupportedLaneCount,
-        {
-            type Output = $output:ty;
-            fn $fn:ident($self_tok:ident) -> Self::Output $body:tt
-        }
-    } => {
-        impl<const $lanes: usize> core::ops::$trait for $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = $output;
-            fn $fn($self_tok) -> Self::Output $body
-        }
-
-        impl<const $lanes: usize> core::ops::$trait for &'_ $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = <$type as core::ops::$trait>::Output;
-            fn $fn($self_tok) -> Self::Output {
-                core::ops::$trait::$fn(*$self_tok)
-            }
-        }
-    }
 }

 /// Automatically implements operators over vectors and scalars for a particular vector.
 macro_rules! impl_op {
    { impl Add for $scalar:ty } => {
-        impl_op! { @binary $scalar, Add::add, AddAssign::add_assign, simd_add }
+        impl_op! { @binary $scalar, Add::add, simd_add }
    };
    { impl Sub for $scalar:ty } => {
-        impl_op! { @binary $scalar, Sub::sub, SubAssign::sub_assign, simd_sub }
+        impl_op! { @binary $scalar, Sub::sub, simd_sub }
    };
    { impl Mul for $scalar:ty } => {
-        impl_op! { @binary $scalar, Mul::mul, MulAssign::mul_assign, simd_mul }
+        impl_op! { @binary $scalar, Mul::mul, simd_mul }
    };
    { impl Div for $scalar:ty } => {
-        impl_op! { @binary $scalar, Div::div, DivAssign::div_assign, simd_div }
+        impl_op! { @binary $scalar, Div::div, simd_div }
    };
    { impl Rem for $scalar:ty } => {
-        impl_op! { @binary $scalar, Rem::rem, RemAssign::rem_assign, simd_rem }
+        impl_op! { @binary $scalar, Rem::rem, simd_rem }
    };
    { impl Shl for $scalar:ty } => {
-        impl_op! { @binary $scalar, Shl::shl, ShlAssign::shl_assign, simd_shl }
+        impl_op! { @binary $scalar, Shl::shl, simd_shl }
    };
    { impl Shr for $scalar:ty } => {
-        impl_op! { @binary $scalar, Shr::shr, ShrAssign::shr_assign, simd_shr }
+        impl_op! { @binary $scalar, Shr::shr, simd_shr }
    };
    { impl BitAnd for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitAnd::bitand, BitAndAssign::bitand_assign, simd_and }
+        impl_op! { @binary $scalar, BitAnd::bitand, simd_and }
    };
    { impl BitOr for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitOr::bitor, BitOrAssign::bitor_assign, simd_or }
+        impl_op! { @binary $scalar, BitOr::bitor, simd_or }
    };
    { impl BitXor for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitXor::bitxor, BitXorAssign::bitxor_assign, simd_xor }
-    };
-
-    { impl Not for $scalar:ty } => {
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::Not for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                type Output = Self;
-                fn not(self) -> Self::Output {
-                    self ^ Self::splat(!<$scalar>::default())
-                }
-            }
-        }
-    };
-
-    { impl Neg for $scalar:ty } => {
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::Neg for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                type Output = Self;
-                fn neg(self) -> Self::Output {
-                    unsafe { intrinsics::simd_neg(self) }
-                }
-            }
-        }
+        impl_op! { @binary $scalar, BitXor::bitxor, simd_xor }
    };

    // generic binary op with assignment when output is `Self`
-    { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $assign_trait:ident :: $assign_trait_fn:ident, $intrinsic:ident } => {
+    { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => {
        impl_ref_ops! {
            impl<const LANES: usize> core::ops::$trait<Self> for Simd<$scalar, LANES>
            where
@ -232,60 +118,6 @@ macro_rules! impl_op {
                }
            }
        }
-
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<$scalar> for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                type Output = Self;
-
-                #[inline]
-                fn $trait_fn(self, rhs: $scalar) -> Self::Output {
-                    core::ops::$trait::$trait_fn(self, Self::splat(rhs))
-                }
-            }
-        }
-
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<Simd<$scalar, LANES>> for $scalar
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                type Output = Simd<$scalar, LANES>;
-
-                #[inline]
-                fn $trait_fn(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
-                    core::ops::$trait::$trait_fn(Simd::splat(self), rhs)
-                }
-            }
-        }
-
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$assign_trait<Self> for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                #[inline]
-                fn $assign_trait_fn(&mut self, rhs: Self) {
-                    unsafe {
-                        *self = intrinsics::$intrinsic(*self, rhs);
-                    }
-                }
-            }
-        }
-
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$assign_trait<$scalar> for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                #[inline]
-                fn $assign_trait_fn(&mut self, rhs: $scalar) {
-                    core::ops::$assign_trait::$assign_trait_fn(self, Self::splat(rhs));
-                }
-            }
-        }
    };
 }

@ -298,7 +130,6 @@ macro_rules! impl_float_ops {
            impl_op! { impl Mul for $scalar }
            impl_op! { impl Div for $scalar }
            impl_op! { impl Rem for $scalar }
-            impl_op! { impl Neg for $scalar }
        )*
    };
 }
@ -313,7 +144,6 @@ macro_rules! impl_unsigned_int_ops {
            impl_op! { impl BitAnd for $scalar }
            impl_op! { impl BitOr  for $scalar }
            impl_op! { impl BitXor for $scalar }
-            impl_op! { impl Not for $scalar }

            // Integers panic on divide by 0
            impl_ref_ops! {
@ -344,67 +174,6 @@ macro_rules! impl_unsigned_int_ops {
                }
            }

-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Div<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
-                    #[inline]
-                    fn div(self, rhs: $scalar) -> Self::Output {
-                        if rhs == 0 {
-                            panic!("attempt to divide by zero");
-                        }
-                        if <$scalar>::MIN != 0 &&
-                            self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
-                            rhs == -1 as _ {
-                                panic!("attempt to divide with overflow");
-                        }
-                        let rhs = Self::splat(rhs);
-                        unsafe { intrinsics::simd_div(self, rhs) }
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Div<Simd<$scalar, LANES>> for $scalar
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Simd<$scalar, LANES>;
-
-                    #[inline]
-                    fn div(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
-                        Simd::splat(self) / rhs
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::DivAssign<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn div_assign(&mut self, rhs: Self) {
-                        *self = *self / rhs;
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::DivAssign<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn div_assign(&mut self, rhs: $scalar) {
-                        *self = *self / rhs;
-                    }
-                }
-            }
-
            // remainder panics on zero divisor
            impl_ref_ops! {
                impl<const LANES: usize> core::ops::Rem<Self> for Simd<$scalar, LANES>
@ -434,67 +203,6 @@ macro_rules! impl_unsigned_int_ops {
                }
            }

-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Rem<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
-                    #[inline]
-                    fn rem(self, rhs: $scalar) -> Self::Output {
-                        if rhs == 0 {
-                            panic!("attempt to calculate the remainder with a divisor of zero");
-                        }
-                        if <$scalar>::MIN != 0 &&
-                            self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
-                            rhs == -1 as _ {
-                                panic!("attempt to calculate the remainder with overflow");
-                        }
-                        let rhs = Self::splat(rhs);
-                        unsafe { intrinsics::simd_rem(self, rhs) }
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Rem<Simd<$scalar, LANES>> for $scalar
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Simd<$scalar, LANES>;
-
-                    #[inline]
-                    fn rem(self, rhs: Simd<$scalar, LANES>) -> Self::Output {
-                        Simd::splat(self) % rhs
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::RemAssign<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn rem_assign(&mut self, rhs: Self) {
-                        *self = *self % rhs;
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::RemAssign<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn rem_assign(&mut self, rhs: $scalar) {
-                        *self = *self % rhs;
-                    }
-                }
-            }
-
            // shifts panic on overflow
            impl_ref_ops! {
                impl<const LANES: usize> core::ops::Shl<Self> for Simd<$scalar, LANES>
@ -518,49 +226,6 @@ macro_rules! impl_unsigned_int_ops {
                }
            }

-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Shl<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
-                    #[inline]
-                    fn shl(self, rhs: $scalar) -> Self::Output {
-                        if invalid_shift_rhs(rhs) {
-                            panic!("attempt to shift left with overflow");
-                        }
-                        let rhs = Self::splat(rhs);
-                        unsafe { intrinsics::simd_shl(self, rhs) }
-                    }
-                }
-            }
-
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::ShlAssign<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn shl_assign(&mut self, rhs: Self) {
-                        *self = *self << rhs;
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::ShlAssign<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn shl_assign(&mut self, rhs: $scalar) {
-                        *self = *self << rhs;
-                    }
-                }
-            }
-
            impl_ref_ops! {
                impl<const LANES: usize> core::ops::Shr<Self> for Simd<$scalar, LANES>
                where
@ -582,49 +247,6 @@ macro_rules! impl_unsigned_int_ops {
                    }
                }
            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Shr<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
-                    #[inline]
-                    fn shr(self, rhs: $scalar) -> Self::Output {
-                        if invalid_shift_rhs(rhs) {
-                            panic!("attempt to shift with overflow");
-                        }
-                        let rhs = Self::splat(rhs);
-                        unsafe { intrinsics::simd_shr(self, rhs) }
-                    }
-                }
-            }
-
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::ShrAssign<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn shr_assign(&mut self, rhs: Self) {
-                        *self = *self >> rhs;
-                    }
-                }
-            }
-
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::ShrAssign<$scalar> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    #[inline]
-                    fn shr_assign(&mut self, rhs: $scalar) {
-                        *self = *self >> rhs;
-                    }
-                }
-            }
        )*
    };
 }
@ -633,9 +255,6 @@ macro_rules! impl_unsigned_int_ops {
 macro_rules! impl_signed_int_ops {
    { $($scalar:ty),* } => {
        impl_unsigned_int_ops! { $($scalar),* }
-        $( // scalar
-            impl_op! { impl Neg for $scalar }
-        )*
    };
 }

--- a/crates/core_simd/src/ops/assign.rs
+++ b/crates/core_simd/src/ops/assign.rs
@ -0,0 +1,124 @@
+//! Assignment operators
+use super::*;
+use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment
+use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment
+use core::ops::{DivAssign, RemAssign, SubAssign}; // non-commutative binary op-assignment
+use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignment
+
+// Arithmetic
+
+macro_rules! assign_ops {
+    ($(impl<T, U, const LANES: usize> $assignTrait:ident<U> for Simd<T, LANES>
+        where
+            Self: $trait:ident,
+        {
+            fn $assign_call:ident(rhs: U) {
+                $call:ident
+            }
+        })*) => {
+        $(impl<T, U, const LANES: usize> $assignTrait<U> for Simd<T, LANES>
+        where
+            Self: $trait<U, Output = Self>,
+            T: SimdElement,
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            #[inline]
+            fn $assign_call(&mut self, rhs: U) {
+                *self = self.$call(rhs);
+            }
+        })*
+    }
+}
+
+assign_ops! {
+    // Arithmetic
+    impl<T, U, const LANES: usize> AddAssign<U> for Simd<T, LANES>
+    where
+        Self: Add,
+    {
+        fn add_assign(rhs: U) {
+            add
+        }
+    }
+
+    impl<T, U, const LANES: usize> MulAssign<U> for Simd<T, LANES>
+    where
+        Self: Mul,
+    {
+        fn mul_assign(rhs: U) {
+            mul
+        }
+    }
+
+    impl<T, U, const LANES: usize> SubAssign<U> for Simd<T, LANES>
+    where
+        Self: Sub,
+    {
+        fn sub_assign(rhs: U) {
+            sub
+        }
+    }
+
+    impl<T, U, const LANES: usize> DivAssign<U> for Simd<T, LANES>
+    where
+        Self: Div,
+    {
+        fn div_assign(rhs: U) {
+            div
+        }
+    }
+    impl<T, U, const LANES: usize> RemAssign<U> for Simd<T, LANES>
+    where
+        Self: Rem,
+    {
+        fn rem_assign(rhs: U) {
+            rem
+        }
+    }
+
+    // Bitops
+    impl<T, U, const LANES: usize> BitAndAssign<U> for Simd<T, LANES>
+    where
+        Self: BitAnd,
+    {
+        fn bitand_assign(rhs: U) {
+            bitand
+        }
+    }
+
+    impl<T, U, const LANES: usize> BitOrAssign<U> for Simd<T, LANES>
+    where
+        Self: BitOr,
+    {
+        fn bitor_assign(rhs: U) {
+            bitor
+        }
+    }
+
+    impl<T, U, const LANES: usize> BitXorAssign<U> for Simd<T, LANES>
+    where
+        Self: BitXor,
+    {
+        fn bitxor_assign(rhs: U) {
+            bitxor
+        }
+    }
+
+    impl<T, U, const LANES: usize> ShlAssign<U> for Simd<T, LANES>
+    where
+        Self: Shl,
+    {
+        fn shl_assign(rhs: U) {
+            shl
+        }
+    }
+
+    impl<T, U, const LANES: usize> ShrAssign<U> for Simd<T, LANES>
+    where
+        Self: Shr,
+    {
+        fn shr_assign(rhs: U) {
+            shr
+        }
+    }
+}
--- a/crates/core_simd/src/ops/deref.rs
+++ b/crates/core_simd/src/ops/deref.rs
@ -0,0 +1,124 @@
+//! This module hacks in "implicit deref" for Simd's operators.
+//! Ideally, Rust would take care of this itself,
+//! and method calls usually handle the LHS implicitly.
+//! But this is not the case with arithmetic ops.
+use super::*;
+
+macro_rules! deref_lhs {
+    (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+            fn $call:ident
+        }) => {
+        impl<T, const LANES: usize> $trait<$simd> for &$simd
+        where
+            T: SimdElement,
+            $simd: $trait<$simd, Output = $simd>,
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Output = Simd<T, LANES>;
+
+            #[inline]
+            #[must_use = "operator returns a new vector without mutating the inputs"]
+            fn $call(self, rhs: $simd) -> Self::Output {
+                (*self).$call(rhs)
+            }
+        }
+    };
+}
+
+macro_rules! deref_rhs {
+    (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+            fn $call:ident
+        }) => {
+        impl<T, const LANES: usize> $trait<&$simd> for $simd
+        where
+            T: SimdElement,
+            $simd: $trait<$simd, Output = $simd>,
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Output = Simd<T, LANES>;
+
+            #[inline]
+            #[must_use = "operator returns a new vector without mutating the inputs"]
+            fn $call(self, rhs: &$simd) -> Self::Output {
+                self.$call(*rhs)
+            }
+        }
+    };
+}
+
+macro_rules! deref_ops {
+    ($(impl<T, const LANES: usize> $trait:ident for $simd:ty {
+            fn $call:ident
+        })*) => {
+        $(
+            deref_rhs! {
+                impl<T, const LANES: usize> $trait for $simd {
+                    fn $call
+                }
+            }
+            deref_lhs! {
+                impl<T, const LANES: usize> $trait for $simd {
+                    fn $call
+                }
+            }
+            impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd
+            where
+                T: SimdElement,
+                $simd: $trait<$simd, Output = $simd>,
+                LaneCount<LANES>: SupportedLaneCount,
+            {
+                type Output = $simd;
+
+                #[inline]
+                #[must_use = "operator returns a new vector without mutating the inputs"]
+                fn $call(self, rhs: &$simd) -> Self::Output {
+                    (*self).$call(*rhs)
+                }
+            }
+        )*
+    }
+}
+
+deref_ops! {
+    // Arithmetic
+    impl<T, const LANES: usize> Add for Simd<T, LANES> {
+        fn add
+    }
+
+    impl<T, const LANES: usize> Mul for Simd<T, LANES> {
+        fn mul
+    }
+
+    impl<T, const LANES: usize> Sub for Simd<T, LANES> {
+        fn sub
+    }
+
+    impl<T, const LANES: usize> Div for Simd<T, LANES> {
+        fn div
+    }
+
+    impl<T, const LANES: usize> Rem for Simd<T, LANES> {
+        fn rem
+    }
+
+    // Bitops
+    impl<T, const LANES: usize> BitAnd for Simd<T, LANES> {
+        fn bitand
+    }
+
+    impl<T, const LANES: usize> BitOr for Simd<T, LANES> {
+        fn bitor
+    }
+
+    impl<T, const LANES: usize> BitXor for Simd<T, LANES> {
+        fn bitxor
+    }
+
+    impl<T, const LANES: usize> Shl for Simd<T, LANES> {
+        fn shl
+    }
+
+    impl<T, const LANES: usize> Shr for Simd<T, LANES> {
+        fn shr
+    }
+}
--- a/crates/core_simd/src/ops/unary.rs
+++ b/crates/core_simd/src/ops/unary.rs
@ -0,0 +1,77 @@
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::ops::{Neg, Not}; // unary ops
+
+macro_rules! neg {
+    ($(impl<const LANES: usize> Neg for Simd<$scalar:ty, LANES>)*) => {
+        $(impl<const LANES: usize> Neg for Simd<$scalar, LANES>
+        where
+            $scalar: SimdElement,
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Output = Self;
+
+            #[inline]
+            #[must_use = "operator returns a new vector without mutating the input"]
+            fn neg(self) -> Self::Output {
+                unsafe { intrinsics::simd_neg(self) }
+            }
+        })*
+    }
+}
+
+neg! {
+    impl<const LANES: usize> Neg for Simd<f32, LANES>
+
+    impl<const LANES: usize> Neg for Simd<f64, LANES>
+
+    impl<const LANES: usize> Neg for Simd<i8, LANES>
+
+    impl<const LANES: usize> Neg for Simd<i16, LANES>
+
+    impl<const LANES: usize> Neg for Simd<i32, LANES>
+
+    impl<const LANES: usize> Neg for Simd<i64, LANES>
+
+    impl<const LANES: usize> Neg for Simd<isize, LANES>
+}
+
+macro_rules! not {
+    ($(impl<const LANES: usize> Not for Simd<$scalar:ty, LANES>)*) => {
+        $(impl<const LANES: usize> Not for Simd<$scalar, LANES>
+        where
+            $scalar: SimdElement,
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Output = Self;
+
+            #[inline]
+            #[must_use = "operator returns a new vector without mutating the input"]
+            fn not(self) -> Self::Output {
+                self ^ (Simd::splat(!(0 as $scalar)))
+            }
+        })*
+    }
+}
+
+not! {
+    impl<const LANES: usize> Not for Simd<i8, LANES>
+
+    impl<const LANES: usize> Not for Simd<i16, LANES>
+
+    impl<const LANES: usize> Not for Simd<i32, LANES>
+
+    impl<const LANES: usize> Not for Simd<i64, LANES>
+
+    impl<const LANES: usize> Not for Simd<isize, LANES>
+
+    impl<const LANES: usize> Not for Simd<u8, LANES>
+
+    impl<const LANES: usize> Not for Simd<u16, LANES>
+
+    impl<const LANES: usize> Not for Simd<u32, LANES>
+
+    impl<const LANES: usize> Not for Simd<u64, LANES>
+
+    impl<const LANES: usize> Not for Simd<usize, LANES>
+}
--- a/crates/core_simd/src/vector/ptr.rs
+++ b/crates/core_simd/src/vector/ptr.rs
@ -23,7 +23,7 @@ where
    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
-            mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) })
+            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
        }
    }
 }
@ -49,7 +49,7 @@ where
    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
-            mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) })
+            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
        }
    }
 }
--- a/crates/core_simd/tests/autoderef.rs
+++ b/crates/core_simd/tests/autoderef.rs
@ -0,0 +1,22 @@
+// Test that we handle all our "auto-deref" cases correctly.
+#![feature(portable_simd)]
+use core_simd::f32x4;
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn deref() {
+    let x = f32x4::splat(1.0);
+    let y = f32x4::splat(2.0);
+    let a = &x;
+    let b = &y;
+    assert_eq!(f32x4::splat(3.0), x + y);
+    assert_eq!(f32x4::splat(3.0), x + b);
+    assert_eq!(f32x4::splat(3.0), a + y);
+    assert_eq!(f32x4::splat(3.0), a + b);
+}
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@ -38,22 +38,6 @@ macro_rules! impl_binary_op_test {
                    );
                }

-                fn scalar_rhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &<Simd<$scalar, LANES> as core::ops::$trait<$scalar>>::$fn,
-                        &$scalar_fn,
-                        &|_, _| true,
-                    );
-                }
-
-                fn scalar_lhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_lhs_elementwise(
-                        &<$scalar as core::ops::$trait<Simd<$scalar, LANES>>>::$fn,
-                        &$scalar_fn,
-                        &|_, _| true,
-                    );
-                }
-
                fn assign<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
@ -61,14 +45,6 @@ macro_rules! impl_binary_op_test {
                        &|_, _| true,
                    );
                }
-
-                fn assign_scalar_rhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
-                        &$scalar_fn,
-                        &|_, _| true,
-                    );
-                }
            }
        }
    };
@ -99,22 +75,6 @@ macro_rules! impl_binary_checked_op_test {
                    );
                }

-                fn scalar_rhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &<Simd<$scalar, LANES> as core::ops::$trait<$scalar>>::$fn,
-                        &$scalar_fn,
-                        &|x, y| x.iter().all(|x| $check_fn(*x, y)),
-                    );
-                }
-
-                fn scalar_lhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_lhs_elementwise(
-                        &<$scalar as core::ops::$trait<Simd<$scalar, LANES>>>::$fn,
-                        &$scalar_fn,
-                        &|x, y| y.iter().all(|y| $check_fn(x, *y)),
-                    );
-                }
-
                fn assign<const LANES: usize>() {
                    test_helpers::test_binary_elementwise(
                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
@ -122,14 +82,6 @@ macro_rules! impl_binary_checked_op_test {
                        &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
                    )
                }
-
-                fn assign_scalar_rhs<const LANES: usize>() {
-                    test_helpers::test_binary_scalar_rhs_elementwise(
-                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign<$scalar>>::$fn_assign(&mut a, b); a },
-                        &$scalar_fn,
-                        &|x, y| x.iter().all(|x| $check_fn(*x, y)),
-                    )
-                }
            }
        }
    };