Rollup merge of #142078 - sayantn:more-intrinsics, r=workingjubilee

Add SIMD funnel shift and round-to-even intrinsics. This PR adds 3 new SIMD intrinsics: `simd_funnel_shl` (funnel shift left), `simd_funnel_shr` (funnel shift right), and `simd_round_ties_even` (vector version of `round_ties_even_fN`). TODO (future PR): implement `simd_funnel_sh{l,r}` in miri, cg_gcc and cg_clif (it is surprisingly hard to implement without branches: the common tricks that rotate uses don't work because we have 2 elements now, e.g. the `-n&31` trick used by cg_gcc to implement rotate doesn't work here because then `fshl(a, b, 0)` would be `a | b`). [#t-compiler > More SIMD intrinsics](https://rust-lang.zulipchat.com/#narrow/channel/131828-t-compiler/topic/More.20SIMD.20intrinsics/with/522130286) `@rustbot` label T-compiler T-libs A-intrinsics F-core_intrinsics r? `@workingjubilee`
2025-06-29 12:29:53 +02:00 · 2025-06-29 12:29:53 +02:00 · 66ad1f2abf
commit 66ad1f2abf
parent 5ca574e85b 2ffa1dd392
12 changed files with 217 additions and 28 deletions
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@ -496,7 +496,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
        | sym::simd_flog
        | sym::simd_flog10
        | sym::simd_flog2
-        | sym::simd_round => {
+        | sym::simd_round
+        | sym::simd_round_ties_even => {
            intrinsic_args!(fx, args => (a); intrinsic);

            if !a.layout().ty.is_simd() {
@ -527,6 +528,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                    (sym::simd_flog2, types::F64) => "log2",
                    (sym::simd_round, types::F32) => "roundf",
                    (sym::simd_round, types::F64) => "round",
+                    (sym::simd_round_ties_even, types::F32) => "rintf",
+                    (sym::simd_round_ties_even, types::F64) => "rint",
                    _ => unreachable!("{:?}", intrinsic),
                };
                fx.lib_call(
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@ -780,6 +780,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            sym::simd_fsin => "sin",
            sym::simd_fsqrt => "sqrt",
            sym::simd_round => "round",
+            sym::simd_round_ties_even => "rint",
            sym::simd_trunc => "trunc",
            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
        };
@ -827,6 +828,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
            | sym::simd_fsin
            | sym::simd_fsqrt
            | sym::simd_round
+            | sym::simd_round_ties_even
            | sym::simd_trunc
    ) {
        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@ -1537,6 +1537,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
            sym::simd_fsin => "llvm.sin",
            sym::simd_fsqrt => "llvm.sqrt",
            sym::simd_round => "llvm.round",
+            sym::simd_round_ties_even => "llvm.rint",
            sym::simd_trunc => "llvm.trunc",
            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
        };
@ -1563,6 +1564,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
            | sym::simd_fsqrt
            | sym::simd_relaxed_fma
            | sym::simd_round
+            | sym::simd_round_ties_even
            | sym::simd_trunc
    ) {
        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
@ -2309,7 +2311,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
    // Unary integer intrinsics
    if matches!(
        name,
-        sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctlz | sym::simd_ctpop | sym::simd_cttz
+        sym::simd_bswap
+            | sym::simd_bitreverse
+            | sym::simd_ctlz
+            | sym::simd_ctpop
+            | sym::simd_cttz
+            | sym::simd_funnel_shl
+            | sym::simd_funnel_shr
    ) {
        let vec_ty = bx.cx.type_vector(
            match *in_elem.kind() {
@ -2330,6 +2338,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
            sym::simd_ctlz => "llvm.ctlz",
            sym::simd_ctpop => "llvm.ctpop",
            sym::simd_cttz => "llvm.cttz",
+            sym::simd_funnel_shl => "llvm.fshl",
+            sym::simd_funnel_shr => "llvm.fshr",
            _ => unreachable!(),
        };
        let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
@ -2350,6 +2360,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                // simple unary argument cases
                Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
            }
+            sym::simd_funnel_shl | sym::simd_funnel_shr => Ok(bx.call_intrinsic(
+                llvm_intrinsic,
+                &[vec_ty],
+                &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
+            )),
            _ => unreachable!(),
        };
    }
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@ -594,8 +594,9 @@ pub(crate) fn check_intrinsic_type(
        | sym::simd_ceil
        | sym::simd_floor
        | sym::simd_round
+        | sym::simd_round_ties_even
        | sym::simd_trunc => (1, 0, vec![param(0)], param(0)),
-        sym::simd_fma | sym::simd_relaxed_fma => {
+        sym::simd_fma | sym::simd_relaxed_fma | sym::simd_funnel_shl | sym::simd_funnel_shr => {
            (1, 0, vec![param(0), param(0), param(0)], param(0))
        }
        sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@ -1977,6 +1977,8 @@ symbols! {
        simd_fmin,
        simd_fsin,
        simd_fsqrt,
+        simd_funnel_shl,
+        simd_funnel_shr,
        simd_gather,
        simd_ge,
        simd_gt,
@ -2004,6 +2006,7 @@ symbols! {
        simd_relaxed_fma,
        simd_rem,
        simd_round,
+        simd_round_ties_even,
        simd_saturating_add,
        simd_saturating_sub,
        simd_scatter,
--- a/library/core/src/intrinsics/simd.rs
+++ b/library/core/src/intrinsics/simd.rs
@ -126,6 +126,40 @@ pub unsafe fn simd_shl<T>(lhs: T, rhs: T) -> T;
 #[rustc_nounwind]
 pub unsafe fn simd_shr<T>(lhs: T, rhs: T) -> T;

+/// Funnel-shifts vectors left elementwise, with UB on overflow.
+///
+/// Concatenates `a` and `b` elementwise (with `a` in the most significant half),
+/// creating a vector of the same length, but with each element being twice as
+/// wide. Then shift this vector left elementwise by `shift`, shifting in zeros,
+/// and extract the most significant half of each of the elements. If `a` and `b`
+/// are the same, this is equivalent to an elementwise rotate left operation.
+///
+/// `T` must be a vector of integers.
+///
+/// # Safety
+///
+/// Each element of `shift` must be less than `<int>::BITS`.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+pub unsafe fn simd_funnel_shl<T>(a: T, b: T, shift: T) -> T;
+
+/// Funnel-shifts vectors right elementwise, with UB on overflow.
+///
+/// Concatenates `a` and `b` elementwise (with `a` in the most significant half),
+/// creating a vector of the same length, but with each element being twice as
+/// wide. Then shift this vector right elementwise by `shift`, shifting in zeros,
+/// and extract the least significant half of each of the elements. If `a` and `b`
+/// are the same, this is equivalent to an elementwise rotate right operation.
+///
+/// `T` must be a vector of integers.
+///
+/// # Safety
+///
+/// Each element of `shift` must be less than `<int>::BITS`.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+pub unsafe fn simd_funnel_shr<T>(a: T, b: T, shift: T) -> T;
+
 /// "Ands" vectors elementwise.
 ///
 /// `T` must be a vector of integers.
@ -678,6 +712,14 @@ pub unsafe fn simd_floor<T>(x: T) -> T;
 #[rustc_nounwind]
 pub unsafe fn simd_round<T>(x: T) -> T;

+/// Rounds each element to the closest integer-valued float.
+/// Ties are resolved by rounding to the number with an even least significant digit.
+///
+/// `T` must be a vector of floats.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+pub unsafe fn simd_round_ties_even<T>(x: T) -> T;
+
 /// Returns the integer part of each element as an integer-valued float.
 /// In other words, non-integer values are truncated towards zero.
 ///
--- a/src/tools/miri/src/intrinsics/simd.rs
+++ b/src/tools/miri/src/intrinsics/simd.rs
@ -36,6 +36,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
            | "ceil"
            | "floor"
            | "round"
+            | "round_ties_even"
            | "trunc"
            | "fsqrt"
            | "fsin"
@ -71,6 +72,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                    "ceil" => Op::Round(rustc_apfloat::Round::TowardPositive),
                    "floor" => Op::Round(rustc_apfloat::Round::TowardNegative),
                    "round" => Op::Round(rustc_apfloat::Round::NearestTiesToAway),
+                    "round_ties_even" => Op::Round(rustc_apfloat::Round::NearestTiesToEven),
                    "trunc" => Op::Round(rustc_apfloat::Round::TowardZero),
                    "ctlz" => Op::Numeric(sym::ctlz),
                    "ctpop" => Op::Numeric(sym::ctpop),
--- a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs
+++ b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs
@ -569,6 +569,10 @@ fn simd_round() {
        f32x4::from_array([0.9, 1.001, 2.0, -4.5]).round(),
        f32x4::from_array([1.0, 1.0, 2.0, -5.0])
    );
+    assert_eq!(
+        unsafe { intrinsics::simd_round_ties_even(f32x4::from_array([0.9, 1.001, 2.0, -4.5])) },
+        f32x4::from_array([1.0, 1.0, 2.0, -4.0])
+    );
    assert_eq!(
        f32x4::from_array([0.9, 1.001, 2.0, -4.5]).trunc(),
        f32x4::from_array([0.0, 1.0, 2.0, -4.0])
@ -586,6 +590,10 @@ fn simd_round() {
        f64x4::from_array([0.9, 1.001, 2.0, -4.5]).round(),
        f64x4::from_array([1.0, 1.0, 2.0, -5.0])
    );
+    assert_eq!(
+        unsafe { intrinsics::simd_round_ties_even(f64x4::from_array([0.9, 1.001, 2.0, -4.5])) },
+        f64x4::from_array([1.0, 1.0, 2.0, -4.0])
+    );
    assert_eq!(
        f64x4::from_array([0.9, 1.001, 2.0, -4.5]).trunc(),
        f64x4::from_array([0.0, 1.0, 2.0, -4.0])
--- a/tests/ui/simd/intrinsic/float-math-pass.rs
+++ b/tests/ui/simd/intrinsic/float-math-pass.rs
@ -85,6 +85,9 @@ fn main() {
        let r = simd_round(h);
        assert_eq!(x, r);

+        let r = simd_round_ties_even(h);
+        assert_eq!(z, r);
+
        let r = simd_trunc(h);
        assert_eq!(z, r);
    }
--- a/tests/ui/simd/intrinsic/generic-arithmetic-2.rs
+++ b/tests/ui/simd/intrinsic/generic-arithmetic-2.rs
@ -43,6 +43,10 @@ fn main() {
        simd_shl(y, y);
        simd_shr(x, x);
        simd_shr(y, y);
+        simd_funnel_shl(x, x, x);
+        simd_funnel_shl(y, y, y);
+        simd_funnel_shr(x, x, x);
+        simd_funnel_shr(y, y, y);
        simd_and(x, x);
        simd_and(y, y);
        simd_or(x, x);
@ -73,6 +77,10 @@ fn main() {
        //~^ ERROR expected SIMD input type, found non-SIMD `i32`
        simd_shr(0, 0);
        //~^ ERROR expected SIMD input type, found non-SIMD `i32`
+        simd_funnel_shl(0, 0, 0);
+        //~^ ERROR expected SIMD input type, found non-SIMD `i32`
+        simd_funnel_shr(0, 0, 0);
+        //~^ ERROR expected SIMD input type, found non-SIMD `i32`
        simd_and(0, 0);
        //~^ ERROR expected SIMD input type, found non-SIMD `i32`
        simd_or(0, 0);
@ -95,6 +103,10 @@ fn main() {
        //~^ ERROR unsupported operation on `f32x4` with element `f32`
        simd_shr(z, z);
        //~^ ERROR unsupported operation on `f32x4` with element `f32`
+        simd_funnel_shl(z, z, z);
+        //~^ ERROR unsupported operation on `f32x4` with element `f32`
+        simd_funnel_shr(z, z, z);
+        //~^ ERROR unsupported operation on `f32x4` with element `f32`
        simd_and(z, z);
        //~^ ERROR unsupported operation on `f32x4` with element `f32`
        simd_or(z, z);
--- a/tests/ui/simd/intrinsic/generic-arithmetic-2.stderr
+++ b/tests/ui/simd/intrinsic/generic-arithmetic-2.stderr
@ -1,147 +1,171 @@
 error[E0511]: invalid monomorphization of `simd_add` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:64:9
+  --> $DIR/generic-arithmetic-2.rs:68:9
   |
 LL |         simd_add(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_sub` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:66:9
+  --> $DIR/generic-arithmetic-2.rs:70:9
   |
 LL |         simd_sub(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_mul` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:68:9
+  --> $DIR/generic-arithmetic-2.rs:72:9
   |
 LL |         simd_mul(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_div` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:70:9
+  --> $DIR/generic-arithmetic-2.rs:74:9
   |
 LL |         simd_div(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_shl` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:72:9
+  --> $DIR/generic-arithmetic-2.rs:76:9
   |
 LL |         simd_shl(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_shr` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:74:9
+  --> $DIR/generic-arithmetic-2.rs:78:9
   |
 LL |         simd_shr(0, 0);
   |         ^^^^^^^^^^^^^^

+error[E0511]: invalid monomorphization of `simd_funnel_shl` intrinsic: expected SIMD input type, found non-SIMD `i32`
+  --> $DIR/generic-arithmetic-2.rs:80:9
+   |
+LL |         simd_funnel_shl(0, 0, 0);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^
+
+error[E0511]: invalid monomorphization of `simd_funnel_shr` intrinsic: expected SIMD input type, found non-SIMD `i32`
+  --> $DIR/generic-arithmetic-2.rs:82:9
+   |
+LL |         simd_funnel_shr(0, 0, 0);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^
+
 error[E0511]: invalid monomorphization of `simd_and` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:76:9
+  --> $DIR/generic-arithmetic-2.rs:84:9
   |
 LL |         simd_and(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_or` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:78:9
+  --> $DIR/generic-arithmetic-2.rs:86:9
   |
 LL |         simd_or(0, 0);
   |         ^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_xor` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:80:9
+  --> $DIR/generic-arithmetic-2.rs:88:9
   |
 LL |         simd_xor(0, 0);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_neg` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:83:9
+  --> $DIR/generic-arithmetic-2.rs:91:9
   |
 LL |         simd_neg(0);
   |         ^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_bswap` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:85:9
+  --> $DIR/generic-arithmetic-2.rs:93:9
   |
 LL |         simd_bswap(0);
   |         ^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_bitreverse` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:87:9
+  --> $DIR/generic-arithmetic-2.rs:95:9
   |
 LL |         simd_bitreverse(0);
   |         ^^^^^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_ctlz` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:89:9
+  --> $DIR/generic-arithmetic-2.rs:97:9
   |
 LL |         simd_ctlz(0);
   |         ^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_cttz` intrinsic: expected SIMD input type, found non-SIMD `i32`
-  --> $DIR/generic-arithmetic-2.rs:91:9
+  --> $DIR/generic-arithmetic-2.rs:99:9
   |
 LL |         simd_cttz(0);
   |         ^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_shl` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:94:9
+  --> $DIR/generic-arithmetic-2.rs:102:9
   |
 LL |         simd_shl(z, z);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_shr` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:96:9
+  --> $DIR/generic-arithmetic-2.rs:104:9
   |
 LL |         simd_shr(z, z);
   |         ^^^^^^^^^^^^^^

+error[E0511]: invalid monomorphization of `simd_funnel_shl` intrinsic: unsupported operation on `f32x4` with element `f32`
+  --> $DIR/generic-arithmetic-2.rs:106:9
+   |
+LL |         simd_funnel_shl(z, z, z);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^
+
+error[E0511]: invalid monomorphization of `simd_funnel_shr` intrinsic: unsupported operation on `f32x4` with element `f32`
+  --> $DIR/generic-arithmetic-2.rs:108:9
+   |
+LL |         simd_funnel_shr(z, z, z);
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^
+
 error[E0511]: invalid monomorphization of `simd_and` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:98:9
+  --> $DIR/generic-arithmetic-2.rs:110:9
   |
 LL |         simd_and(z, z);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_or` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:100:9
+  --> $DIR/generic-arithmetic-2.rs:112:9
   |
 LL |         simd_or(z, z);
   |         ^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_xor` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:102:9
+  --> $DIR/generic-arithmetic-2.rs:114:9
   |
 LL |         simd_xor(z, z);
   |         ^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_bswap` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:104:9
+  --> $DIR/generic-arithmetic-2.rs:116:9
   |
 LL |         simd_bswap(z);
   |         ^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_bitreverse` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:106:9
+  --> $DIR/generic-arithmetic-2.rs:118:9
   |
 LL |         simd_bitreverse(z);
   |         ^^^^^^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_ctlz` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:108:9
+  --> $DIR/generic-arithmetic-2.rs:120:9
   |
 LL |         simd_ctlz(z);
   |         ^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_ctpop` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:110:9
+  --> $DIR/generic-arithmetic-2.rs:122:9
   |
 LL |         simd_ctpop(z);
   |         ^^^^^^^^^^^^^

 error[E0511]: invalid monomorphization of `simd_cttz` intrinsic: unsupported operation on `f32x4` with element `f32`
-  --> $DIR/generic-arithmetic-2.rs:112:9
+  --> $DIR/generic-arithmetic-2.rs:124:9
   |
 LL |         simd_cttz(z);
   |         ^^^^^^^^^^^^

-error: aborting due to 24 previous errors
+error: aborting due to 28 previous errors

 For more information about this error, try `rustc --explain E0511`.
--- a/tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
+++ b/tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
@ -83,6 +83,80 @@ fn main() {
        all_eq!(simd_shr(simd_shl(y1, y2), y2), y1);
        all_eq!(simd_shr(simd_shl(y2, y1), y1), y2);

+        all_eq!(
+            simd_funnel_shl(x1, x2, x1),
+            i32x4([
+                (1 << 1) | (2 >> 31),
+                (2 << 2) | (3 >> 30),
+                (3 << 3) | (4 >> 29),
+                (4 << 4) | (5 >> 28)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shl(x2, x1, x1),
+            i32x4([
+                (2 << 1) | (1 >> 31),
+                (3 << 2) | (2 >> 30),
+                (4 << 3) | (3 >> 29),
+                (5 << 4) | (4 >> 28)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shl(y1, y2, y1),
+            U32::<4>([
+                (1 << 1) | (2 >> 31),
+                (2 << 2) | (3 >> 30),
+                (3 << 3) | (4 >> 29),
+                (4 << 4) | (5 >> 28)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shl(y2, y1, y1),
+            U32::<4>([
+                (2 << 1) | (1 >> 31),
+                (3 << 2) | (2 >> 30),
+                (4 << 3) | (3 >> 29),
+                (5 << 4) | (4 >> 28)
+            ])
+        );
+
+        all_eq!(
+            simd_funnel_shr(x1, x2, x1),
+            i32x4([
+                (1 << 31) | (2 >> 1),
+                (2 << 30) | (3 >> 2),
+                (3 << 29) | (4 >> 3),
+                (4 << 28) | (5 >> 4)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shr(x2, x1, x1),
+            i32x4([
+                (2 << 31) | (1 >> 1),
+                (3 << 30) | (2 >> 2),
+                (4 << 29) | (3 >> 3),
+                (5 << 28) | (4 >> 4)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shr(y1, y2, y1),
+            U32::<4>([
+                (1 << 31) | (2 >> 1),
+                (2 << 30) | (3 >> 2),
+                (3 << 29) | (4 >> 3),
+                (4 << 28) | (5 >> 4)
+            ])
+        );
+        all_eq!(
+            simd_funnel_shr(y2, y1, y1),
+            U32::<4>([
+                (2 << 31) | (1 >> 1),
+                (3 << 30) | (2 >> 2),
+                (4 << 29) | (3 >> 3),
+                (5 << 28) | (4 >> 4)
+            ])
+        );
+
        // ensure we get logical vs. arithmetic shifts correct
        let (a, b, c, d) = (-12, -123, -1234, -12345);
        all_eq!(simd_shr(i32x4([a, b, c, d]), x1), i32x4([a >> 1, b >> 2, c >> 3, d >> 4]));