From 99136301583c6c88e41ac517b9b4b37dadf1ec83 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 14:54:43 +0100
Subject: [PATCH 01/18] Reduce usage of subst types in the intrinsic code

Using the arguments' layouts instead often saves a layout_of call.
---
 src/intrinsics/mod.rs | 114 ++++++++++++++++++++----------------------
 1 file changed, 55 insertions(+), 59 deletions(-)
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index f4703b22ecbc..da9aa45069b8 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -41,19 +41,11 @@ macro intrinsic_arg {
     }
 }
 
-macro intrinsic_substs {
-    ($substs:expr, $index:expr,) => {},
-    ($substs:expr, $index:expr, $first:ident $(,$rest:ident)*) => {
-        let $first = $substs.type_at($index);
-        intrinsic_substs!($substs, $index+1, $($rest),*);
-    }
-}
-
 macro intrinsic_match {
     ($fx:expr, $intrinsic:expr, $substs:expr, $args:expr,
     _ => $unknown:block;
     $(
-        $($($name:tt).*)|+ $(if $cond:expr)?, $(<$($subst:ident),*>)? ($($a:ident $arg:ident),*) $content:block;
+        $($($name:tt).*)|+ $(if $cond:expr)?, ($($a:ident $arg:ident),*) $content:block;
     )*) => {
         let _ = $substs; // Silence warning when substs is unused.
         match $intrinsic {
@@ -61,9 +53,6 @@ macro intrinsic_match {
                 $(intrinsic_pat!($($name).*))|* $(if $cond)? => {
                     #[allow(unused_parens, non_snake_case)]
                     {
-                        $(
-                            intrinsic_substs!($substs, 0, $($subst),*);
-                        )?
                         if let [$($arg),*] = $args {
                             let ($($arg,)*) = (
                                 $(intrinsic_arg!($a $fx, $arg),)*
@@ -492,7 +481,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         breakpoint, () {
             fx.bcx.ins().debugtrap();
         };
-        copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
+        copy | copy_nonoverlapping, (v src, v dst, v count) {
+            let elem_ty = substs.type_at(0);
             let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
             assert_eq!(args.len(), 3);
             let byte_amount = if elem_size != 1 {
@@ -510,7 +500,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             }
         };
         // NOTE: the volatile variants have src and dst swapped
-        volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
+        volatile_copy_memory | volatile_copy_nonoverlapping_memory, (v dst, v src, v count) {
+            let elem_ty = substs.type_at(0);
             let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
             assert_eq!(args.len(), 3);
             let byte_amount = if elem_size != 1 {
@@ -528,8 +519,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                 fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount);
             }
         };
-        size_of_val, <T> (c ptr) {
-            let layout = fx.layout_of(T);
+        size_of_val, (c ptr) {
+            let layout = fx.layout_of(substs.type_at(0));
             let size = if layout.is_unsized() {
                 let (_ptr, info) = ptr.load_scalar_pair(fx);
                 let (size, _align) = crate::unsize::size_and_align_of_dst(fx, layout, info);
@@ -542,8 +533,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             };
             ret.write_cvalue(fx, CValue::by_val(size, usize_layout));
         };
-        min_align_of_val, <T> (c ptr) {
-            let layout = fx.layout_of(T);
+        min_align_of_val, (c ptr) {
+            let layout = fx.layout_of(substs.type_at(0));
             let align = if layout.is_unsized() {
                 let (_ptr, info) = ptr.load_scalar_pair(fx);
                 let (_size, align) = crate::unsize::size_and_align_of_dst(fx, layout, info);
@@ -589,7 +580,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             );
             ret.write_cvalue(fx, res);
         };
-        saturating_add | saturating_sub, <T> (c lhs, c rhs) {
+        saturating_add | saturating_sub, (c lhs, c rhs) {
             assert_eq!(lhs.layout().ty, rhs.layout().ty);
             let bin_op = match intrinsic {
                 sym::saturating_add => BinOp::Add,
@@ -597,7 +588,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                 _ => unreachable!(),
             };
 
-            let signed = type_sign(T);
+            let signed = type_sign(lhs.layout().ty);
 
             let checked_res = crate::num::codegen_checked_int_binop(
                 fx,
@@ -607,7 +598,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             );
 
             let (val, has_overflow) = checked_res.load_scalar_pair(fx);
-            let clif_ty = fx.clif_type(T).unwrap();
+            let clif_ty = fx.clif_type(lhs.layout().ty).unwrap();
 
             let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
 
@@ -629,17 +620,19 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                 _ => unreachable!(),
             };
 
-            let res = CValue::by_val(val, fx.layout_of(T));
+            let res = CValue::by_val(val, lhs.layout());
 
             ret.write_cvalue(fx, res);
         };
-        rotate_left, <T>(v x, v y) {
-            let layout = fx.layout_of(T);
+        rotate_left, (c x, v y) {
+            let layout = x.layout();
+            let x = x.load_scalar(fx);
             let res = fx.bcx.ins().rotl(x, y);
             ret.write_cvalue(fx, CValue::by_val(res, layout));
         };
-        rotate_right, <T>(v x, v y) {
-            let layout = fx.layout_of(T);
+        rotate_right, (c x, v y) {
+            let layout = x.layout();
+            let x = x.load_scalar(fx);
             let res = fx.bcx.ins().rotr(x, y);
             ret.write_cvalue(fx, CValue::by_val(res, layout));
         };
@@ -675,29 +668,33 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             // FIXME use emit_small_memset
             fx.bcx.call_memset(fx.target_config, dst_ptr, val, count);
         };
-        ctlz | ctlz_nonzero, <T> (v arg) {
+        ctlz | ctlz_nonzero, (c arg) {
+            let val = arg.load_scalar(fx);
             // FIXME trap on `ctlz_nonzero` with zero arg.
-            let res = fx.bcx.ins().clz(arg);
-            let res = CValue::by_val(res, fx.layout_of(T));
+            let res = fx.bcx.ins().clz(val);
+            let res = CValue::by_val(res, arg.layout());
             ret.write_cvalue(fx, res);
         };
-        cttz | cttz_nonzero, <T> (v arg) {
+        cttz | cttz_nonzero, (c arg) {
+            let val = arg.load_scalar(fx);
             // FIXME trap on `cttz_nonzero` with zero arg.
-            let res = fx.bcx.ins().ctz(arg);
-            let res = CValue::by_val(res, fx.layout_of(T));
+            let res = fx.bcx.ins().ctz(val);
+            let res = CValue::by_val(res, arg.layout());
             ret.write_cvalue(fx, res);
         };
-        ctpop, <T> (v arg) {
-            let res = fx.bcx.ins().popcnt(arg);
-            let res = CValue::by_val(res, fx.layout_of(T));
+        ctpop, (c arg) {
+            let val = arg.load_scalar(fx);
+            let res = fx.bcx.ins().popcnt(val);
+            let res = CValue::by_val(res, arg.layout());
             ret.write_cvalue(fx, res);
         };
-        bitreverse, <T> (v arg) {
-            let res = fx.bcx.ins().bitrev(arg);
-            let res = CValue::by_val(res, fx.layout_of(T));
+        bitreverse, (c arg) {
+            let val = arg.load_scalar(fx);
+            let res = fx.bcx.ins().bitrev(val);
+            let res = CValue::by_val(res, arg.layout());
             ret.write_cvalue(fx, res);
         };
-        bswap, <T> (v arg) {
+        bswap, (c arg) {
             // FIXME(CraneStation/cranelift#794) add bswap instruction to cranelift
             fn swap(bcx: &mut FunctionBuilder<'_>, v: Value) -> Value {
                 match bcx.func.dfg.value_type(v) {
@@ -773,15 +770,16 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                     ty => unreachable!("bswap {}", ty),
                 }
             }
-            let res = CValue::by_val(swap(&mut fx.bcx, arg), fx.layout_of(T));
+            let val = arg.load_scalar(fx);
+            let res = CValue::by_val(swap(&mut fx.bcx, val), arg.layout());
             ret.write_cvalue(fx, res);
         };
-        assert_inhabited | assert_zero_valid | assert_uninit_valid, <T> () {
-            let layout = fx.layout_of(T);
+        assert_inhabited | assert_zero_valid | assert_uninit_valid, () {
+            let layout = fx.layout_of(substs.type_at(0));
             if layout.abi.is_uninhabited() {
                 with_no_trimmed_paths(|| crate::base::codegen_panic(
                     fx,
-                    &format!("attempted to instantiate uninhabited type `{}`", T),
+                    &format!("attempted to instantiate uninhabited type `{}`", layout.ty),
                     span,
                 ));
                 return;
@@ -790,7 +788,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             if intrinsic == sym::assert_zero_valid && !layout.might_permit_raw_init(fx, /*zero:*/ true) {
                 with_no_trimmed_paths(|| crate::base::codegen_panic(
                     fx,
-                    &format!("attempted to zero-initialize type `{}`, which is invalid", T),
+                    &format!("attempted to zero-initialize type `{}`, which is invalid", layout.ty),
                     span,
                 ));
                 return;
@@ -799,7 +797,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             if intrinsic == sym::assert_uninit_valid && !layout.might_permit_raw_init(fx, /*zero:*/ false) {
                 with_no_trimmed_paths(|| crate::base::codegen_panic(
                     fx,
-                    &format!("attempted to leave type `{}` uninitialized, which is invalid", T),
+                    &format!("attempted to leave type `{}` uninitialized, which is invalid", layout.ty),
                     span,
                 ));
                 return;
@@ -832,10 +830,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, val);
         };
 
-        ptr_offset_from, <T> (v ptr, v base) {
+        ptr_offset_from, (v ptr, v base) {
+            let ty = substs.type_at(0);
             let isize_layout = fx.layout_of(fx.tcx.types.isize);
 
-            let pointee_size: u64 = fx.layout_of(T).size.bytes();
+            let pointee_size: u64 = fx.layout_of(ty).size.bytes();
             let diff = fx.bcx.ins().isub(ptr, base);
             // FIXME this can be an exact division.
             let val = CValue::by_val(fx.bcx.ins().sdiv_imm(diff, pointee_size as i64), isize_layout);
@@ -864,13 +863,14 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             // FIXME use a compiler fence once Cranelift supports it
             fx.bcx.ins().fence();
         };
-        _ if intrinsic.as_str().starts_with("atomic_load"), <T> (v ptr) {
-            validate_atomic_type!(fx, intrinsic, span, T);
-            let ty = fx.clif_type(T).unwrap();
+        _ if intrinsic.as_str().starts_with("atomic_load"), (v ptr) {
+            let ty = substs.type_at(0);
+            validate_atomic_type!(fx, intrinsic, span, ty);
+            let clif_ty = fx.clif_type(ty).unwrap();
 
-            let val = fx.bcx.ins().atomic_load(ty, MemFlags::trusted(), ptr);
+            let val = fx.bcx.ins().atomic_load(clif_ty, MemFlags::trusted(), ptr);
 
-            let val = CValue::by_val(val, fx.layout_of(T));
+            let val = CValue::by_val(val, fx.layout_of(ty));
             ret.write_cvalue(fx, val);
         };
         _ if intrinsic.as_str().starts_with("atomic_store"), (v ptr, c val) {
@@ -1101,18 +1101,14 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
         };
 
-        raw_eq, <T>(v lhs_ref, v rhs_ref) {
-            fn type_by_size(size: Size) -> Option<Type> {
-                Type::int(size.bits().try_into().ok()?)
-            }
-
-            let size = fx.layout_of(T).layout.size;
+        raw_eq, (v lhs_ref, v rhs_ref) {
+            let size = fx.layout_of(substs.type_at(0)).layout.size;
             // FIXME add and use emit_small_memcmp
             let is_eq_value =
                 if size == Size::ZERO {
                     // No bytes means they're trivially equal
                     fx.bcx.ins().iconst(types::I8, 1)
-                } else if let Some(clty) = type_by_size(size) {
+                } else if let Some(clty) = size.bits().try_into().ok().and_then(Type::int) {
                     // Can't use `trusted` for these loads; they could be unaligned.
                     let mut flags = MemFlags::new();
                     flags.set_notrap();

From c5b969583ff12cfd0cd85e2923753101024b72ad Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 15:17:42 +0100
Subject: [PATCH 02/18] Split codegen_intrinsic_call function

This should reduce compile times of cg_clif
---
 src/intrinsics/mod.rs | 49 +++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index da9aa45069b8..517deba58050 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -73,7 +73,7 @@ macro intrinsic_match {
 }
 
 macro call_intrinsic_match {
-    ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $destination:expr, $args:expr, $(
+    ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $args:expr, $(
         $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
     )*) => {
         match $intrinsic {
@@ -87,19 +87,13 @@ macro call_intrinsic_match {
                         let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty);
                         $ret.write_cvalue($fx, res);
 
-                        if let Some((_, dest)) = $destination {
-                            let ret_block = $fx.get_block(dest);
-                            $fx.bcx.ins().jump(ret_block, &[]);
-                            return;
-                        } else {
-                            unreachable!();
-                        }
+                        return true;
                     } else {
                         bug!("wrong number of args for intrinsic {:?}", $intrinsic);
                     }
                 }
             )*
-            _ => {}
+            _ => false,
         }
     }
 }
@@ -397,7 +391,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
     span: Span,
 ) {
     let intrinsic = fx.tcx.item_name(instance.def_id());
-    let substs = instance.substs;
 
     let ret = match destination {
         Some((place, _)) => place,
@@ -420,13 +413,27 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         self::simd::codegen_simd_intrinsic_call(fx, instance, args, ret, span);
         let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
-        return;
+    } else if codegen_float_intrinsic_call(fx, instance, args, ret) {
+        let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1);
+        fx.bcx.ins().jump(ret_block, &[]);
+    } else {
+        codegen_regular_intrinsic_call(fx, instance, args, ret, span, destination);
     }
+}
 
-    let usize_layout = fx.layout_of(fx.tcx.types.usize);
+fn codegen_float_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    instance: Instance<'tcx>,
+    args: &[mir::Operand<'tcx>],
+    ret: CPlace<'tcx>,
+) -> bool {
+    let def_id = instance.def_id();
+    let substs = instance.substs;
+
+    let intrinsic = fx.tcx.item_name(def_id);
 
     call_intrinsic_match! {
-        fx, intrinsic, substs, ret, destination, args,
+        fx, intrinsic, substs, ret, args,
         expf32(flt) -> f32 => expf,
         expf64(flt) -> f64 => exp,
         exp2f32(flt) -> f32 => exp2f,
@@ -467,6 +474,22 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         cosf32(flt) -> f32 => cosf,
         cosf64(flt) -> f64 => cos,
     }
+}
+
+fn codegen_regular_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    instance: Instance<'tcx>,
+    args: &[mir::Operand<'tcx>],
+    ret: CPlace<'tcx>,
+    span: Span,
+    destination: Option<(CPlace<'tcx>, BasicBlock)>,
+) {
+    let def_id = instance.def_id();
+    let substs = instance.substs;
+
+    let intrinsic = fx.tcx.item_name(def_id);
+
+    let usize_layout = fx.layout_of(fx.tcx.types.usize);
 
     intrinsic_match! {
         fx, intrinsic, substs, args,

From 70cc24254500f783ddd08e65e6abcf21e27c0c27 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 15:22:46 +0100
Subject: [PATCH 03/18] Remove a couple of duplicate calls

---
 src/intrinsics/mod.rs  | 25 ++++++++++---------------
 src/intrinsics/simd.rs | 11 +++++------
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 517deba58050..cfe3e7bb9201 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -9,7 +9,8 @@ pub(crate) use cpuid::codegen_cpuid_call;
 pub(crate) use llvm::codegen_llvm_intrinsic_call;
 
 use rustc_middle::ty::print::with_no_trimmed_paths;
-use rustc_span::symbol::{kw, sym};
+use rustc_middle::ty::subst::SubstsRef;
+use rustc_span::symbol::{kw, sym, Symbol};
 
 use crate::prelude::*;
 use cranelift_codegen::ir::AtomicRmwOp;
@@ -391,6 +392,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
     span: Span,
 ) {
     let intrinsic = fx.tcx.item_name(instance.def_id());
+    let substs = instance.substs;
 
     let ret = match destination {
         Some((place, _)) => place,
@@ -410,28 +412,24 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
     };
 
     if intrinsic.as_str().starts_with("simd_") {
-        self::simd::codegen_simd_intrinsic_call(fx, instance, args, ret, span);
+        self::simd::codegen_simd_intrinsic_call(fx, intrinsic, substs, args, ret, span);
         let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
-    } else if codegen_float_intrinsic_call(fx, instance, args, ret) {
+    } else if codegen_float_intrinsic_call(fx, intrinsic, substs, args, ret) {
         let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
     } else {
-        codegen_regular_intrinsic_call(fx, instance, args, ret, span, destination);
+        codegen_regular_intrinsic_call(fx, instance, intrinsic, substs, args, ret, span, destination);
     }
 }
 
 fn codegen_float_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
-    instance: Instance<'tcx>,
+    intrinsic: Symbol,
+    substs: SubstsRef<'tcx>,
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
 ) -> bool {
-    let def_id = instance.def_id();
-    let substs = instance.substs;
-
-    let intrinsic = fx.tcx.item_name(def_id);
-
     call_intrinsic_match! {
         fx, intrinsic, substs, ret, args,
         expf32(flt) -> f32 => expf,
@@ -479,16 +477,13 @@ fn codegen_float_intrinsic_call<'tcx>(
 fn codegen_regular_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     instance: Instance<'tcx>,
+    intrinsic: Symbol,
+    substs: SubstsRef<'tcx>,
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     span: Span,
     destination: Option<(CPlace<'tcx>, BasicBlock)>,
 ) {
-    let def_id = instance.def_id();
-    let substs = instance.substs;
-
-    let intrinsic = fx.tcx.item_name(def_id);
-
     let usize_layout = fx.layout_of(fx.tcx.types.usize);
 
     intrinsic_match! {
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 6c0631d9ecbd..d8dcf5d0ab98 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -1,20 +1,19 @@
 //! Codegen `extern "platform-intrinsic"` intrinsics.
 
+use rustc_middle::ty::subst::SubstsRef;
+use rustc_span::Symbol;
+
 use super::*;
 use crate::prelude::*;
 
 pub(super) fn codegen_simd_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
-    instance: Instance<'tcx>,
+    intrinsic: Symbol,
+    substs: SubstsRef<'tcx>,
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     span: Span,
 ) {
-    let def_id = instance.def_id();
-    let substs = instance.substs;
-
-    let intrinsic = fx.tcx.item_name(def_id);
-
     intrinsic_match! {
         fx, intrinsic, substs, args,
         _ => {

From 409e3eb2cbc7a18e1a9e6bc607766ef18cd79dfc Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 15:24:10 +0100
Subject: [PATCH 04/18] Remove unnecessary argument

---
 src/intrinsics/mod.rs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index cfe3e7bb9201..24e9ed338223 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -74,13 +74,12 @@ macro intrinsic_match {
 }
 
 macro call_intrinsic_match {
-    ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $args:expr, $(
+    ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
         $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
     )*) => {
         match $intrinsic {
             $(
                 sym::$name => {
-                    assert!($substs.is_noop());
                     if let [$(ref $arg),*] = *$args {
                         let ($($arg,)*) = (
                             $(codegen_operand($fx, $arg),)*
@@ -415,7 +414,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         self::simd::codegen_simd_intrinsic_call(fx, intrinsic, substs, args, ret, span);
         let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
-    } else if codegen_float_intrinsic_call(fx, intrinsic, substs, args, ret) {
+    } else if codegen_float_intrinsic_call(fx, intrinsic, args, ret) {
         let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
     } else {
@@ -426,12 +425,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
 fn codegen_float_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     intrinsic: Symbol,
-    substs: SubstsRef<'tcx>,
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
 ) -> bool {
     call_intrinsic_match! {
-        fx, intrinsic, substs, ret, args,
+        fx, intrinsic, ret, args,
         expf32(flt) -> f32 => expf,
         expf64(flt) -> f64 => exp,
         exp2f32(flt) -> f32 => exp2f,

From 046e094842b4c1b046aad66750838304c017796b Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 15:31:44 +0100
Subject: [PATCH 05/18] Only use a single bug!() invocation in
 call_intrinsic_match

This reduces code size
---
 src/intrinsics/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 24e9ed338223..29b30631d0fc 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -88,13 +88,13 @@ macro call_intrinsic_match {
                         $ret.write_cvalue($fx, res);
 
                         return true;
-                    } else {
-                        bug!("wrong number of args for intrinsic {:?}", $intrinsic);
                     }
                 }
             )*
-            _ => false,
+            _ => return false,
         }
+
+        bug!("wrong number of args for intrinsic {:?}", $intrinsic);
     }
 }
 

From a1a164083ea9cdf8f3d6f053cdfb6b3355787c44 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 16:44:54 +0100
Subject: [PATCH 06/18] Move call_intrinsic_match macro into
 codegen_float_intrinsic_call

---
 src/intrinsics/mod.rs | 50 +++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 29b30631d0fc..27e3b1b11f1d 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -73,31 +73,6 @@ macro intrinsic_match {
     }
 }
 
-macro call_intrinsic_match {
-    ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
-        $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
-    )*) => {
-        match $intrinsic {
-            $(
-                sym::$name => {
-                    if let [$(ref $arg),*] = *$args {
-                        let ($($arg,)*) = (
-                            $(codegen_operand($fx, $arg),)*
-                        );
-                        let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty);
-                        $ret.write_cvalue($fx, res);
-
-                        return true;
-                    }
-                }
-            )*
-            _ => return false,
-        }
-
-        bug!("wrong number of args for intrinsic {:?}", $intrinsic);
-    }
-}
-
 macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) {
     match $ty.kind() {
         ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
@@ -428,6 +403,31 @@ fn codegen_float_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
 ) -> bool {
+    macro call_intrinsic_match {
+        ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
+            $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
+        )*) => {
+            match $intrinsic {
+                $(
+                    sym::$name => {
+                        if let [$(ref $arg),*] = *$args {
+                            let ($($arg,)*) = (
+                                $(codegen_operand($fx, $arg),)*
+                            );
+                            let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty);
+                            $ret.write_cvalue($fx, res);
+
+                            return true;
+                        }
+                    }
+                )*
+                _ => return false,
+            }
+
+            bug!("wrong number of args for intrinsic {:?}", $intrinsic);
+        }
+    }
+
     call_intrinsic_match! {
         fx, intrinsic, ret, args,
         expf32(flt) -> f32 => expf,

From 300974714c96524806b44e36c5d6a7d0e854fc3e Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:11:28 +0100
Subject: [PATCH 07/18] Dedup write_cvalue calls in
 codegen_float_intrinsic_call

Also directly use an array instead of going through a tuple. This
reduces the number of LLVM IR lines for this function by almost half,
from 3086 to 1662.
---
 src/intrinsics/mod.rs | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 27e3b1b11f1d..0d667847b9aa 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -393,7 +393,16 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1);
         fx.bcx.ins().jump(ret_block, &[]);
     } else {
-        codegen_regular_intrinsic_call(fx, instance, intrinsic, substs, args, ret, span, destination);
+        codegen_regular_intrinsic_call(
+            fx,
+            instance,
+            intrinsic,
+            substs,
+            args,
+            ret,
+            span,
+            destination,
+        );
     }
 }
 
@@ -407,24 +416,27 @@ fn codegen_float_intrinsic_call<'tcx>(
         ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
             $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
         )*) => {
-            match $intrinsic {
+            let res = match $intrinsic {
                 $(
                     sym::$name => {
                         if let [$(ref $arg),*] = *$args {
-                            let ($($arg,)*) = (
-                                $(codegen_operand($fx, $arg),)*
-                            );
-                            let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty);
-                            $ret.write_cvalue($fx, res);
-
-                            return true;
+                            let args = [$(codegen_operand($fx, $arg),)*];
+                            Some($fx.easy_call(stringify!($func), &args, $fx.tcx.types.$ty))
+                        } else {
+                            None
                         }
                     }
                 )*
                 _ => return false,
+            };
+
+            if let Some(res) = res {
+                $ret.write_cvalue($fx, res);
+            } else {
+                bug!("wrong number of args for intrinsic {:?}", $intrinsic);
             }
 
-            bug!("wrong number of args for intrinsic {:?}", $intrinsic);
+            true
         }
     }
 

From baad993daead9ddc127dd897035f6616f4e367f6 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:19:11 +0100
Subject: [PATCH 08/18] Dedup codegen_operand calls in
 codegen_float_intrinsic_call

This reduces the number of LLVM IR lines for this function by a little
over half, from 1662 to 781.
---
 src/intrinsics/mod.rs | 105 +++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 47 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 0d667847b9aa..8da6c7ae9eb8 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -414,73 +414,84 @@ fn codegen_float_intrinsic_call<'tcx>(
 ) -> bool {
     macro call_intrinsic_match {
         ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
-            $name:ident($($arg:ident),*) -> $ty:ident => $func:ident,
+            $name:ident($arg_count:literal) -> $ty:ident => $func:ident,
         )*) => {
-            let res = match $intrinsic {
+            let (name, arg_count, ty) = match $intrinsic {
                 $(
-                    sym::$name => {
-                        if let [$(ref $arg),*] = *$args {
-                            let args = [$(codegen_operand($fx, $arg),)*];
-                            Some($fx.easy_call(stringify!($func), &args, $fx.tcx.types.$ty))
-                        } else {
-                            None
-                        }
-                    }
+                    sym::$name => (stringify!($func), $arg_count, $fx.tcx.types.$ty),
                 )*
                 _ => return false,
             };
 
-            if let Some(res) = res {
-                $ret.write_cvalue($fx, res);
-            } else {
+            if $args.len() != arg_count {
                 bug!("wrong number of args for intrinsic {:?}", $intrinsic);
             }
 
+            let (a, b, c);
+            let args = match $args {
+                [x] => {
+                    a = [codegen_operand($fx, x)];
+                    &a as &[_]
+                }
+                [x, y] => {
+                    b = [codegen_operand($fx, x), codegen_operand($fx, y)];
+                    &b
+                }
+                [x, y, z] => {
+                    c = [codegen_operand($fx, x), codegen_operand($fx, y), codegen_operand($fx, z)];
+                    &c
+                }
+                _ => unreachable!(),
+            };
+
+            let res = $fx.easy_call(name, &args, ty);
+            $ret.write_cvalue($fx, res);
+
             true
         }
     }
 
     call_intrinsic_match! {
         fx, intrinsic, ret, args,
-        expf32(flt) -> f32 => expf,
-        expf64(flt) -> f64 => exp,
-        exp2f32(flt) -> f32 => exp2f,
-        exp2f64(flt) -> f64 => exp2,
-        sqrtf32(flt) -> f32 => sqrtf,
-        sqrtf64(flt) -> f64 => sqrt,
-        powif32(a, x) -> f32 => __powisf2, // compiler-builtins
-        powif64(a, x) -> f64 => __powidf2, // compiler-builtins
-        powf32(a, x) -> f32 => powf,
-        powf64(a, x) -> f64 => pow,
-        logf32(flt) -> f32 => logf,
-        logf64(flt) -> f64 => log,
-        log2f32(flt) -> f32 => log2f,
-        log2f64(flt) -> f64 => log2,
-        log10f32(flt) -> f32 => log10f,
-        log10f64(flt) -> f64 => log10,
-        fabsf32(flt) -> f32 => fabsf,
-        fabsf64(flt) -> f64 => fabs,
-        fmaf32(x, y, z) -> f32 => fmaf,
-        fmaf64(x, y, z) -> f64 => fma,
-        copysignf32(x, y) -> f32 => copysignf,
-        copysignf64(x, y) -> f64 => copysign,
+        expf32(1) -> f32 => expf,
+        expf64(1) -> f64 => exp,
+        exp2f32(1) -> f32 => exp2f,
+        exp2f64(1) -> f64 => exp2,
+        sqrtf32(1) -> f32 => sqrtf,
+        sqrtf64(1) -> f64 => sqrt,
+        powif32(2) -> f32 => __powisf2, // compiler-builtins
+        powif64(2) -> f64 => __powidf2, // compiler-builtins
+        powf32(2) -> f32 => powf,
+        powf64(2) -> f64 => pow,
+        logf32(1) -> f32 => logf,
+        logf64(1) -> f64 => log,
+        log2f32(1) -> f32 => log2f,
+        log2f64(1) -> f64 => log2,
+        log10f32(1) -> f32 => log10f,
+        log10f64(1) -> f64 => log10,
+        fabsf32(1) -> f32 => fabsf,
+        fabsf64(1) -> f64 => fabs,
+        fmaf32(3) -> f32 => fmaf,
+        fmaf64(3) -> f64 => fma,
+        copysignf32(2) -> f32 => copysignf,
+        copysignf64(2) -> f64 => copysign,
 
         // rounding variants
         // FIXME use clif insts
-        floorf32(flt) -> f32 => floorf,
-        floorf64(flt) -> f64 => floor,
-        ceilf32(flt) -> f32 => ceilf,
-        ceilf64(flt) -> f64 => ceil,
-        truncf32(flt) -> f32 => truncf,
-        truncf64(flt) -> f64 => trunc,
-        roundf32(flt) -> f32 => roundf,
-        roundf64(flt) -> f64 => round,
+        floorf32(1) -> f32 => floorf,
+        floorf64(1) -> f64 => floor,
+        ceilf32(1) -> f32 => ceilf,
+        ceilf64(1) -> f64 => ceil,
+        truncf32(1) -> f32 => truncf,
+        truncf64(1) -> f64 => trunc,
+        roundf32(1) -> f32 => roundf,
+        roundf64(1) -> f64 => round,
 
         // trigonometry
-        sinf32(flt) -> f32 => sinf,
-        sinf64(flt) -> f64 => sin,
-        cosf32(flt) -> f32 => cosf,
-        cosf64(flt) -> f64 => cos,
+        sinf32(1) -> f32 => sinf,
+        sinf64(1) -> f64 => sin,
+        cosf32(1) -> f32 => cosf,
+        cosf64(1) -> f64 => cos,
     }
 }
 

From 9e6d8c1b244213c0a7677504ffeced9cc9c97e27 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:22:23 +0100
Subject: [PATCH 09/18] Remove the call_intrinsic_match macro

---
 src/intrinsics/mod.rs | 134 ++++++++++++++++++------------------------
 1 file changed, 58 insertions(+), 76 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 8da6c7ae9eb8..bd6ef41ef66c 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -412,87 +412,69 @@ fn codegen_float_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
 ) -> bool {
-    macro call_intrinsic_match {
-        ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $(
-            $name:ident($arg_count:literal) -> $ty:ident => $func:ident,
-        )*) => {
-            let (name, arg_count, ty) = match $intrinsic {
-                $(
-                    sym::$name => (stringify!($func), $arg_count, $fx.tcx.types.$ty),
-                )*
-                _ => return false,
-            };
+    let (name, arg_count, ty) = match intrinsic {
+        sym::expf32 => ("expf", 1, fx.tcx.types.f32),
+        sym::expf64 => ("exp", 1, fx.tcx.types.f64),
+        sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32),
+        sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64),
+        sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32),
+        sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64),
+        sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32), // compiler-builtins
+        sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64), // compiler-builtins
+        sym::powf32 => ("powf", 2, fx.tcx.types.f32),
+        sym::powf64 => ("pow", 2, fx.tcx.types.f64),
+        sym::logf32 => ("logf", 1, fx.tcx.types.f32),
+        sym::logf64 => ("log", 1, fx.tcx.types.f64),
+        sym::log2f32 => ("log2f", 1, fx.tcx.types.f32),
+        sym::log2f64 => ("log2", 1, fx.tcx.types.f64),
+        sym::log10f32 => ("log10f", 1, fx.tcx.types.f32),
+        sym::log10f64 => ("log10", 1, fx.tcx.types.f64),
+        sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32),
+        sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64),
+        sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32),
+        sym::fmaf64 => ("fma", 3, fx.tcx.types.f64),
+        sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32),
+        sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64),
+        sym::floorf32 => ("floorf", 1, fx.tcx.types.f32),
+        sym::floorf64 => ("floor", 1, fx.tcx.types.f64),
+        sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32),
+        sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64),
+        sym::truncf32 => ("truncf", 1, fx.tcx.types.f32),
+        sym::truncf64 => ("trunc", 1, fx.tcx.types.f64),
+        sym::roundf32 => ("roundf", 1, fx.tcx.types.f32),
+        sym::roundf64 => ("round", 1, fx.tcx.types.f64),
+        sym::sinf32 => ("sinf", 1, fx.tcx.types.f32),
+        sym::sinf64 => ("sin", 1, fx.tcx.types.f64),
+        sym::cosf32 => ("cosf", 1, fx.tcx.types.f32),
+        sym::cosf64 => ("cos", 1, fx.tcx.types.f64),
+        _ => return false,
+    };
 
-            if $args.len() != arg_count {
-                bug!("wrong number of args for intrinsic {:?}", $intrinsic);
-            }
+    if args.len() != arg_count {
+        bug!("wrong number of args for intrinsic {:?}", intrinsic);
+    }
 
-            let (a, b, c);
-            let args = match $args {
-                [x] => {
-                    a = [codegen_operand($fx, x)];
-                    &a as &[_]
-                }
-                [x, y] => {
-                    b = [codegen_operand($fx, x), codegen_operand($fx, y)];
-                    &b
-                }
-                [x, y, z] => {
-                    c = [codegen_operand($fx, x), codegen_operand($fx, y), codegen_operand($fx, z)];
-                    &c
-                }
-                _ => unreachable!(),
-            };
-
-            let res = $fx.easy_call(name, &args, ty);
-            $ret.write_cvalue($fx, res);
-
-            true
+    let (a, b, c);
+    let args = match args {
+        [x] => {
+            a = [codegen_operand(fx, x)];
+            &a as &[_]
         }
-    }
+        [x, y] => {
+            b = [codegen_operand(fx, x), codegen_operand(fx, y)];
+            &b
+        }
+        [x, y, z] => {
+            c = [codegen_operand(fx, x), codegen_operand(fx, y), codegen_operand(fx, z)];
+            &c
+        }
+        _ => unreachable!(),
+    };
 
-    call_intrinsic_match! {
-        fx, intrinsic, ret, args,
-        expf32(1) -> f32 => expf,
-        expf64(1) -> f64 => exp,
-        exp2f32(1) -> f32 => exp2f,
-        exp2f64(1) -> f64 => exp2,
-        sqrtf32(1) -> f32 => sqrtf,
-        sqrtf64(1) -> f64 => sqrt,
-        powif32(2) -> f32 => __powisf2, // compiler-builtins
-        powif64(2) -> f64 => __powidf2, // compiler-builtins
-        powf32(2) -> f32 => powf,
-        powf64(2) -> f64 => pow,
-        logf32(1) -> f32 => logf,
-        logf64(1) -> f64 => log,
-        log2f32(1) -> f32 => log2f,
-        log2f64(1) -> f64 => log2,
-        log10f32(1) -> f32 => log10f,
-        log10f64(1) -> f64 => log10,
-        fabsf32(1) -> f32 => fabsf,
-        fabsf64(1) -> f64 => fabs,
-        fmaf32(3) -> f32 => fmaf,
-        fmaf64(3) -> f64 => fma,
-        copysignf32(2) -> f32 => copysignf,
-        copysignf64(2) -> f64 => copysign,
+    let res = fx.easy_call(name, &args, ty);
+    ret.write_cvalue(fx, res);
 
-        // rounding variants
-        // FIXME use clif insts
-        floorf32(1) -> f32 => floorf,
-        floorf64(1) -> f64 => floor,
-        ceilf32(1) -> f32 => ceilf,
-        ceilf64(1) -> f64 => ceil,
-        truncf32(1) -> f32 => truncf,
-        truncf64(1) -> f64 => trunc,
-        roundf32(1) -> f32 => roundf,
-        roundf64(1) -> f64 => round,
-
-        // trigonometry
-        sinf32(1) -> f32 => sinf,
-        sinf64(1) -> f64 => sin,
-        cosf32(1) -> f32 => cosf,
-        cosf64(1) -> f64 => cos,
-    }
+    true
 }
 
 fn codegen_regular_intrinsic_call<'tcx>(

From 9295b086f6e2a42a7739bd522051060b0c12f885 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:29:16 +0100
Subject: [PATCH 10/18] Turn validate_simd_type into a function

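This effectively outlines it, significantly reducing the size of the
LLVM IR for codegen_simd_intrinsic_call from 10419 lines to 6378 lines.

Note: the `return` in the body now only leaves validate_simd_type itself
rather than the calling intrinsic arm, so callers continue past a failed
validation.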
---
 src/intrinsics/mod.rs  |  8 ++---
 src/intrinsics/simd.rs | 80 +++++++++++++++++++++---------------------
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index bd6ef41ef66c..f305942a8c3b 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -91,11 +91,11 @@ macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) {
     }
 }
 
-macro validate_simd_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) {
-    if !$ty.is_simd() {
-        $fx.tcx.sess.span_err($span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", $intrinsic, $ty));
+fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) {
+    if !ty.is_simd() {
+        fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty));
         // Prevent verifier error
-        crate::trap::trap_unreachable($fx, "compilation should not have succeeded");
+        crate::trap::trap_unreachable(fx, "compilation should not have succeeded");
         return;
     }
 }
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index d8dcf5d0ab98..9e42ff587bd7 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -21,7 +21,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_cast, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
                 let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
 
@@ -34,27 +34,27 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_eq, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(fx, Equal|Equal(x, y) -> ret);
         };
         simd_ne, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret);
         };
         simd_lt, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(fx, UnsignedLessThan|SignedLessThan|LessThan(x, y) -> ret);
         };
         simd_le, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual|LessThanOrEqual(x, y) -> ret);
         };
         simd_gt, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan|GreaterThan(x, y) -> ret);
         };
         simd_ge, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_cmp!(
                 fx,
                 UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual|GreaterThanOrEqual
@@ -64,7 +64,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         // simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
         _ if intrinsic.as_str().starts_with("simd_shuffle"), (c x, c y, o idx) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
 
             // If this intrinsic is the older "simd_shuffleN" form, simply parse the integer.
             // If there is no suffix, use the index array length.
@@ -166,7 +166,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_extract, (c v, o idx) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) {
                 idx_const
             } else {
@@ -194,7 +194,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_neg, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
                 let ret_lane = match lane_layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().ineg(lane),
@@ -206,7 +206,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_fabs, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
                 let ret_lane = fx.bcx.ins().fabs(lane);
                 CValue::by_val(ret_lane, ret_lane_layout)
@@ -214,7 +214,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_fsqrt, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
                 let ret_lane = fx.bcx.ins().sqrt(lane);
                 CValue::by_val(ret_lane, ret_lane_layout)
@@ -222,23 +222,23 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_add, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
         };
         simd_sub, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret);
         };
         simd_mul, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret);
         };
         simd_div, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
         };
         simd_rem, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
                 let res_lane = match lane_layout.ty.kind() {
                     ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
@@ -261,28 +261,28 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         };
         simd_shl, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, ishl(x, y) -> ret);
         };
         simd_shr, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, ushr|sshr(x, y) -> ret);
         };
         simd_and, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, band(x, y) -> ret);
         };
         simd_or, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, bor(x, y) -> ret);
         };
         simd_xor, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, bxor(x, y) -> ret);
         };
 
         simd_fma, (c a, c b, c c) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             assert_eq!(a.layout(), b.layout());
             assert_eq!(a.layout(), c.layout());
             let layout = a.layout();
@@ -305,16 +305,16 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_fmin, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_flt_binop!(fx, fmin(x, y) -> ret);
         };
         simd_fmax, (c x, c y) {
-            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            validate_simd_type(fx, intrinsic, span, x.layout().ty);
             simd_flt_binop!(fx, fmax(x, y) -> ret);
         };
 
         simd_round, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
                 let res_lane = match lane_layout.ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
@@ -335,21 +335,21 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         };
         simd_ceil, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
                 let ret_lane = fx.bcx.ins().ceil(lane);
                 CValue::by_val(ret_lane, ret_lane_layout)
             });
         };
         simd_floor, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
                 let ret_lane = fx.bcx.ins().floor(lane);
                 CValue::by_val(ret_lane, ret_lane_layout)
             });
         };
         simd_trunc, (c a) {
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
                 let ret_lane = fx.bcx.ins().trunc(lane);
                 CValue::by_val(ret_lane, ret_lane_layout)
@@ -357,7 +357,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
@@ -368,7 +368,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
@@ -379,32 +379,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_reduce_all, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_any, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_and, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_or, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_xor, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
         };
 
         simd_reduce_min, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
                 let lt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
@@ -417,7 +417,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_reduce_max, (c v) {
-            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            validate_simd_type(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
                 let gt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
@@ -430,8 +430,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_select, (c m, c a, c b) {
-            validate_simd_type!(fx, intrinsic, span, m.layout().ty);
-            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            validate_simd_type(fx, intrinsic, span, m.layout().ty);
+            validate_simd_type(fx, intrinsic, span, a.layout().ty);
             assert_eq!(a.layout(), b.layout());
 
             let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);

From 4e3a8d5fb90dca1dd8462ea45b73d23ba0603f76 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:30:01 +0100
Subject: [PATCH 11/18] Move validate_simd_type from intrinsics to
 intrinsics::simd

---
 src/intrinsics/mod.rs  | 9 ---------
 src/intrinsics/simd.rs | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index f305942a8c3b..36490c370009 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -91,15 +91,6 @@ macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) {
     }
 }
 
-fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) {
-    if !ty.is_simd() {
-        fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty));
-        // Prevent verifier error
-        crate::trap::trap_unreachable(fx, "compilation should not have succeeded");
-        return;
-    }
-}
-
 pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> {
     let (element, count) = match layout.abi {
         Abi::Vector { element, count } => (element, count),
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 9e42ff587bd7..8bc4dd8e615b 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -6,6 +6,15 @@ use rustc_span::Symbol;
 use super::*;
 use crate::prelude::*;
 
+fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) {
+    if !ty.is_simd() {
+        fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty));
+        // Prevent verifier error
+        crate::trap::trap_unreachable(fx, "compilation should not have succeeded");
+        return;
+    }
+}
+
 pub(super) fn codegen_simd_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     intrinsic: Symbol,

From 8ace43e65012a5e4a3e07d399a2a5832e18cf917 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:34:55 +0100
Subject: [PATCH 12/18] Move a couple of macros to intrinsics::simd

---
 src/intrinsics/mod.rs  | 115 -----------------------------------------
 src/intrinsics/simd.rs | 115 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+), 115 deletions(-)

diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 36490c370009..d6b35bba9e7f 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -234,121 +234,6 @@ fn bool_to_zero_or_max_uint<'tcx>(
     CValue::by_val(res, layout)
 }
 
-macro simd_cmp {
-    ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        let vector_ty = clif_vector_type($fx.tcx, $x.layout());
-
-        if let Some(vector_ty) = vector_ty {
-            let x = $x.load_scalar($fx);
-            let y = $y.load_scalar($fx);
-            let val = if vector_ty.lane_type().is_float() {
-                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
-            } else {
-                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
-            };
-
-            // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
-            let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
-
-            $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout()));
-        } else {
-            simd_pair_for_each_lane(
-                $fx,
-                $x,
-                $y,
-                $ret,
-                |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                    let res_lane = match lane_layout.ty.kind() {
-                        ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
-                        ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                        _ => unreachable!("{:?}", lane_layout.ty),
-                    };
-                    bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
-                },
-            );
-        }
-    },
-    ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        // FIXME use vector icmp when possible
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
-                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
-            },
-        );
-    },
-}
-
-macro simd_int_binop {
-    ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_int_binop!($fx, $op|$op($x, $y) -> $ret);
-    },
-    ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
-            },
-        );
-    },
-}
-
-macro simd_int_flt_binop {
-    ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret);
-    },
-    ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                    ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
-            },
-        );
-    },
-}
-
-macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
-    simd_pair_for_each_lane(
-        $fx,
-        $x,
-        $y,
-        $ret,
-        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-            let res_lane = match lane_layout.ty.kind() {
-                ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
-                _ => unreachable!("{:?}", lane_layout.ty),
-            };
-            CValue::by_val(res_lane, ret_lane_layout)
-        },
-    );
-}
-
 pub(crate) fn codegen_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     instance: Instance<'tcx>,
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 8bc4dd8e615b..181b45a87409 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -15,6 +15,121 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span:
     }
 }
 
+macro simd_cmp {
+    ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+        let vector_ty = clif_vector_type($fx.tcx, $x.layout());
+
+        if let Some(vector_ty) = vector_ty {
+            let x = $x.load_scalar($fx);
+            let y = $y.load_scalar($fx);
+            let val = if vector_ty.lane_type().is_float() {
+                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
+            } else {
+                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
+            };
+
+            // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
+            let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
+
+            $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout()));
+        } else {
+            simd_pair_for_each_lane(
+                $fx,
+                $x,
+                $y,
+                $ret,
+                |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+                    let res_lane = match lane_layout.ty.kind() {
+                        ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
+                        ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
+                        _ => unreachable!("{:?}", lane_layout.ty),
+                    };
+                    bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+                },
+            );
+        }
+    },
+    ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+        // FIXME use vector icmp when possible
+        simd_pair_for_each_lane(
+            $fx,
+            $x,
+            $y,
+            $ret,
+            |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
+                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
+                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+            },
+        );
+    },
+}
+
+macro simd_int_binop {
+    ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
+        simd_int_binop!($fx, $op|$op($x, $y) -> $ret);
+    },
+    ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
+        simd_pair_for_each_lane(
+            $fx,
+            $x,
+            $y,
+            $ret,
+            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            },
+        );
+    },
+}
+
+macro simd_int_flt_binop {
+    ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+        simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret);
+    },
+    ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
+        simd_pair_for_each_lane(
+            $fx,
+            $x,
+            $y,
+            $ret,
+            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+                    ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            },
+        );
+    },
+}
+
+macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
+    simd_pair_for_each_lane(
+        $fx,
+        $x,
+        $y,
+        $ret,
+        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            CValue::by_val(res_lane, ret_lane_layout)
+        },
+    );
+}
+
 pub(super) fn codegen_simd_intrinsic_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     intrinsic: Symbol,

From 78e2d4a275caec70a0b64bdc97084bacc3610076 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:39:00 +0100
Subject: [PATCH 13/18] Remove support for vector icmp for now

Real SIMD support will need an overhaul in the future anyway. For now the
vector icmp special case only complicates the code.
---
 src/intrinsics/simd.rs | 46 ++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 31 deletions(-)

diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 181b45a87409..443e2954e511 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -17,37 +17,21 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span:
 
 macro simd_cmp {
     ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        let vector_ty = clif_vector_type($fx.tcx, $x.layout());
-
-        if let Some(vector_ty) = vector_ty {
-            let x = $x.load_scalar($fx);
-            let y = $y.load_scalar($fx);
-            let val = if vector_ty.lane_type().is_float() {
-                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
-            } else {
-                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
-            };
-
-            // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
-            let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
-
-            $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout()));
-        } else {
-            simd_pair_for_each_lane(
-                $fx,
-                $x,
-                $y,
-                $ret,
-                |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                    let res_lane = match lane_layout.ty.kind() {
-                        ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
-                        ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                        _ => unreachable!("{:?}", lane_layout.ty),
-                    };
-                    bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
-                },
-            );
-        }
+        // FIXME use vector icmp when possible
+        simd_pair_for_each_lane(
+            $fx,
+            $x,
+            $y,
+            $ret,
+            |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
+                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+            },
+        );
     },
     ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
         // FIXME use vector icmp when possible

From d4d2b24d5530c50aa80985938fe13e51e6db8750 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 17:44:55 +0100
Subject: [PATCH 14/18] Slightly simplify some macros by removing an extra case
 for when signedness doesn't matter

This is slightly more verbose when invoking the macro.
---
 src/intrinsics/simd.rs | 146 +++++++++++++++++------------------------
 1 file changed, 60 insertions(+), 86 deletions(-)

diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 443e2954e511..bea99346b0a8 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -15,90 +15,64 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span:
     }
 }
 
-macro simd_cmp {
-    ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        // FIXME use vector icmp when possible
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
-                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
-            },
-        );
-    },
-    ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        // FIXME use vector icmp when possible
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
-                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
-            },
-        );
-    },
+macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) {
+    // FIXME use vector instructions when possible
+    simd_pair_for_each_lane(
+        $fx,
+        $x,
+        $y,
+        $ret,
+        |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
+                ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
+                ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+        },
+    );
 }
 
-macro simd_int_binop {
-    ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_int_binop!($fx, $op|$op($x, $y) -> $ret);
-    },
-    ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
-            },
-        );
-    },
+macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) {
+    // FIXME use vector instructions when possible
+    simd_pair_for_each_lane(
+        $fx,
+        $x,
+        $y,
+        $ret,
+        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+                ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            CValue::by_val(res_lane, ret_lane_layout)
+        },
+    );
 }
 
-macro simd_int_flt_binop {
-    ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret);
-    },
-    ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
-        simd_pair_for_each_lane(
-            $fx,
-            $x,
-            $y,
-            $ret,
-            |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
-                    ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                    ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                    ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
-            },
-        );
-    },
+macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) {
+    // FIXME use vector instructions when possible
+    simd_pair_for_each_lane(
+        $fx,
+        $x,
+        $y,
+        $ret,
+        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+            let res_lane = match lane_layout.ty.kind() {
+                ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+                ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+                ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
+                _ => unreachable!("{:?}", lane_layout.ty),
+            };
+            CValue::by_val(res_lane, ret_lane_layout)
+        },
+    );
 }
 
 macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
+    // FIXME use vector instructions when possible
     simd_pair_for_each_lane(
         $fx,
         $x,
@@ -143,11 +117,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_eq, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, Equal|Equal(x, y) -> ret);
+            simd_cmp!(fx, Equal|Equal|Equal(x, y) -> ret);
         };
         simd_ne, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret);
+            simd_cmp!(fx, NotEqual|NotEqual|NotEqual(x, y) -> ret);
         };
         simd_lt, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
@@ -331,15 +305,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_add, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
+            simd_int_flt_binop!(fx, iadd|iadd|fadd(x, y) -> ret);
         };
         simd_sub, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret);
+            simd_int_flt_binop!(fx, isub|isub|fsub(x, y) -> ret);
         };
         simd_mul, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret);
+            simd_int_flt_binop!(fx, imul|imul|fmul(x, y) -> ret);
         };
         simd_div, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
@@ -370,7 +344,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_shl, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_binop!(fx, ishl(x, y) -> ret);
+            simd_int_binop!(fx, ishl|ishl(x, y) -> ret);
         };
         simd_shr, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
@@ -378,15 +352,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_and, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_binop!(fx, band(x, y) -> ret);
+            simd_int_binop!(fx, band|band(x, y) -> ret);
         };
         simd_or, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_binop!(fx, bor(x, y) -> ret);
+            simd_int_binop!(fx, bor|bor(x, y) -> ret);
         };
         simd_xor, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_int_binop!(fx, bxor(x, y) -> ret);
+            simd_int_binop!(fx, bxor|bxor(x, y) -> ret);
         };
 
         simd_fma, (c a, c b, c c) {

From 57d25ef60e7237e18092aea4081a11a4d5a28c1c Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 18:32:27 +0100
Subject: [PATCH 15/18] Use simplified version of bool_to_zero_or_max_uint in
 simd_cmp

---
 src/intrinsics/simd.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index bea99346b0a8..f38a30011d39 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -29,7 +29,13 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident)
                 ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
             };
-            bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+
+            let ty = fx.clif_type(res_lane_layout.ty).unwrap();
+
+            let res_lane = fx.bcx.ins().bint(ty, res_lane);
+            let res_lane = fx.bcx.ins().ineg(res_lane);
+
+            CValue::by_val(res_lane, res_lane_layout)
         },
     );
 }

From b60eced4057791f9a5c94ac5c9a1c26d015dd2b7 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 18:43:08 +0100
Subject: [PATCH 16/18] Return Value instead of CValue from the
 simd_for_each_lane closure

---
 src/intrinsics/llvm.rs | 14 ++++----
 src/intrinsics/mod.rs  | 10 +++---
 src/intrinsics/simd.rs | 74 +++++++++++++++++-------------------------
 3 files changed, 42 insertions(+), 56 deletions(-)

diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs
index be3704ca2768..13c7cf677edd 100644
--- a/src/intrinsics/llvm.rs
+++ b/src/intrinsics/llvm.rs
@@ -83,22 +83,20 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.psrli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| {
-                let res_lane = match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+                match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
-                };
-                CValue::by_val(res_lane, res_lane_layout)
+                }
             });
         };
         "llvm.x86.sse2.pslli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| {
-                let res_lane = match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+                match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
-                };
-                CValue::by_val(res_lane, res_lane_layout)
+                }
             });
         };
         "llvm.x86.sse2.storeu.dq", (v mem_addr, c a) {
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index d6b35bba9e7f..dee192a69af3 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -113,7 +113,7 @@ fn simd_for_each_lane<'tcx>(
         TyAndLayout<'tcx>,
         TyAndLayout<'tcx>,
         Value,
-    ) -> CValue<'tcx>,
+    ) -> Value,
 ) {
     let layout = val.layout();
 
@@ -127,6 +127,7 @@ fn simd_for_each_lane<'tcx>(
         let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
 
         ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
@@ -143,7 +144,7 @@ fn simd_pair_for_each_lane<'tcx>(
         TyAndLayout<'tcx>,
         Value,
         Value,
-    ) -> CValue<'tcx>,
+    ) -> Value,
 ) {
     assert_eq!(x.layout(), y.layout());
     let layout = x.layout();
@@ -159,6 +160,7 @@ fn simd_pair_for_each_lane<'tcx>(
         let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
 
         ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
@@ -215,7 +217,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     layout: TyAndLayout<'tcx>,
     val: Value,
-) -> CValue<'tcx> {
+) -> Value {
     let ty = fx.clif_type(layout.ty).unwrap();
 
     let int_ty = match ty {
@@ -231,7 +233,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
         res = fx.bcx.ins().bitcast(ty, res);
     }
 
-    CValue::by_val(res, layout)
+    res
 }
 
 pub(crate) fn codegen_intrinsic_call<'tcx>(
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index f38a30011d39..0ab48aaea1ad 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -33,9 +33,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident)
             let ty = fx.clif_type(res_lane_layout.ty).unwrap();
 
             let res_lane = fx.bcx.ins().bint(ty, res_lane);
-            let res_lane = fx.bcx.ins().ineg(res_lane);
-
-            CValue::by_val(res_lane, res_lane_layout)
+            fx.bcx.ins().ineg(res_lane)
         },
     );
 }
@@ -47,13 +45,12 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r
         $x,
         $y,
         $ret,
-        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-            let res_lane = match lane_layout.ty.kind() {
+        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+            match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
-            };
-            CValue::by_val(res_lane, ret_lane_layout)
+            }
         },
     );
 }
@@ -65,14 +62,13 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident,
         $x,
         $y,
         $ret,
-        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-            let res_lane = match lane_layout.ty.kind() {
+        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+            match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
                 ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
-            };
-            CValue::by_val(res_lane, ret_lane_layout)
+            }
         },
     );
 }
@@ -84,12 +80,11 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
         $x,
         $y,
         $ret,
-        |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-            let res_lane = match lane_layout.ty.kind() {
+        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+            match lane_layout.ty.kind() {
                 ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
-            };
-            CValue::by_val(res_lane, ret_lane_layout)
+            }
         },
     );
 }
@@ -116,8 +111,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 let from_signed = type_sign(lane_layout.ty);
                 let to_signed = type_sign(ret_lane_layout.ty);
 
-                let ret_lane = clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed);
-                CValue::by_val(ret_lane, ret_lane_layout)
+                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed)
             });
         };
 
@@ -283,29 +277,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_neg, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
-                let ret_lane = match lane_layout.ty.kind() {
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+                match lane_layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().ineg(lane),
                     ty::Float(_) => fx.bcx.ins().fneg(lane),
                     _ => unreachable!(),
-                };
-                CValue::by_val(ret_lane, ret_lane_layout)
+                }
             });
         };
 
         simd_fabs, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
-                let ret_lane = fx.bcx.ins().fabs(lane);
-                CValue::by_val(ret_lane, ret_lane_layout)
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+                fx.bcx.ins().fabs(lane)
             });
         };
 
         simd_fsqrt, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
-                let ret_lane = fx.bcx.ins().sqrt(lane);
-                CValue::by_val(ret_lane, ret_lane_layout)
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+                fx.bcx.ins().sqrt(lane)
             });
         };
 
@@ -327,8 +318,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_rem, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
+            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+                match lane_layout.ty.kind() {
                     ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
                     ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
                     ty::Float(FloatTy::F32) => fx.lib_call(
@@ -344,8 +335,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                         &[x_lane, y_lane],
                     )[0],
                     _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
+                }
             });
         };
         simd_shl, (c x, c y) {
@@ -403,8 +393,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_round, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
-                let res_lane = match lane_layout.ty.kind() {
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+                match lane_layout.ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
                         "roundf",
                         vec![AbiParam::new(types::F32)],
@@ -418,29 +408,25 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                         &[lane],
                     )[0],
                     _ => unreachable!("{:?}", lane_layout.ty),
-                };
-                CValue::by_val(res_lane, ret_lane_layout)
+                }
             });
         };
         simd_ceil, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
-                let ret_lane = fx.bcx.ins().ceil(lane);
-                CValue::by_val(ret_lane, ret_lane_layout)
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+                fx.bcx.ins().ceil(lane)
             });
         };
         simd_floor, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
-                let ret_lane = fx.bcx.ins().floor(lane);
-                CValue::by_val(ret_lane, ret_lane_layout)
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+                fx.bcx.ins().floor(lane)
             });
         };
         simd_trunc, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
-                let ret_lane = fx.bcx.ins().trunc(lane);
-                CValue::by_val(ret_lane, ret_lane_layout)
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+                fx.bcx.ins().trunc(lane)
             });
         };
 

From 2633024850e9b7fa8aa9a856953312bccc3740bc Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 18:55:57 +0100
Subject: [PATCH 17/18] Don't monomorphize the simd helpers for each closure

This roughly halves the total number of LLVM IR lines for SIMD-related
functions, from 18227 to 9604.
---
 src/intrinsics/llvm.rs |  6 +++---
 src/intrinsics/mod.rs  |  8 ++++----
 src/intrinsics/simd.rs | 44 +++++++++++++++++++++---------------------
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs
index 13c7cf677edd..8bcfbc945fbf 100644
--- a/src/intrinsics/llvm.rs
+++ b/src/intrinsics/llvm.rs
@@ -73,7 +73,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
                 kind => unreachable!("kind {:?}", kind),
             };
 
-            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
                 let res_lane = match lane_layout.ty.kind() {
                     ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
                     _ => unreachable!("{:?}", lane_layout.ty),
@@ -83,7 +83,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.psrli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
@@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.pslli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index dee192a69af3..473afd168279 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -108,7 +108,7 @@ fn simd_for_each_lane<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(
+    f: &dyn Fn(
         &mut FunctionCx<'_, '_, 'tcx>,
         TyAndLayout<'tcx>,
         TyAndLayout<'tcx>,
@@ -138,7 +138,7 @@ fn simd_pair_for_each_lane<'tcx>(
     x: CValue<'tcx>,
     y: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(
+    f: &dyn Fn(
         &mut FunctionCx<'_, '_, 'tcx>,
         TyAndLayout<'tcx>,
         TyAndLayout<'tcx>,
@@ -171,7 +171,7 @@ fn simd_reduce<'tcx>(
     val: CValue<'tcx>,
     acc: Option<Value>,
     ret: CPlace<'tcx>,
-    f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
 ) {
     let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     let lane_layout = fx.layout_of(lane_ty);
@@ -192,7 +192,7 @@ fn simd_reduce_bool<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
 ) {
     let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     assert!(ret.layout().ty.is_bool());
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 0ab48aaea1ad..dc04c7643b2a 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -22,7 +22,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident)
         $x,
         $y,
         $ret,
-        |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
             let res_lane = match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
@@ -45,7 +45,7 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -62,7 +62,7 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident,
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
                 ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
@@ -80,7 +80,7 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
         $x,
         $y,
         $ret,
-        |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
             match lane_layout.ty.kind() {
                 ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
                 _ => unreachable!("{:?}", lane_layout.ty),
@@ -105,7 +105,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_cast, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| {
                 let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
 
                 let from_signed = type_sign(lane_layout.ty);
@@ -277,7 +277,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_neg, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
                 match lane_layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().ineg(lane),
                     ty::Float(_) => fx.bcx.ins().fneg(lane),
@@ -288,14 +288,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_fabs, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().fabs(lane)
             });
         };
 
         simd_fsqrt, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().sqrt(lane)
             });
         };
@@ -318,7 +318,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_rem, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
                 match lane_layout.ty.kind() {
                     ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
                     ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
@@ -393,7 +393,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_round, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
                 match lane_layout.ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
                         "roundf",
@@ -413,26 +413,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_ceil, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().ceil(lane)
             });
         };
         simd_floor, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().floor(lane)
             });
         };
         simd_trunc, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
                 fx.bcx.ins().trunc(lane)
             });
         };
 
         simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
                 } else {
@@ -443,7 +443,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
                 } else {
@@ -454,32 +454,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_all, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_any, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_and, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_or, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_xor, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
         };
 
         simd_reduce_min, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
                 let lt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
@@ -492,7 +492,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_max, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
                 let gt = match layout.ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),

From b7cda373d585d024b120401b2b796181567e5ae9 Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sun, 9 Jan 2022 19:07:15 +0100
Subject: [PATCH 18/18] Pass Ty instead of TyAndLayout to the closure of
 various simd helpers

This reduces the total number of LLVM IR lines for SIMD-related
functions from 9604 to 9467.
---
 src/intrinsics/llvm.rs |  12 ++--
 src/intrinsics/mod.rs  |  27 +++-----
 src/intrinsics/simd.rs | 146 +++++++++++++++++------------------------
 3 files changed, 75 insertions(+), 110 deletions(-)

diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs
index 8bcfbc945fbf..20f8699d12ab 100644
--- a/src/intrinsics/llvm.rs
+++ b/src/intrinsics/llvm.rs
@@ -73,17 +73,17 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
                 kind => unreachable!("kind {:?}", kind),
             };
 
-            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-                let res_lane = match lane_layout.ty.kind() {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
+                let res_lane = match lane_ty.kind() {
                     ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
-                    _ => unreachable!("{:?}", lane_layout.ty),
+                    _ => unreachable!("{:?}", lane_ty),
                 };
-                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
+                bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
             });
         };
         "llvm.x86.sse2.psrli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
@@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
         };
         "llvm.x86.sse2.pslli.d", (c a, o imm8) {
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
                 match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) {
                     imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
                     _ => fx.bcx.ins().iconst(types::I32, 0),
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 473afd168279..1e384668fc72 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -108,12 +108,7 @@ fn simd_for_each_lane<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: &dyn Fn(
-        &mut FunctionCx<'_, '_, 'tcx>,
-        TyAndLayout<'tcx>,
-        TyAndLayout<'tcx>,
-        Value,
-    ) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value) -> Value,
 ) {
     let layout = val.layout();
 
@@ -126,7 +121,7 @@ fn simd_for_each_lane<'tcx>(
     for lane_idx in 0..lane_count {
         let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
 
-        let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lane);
         let res_lane = CValue::by_val(res_lane, ret_lane_layout);
 
         ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
@@ -138,13 +133,7 @@ fn simd_pair_for_each_lane<'tcx>(
     x: CValue<'tcx>,
     y: CValue<'tcx>,
     ret: CPlace<'tcx>,
-    f: &dyn Fn(
-        &mut FunctionCx<'_, '_, 'tcx>,
-        TyAndLayout<'tcx>,
-        TyAndLayout<'tcx>,
-        Value,
-        Value,
-    ) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value,
 ) {
     assert_eq!(x.layout(), y.layout());
     let layout = x.layout();
@@ -159,7 +148,7 @@ fn simd_pair_for_each_lane<'tcx>(
         let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
         let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
 
-        let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane);
         let res_lane = CValue::by_val(res_lane, ret_lane_layout);
 
         ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
@@ -171,7 +160,7 @@ fn simd_reduce<'tcx>(
     val: CValue<'tcx>,
     acc: Option<Value>,
     ret: CPlace<'tcx>,
-    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Value, Value) -> Value,
 ) {
     let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     let lane_layout = fx.layout_of(lane_ty);
@@ -181,7 +170,7 @@ fn simd_reduce<'tcx>(
         if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
     for lane_idx in start_lane..lane_count {
         let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
-        res_val = f(fx, lane_layout, res_val, lane);
+        res_val = f(fx, lane_layout.ty, res_val, lane);
     }
     let res = CValue::by_val(res_val, lane_layout);
     ret.write_cvalue(fx, res);
@@ -215,10 +204,10 @@ fn simd_reduce_bool<'tcx>(
 
 fn bool_to_zero_or_max_uint<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
-    layout: TyAndLayout<'tcx>,
+    ty: Ty<'tcx>,
     val: Value,
 ) -> Value {
-    let ty = fx.clif_type(layout.ty).unwrap();
+    let ty = fx.clif_type(ty).unwrap();
 
     let int_ty = match ty {
         types::F32 => types::I32,
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index dc04c7643b2a..106a190096db 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -17,76 +17,52 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span:
 
 macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) {
     // FIXME use vector instructions when possible
-    simd_pair_for_each_lane(
-        $fx,
-        $x,
-        $y,
-        $ret,
-        &|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
-            let res_lane = match lane_layout.ty.kind() {
-                ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
-                ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
-                ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
-                _ => unreachable!("{:?}", lane_layout.ty),
-            };
+    simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
+        let res_lane = match lane_ty.kind() {
+            ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
+            ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
+            ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
+            _ => unreachable!("{:?}", lane_ty),
+        };
 
-            let ty = fx.clif_type(res_lane_layout.ty).unwrap();
+        let ty = fx.clif_type(res_lane_ty).unwrap();
 
-            let res_lane = fx.bcx.ins().bint(ty, res_lane);
-            fx.bcx.ins().ineg(res_lane)
-        },
-    );
+        let res_lane = fx.bcx.ins().bint(ty, res_lane);
+        fx.bcx.ins().ineg(res_lane)
+    });
 }
 
 macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) {
     // FIXME use vector instructions when possible
-    simd_pair_for_each_lane(
-        $fx,
-        $x,
-        $y,
-        $ret,
-        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
-            match lane_layout.ty.kind() {
-                ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                _ => unreachable!("{:?}", lane_layout.ty),
-            }
-        },
-    );
+    simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+        match lane_ty.kind() {
+            ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+            ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+            _ => unreachable!("{:?}", lane_ty),
+        }
+    });
 }
 
 macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) {
     // FIXME use vector instructions when possible
-    simd_pair_for_each_lane(
-        $fx,
-        $x,
-        $y,
-        $ret,
-        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
-            match lane_layout.ty.kind() {
-                ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
-                ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
-                ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
-                _ => unreachable!("{:?}", lane_layout.ty),
-            }
-        },
-    );
+    simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+        match lane_ty.kind() {
+            ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
+            ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
+            ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
+            _ => unreachable!("{:?}", lane_ty),
+        }
+    });
 }
 
 macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
     // FIXME use vector instructions when possible
-    simd_pair_for_each_lane(
-        $fx,
-        $x,
-        $y,
-        $ret,
-        &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
-            match lane_layout.ty.kind() {
-                ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
-                _ => unreachable!("{:?}", lane_layout.ty),
-            }
-        },
-    );
+    simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+        match lane_ty.kind() {
+            ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
+            _ => unreachable!("{:?}", lane_ty),
+        }
+    });
 }
 
 pub(super) fn codegen_simd_intrinsic_call<'tcx>(
@@ -105,13 +81,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_cast, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| {
-                let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| {
+                let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();
 
-                let from_signed = type_sign(lane_layout.ty);
-                let to_signed = type_sign(ret_lane_layout.ty);
+                let from_signed = type_sign(lane_ty);
+                let to_signed = type_sign(ret_lane_ty);
 
-                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed)
+                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed)
             });
         };
 
@@ -277,8 +253,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_neg, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
-                match lane_layout.ty.kind() {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+                match lane_ty.kind() {
                     ty::Int(_) => fx.bcx.ins().ineg(lane),
                     ty::Float(_) => fx.bcx.ins().fneg(lane),
                     _ => unreachable!(),
@@ -288,14 +264,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_fabs, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
                 fx.bcx.ins().fabs(lane)
             });
         };
 
         simd_fsqrt, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
                 fx.bcx.ins().sqrt(lane)
             });
         };
@@ -318,8 +294,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
         simd_rem, (c x, c y) {
             validate_simd_type(fx, intrinsic, span, x.layout().ty);
-            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| {
-                match lane_layout.ty.kind() {
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+                match lane_ty.kind() {
                     ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
                     ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
                     ty::Float(FloatTy::F32) => fx.lib_call(
@@ -334,7 +310,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                         vec![AbiParam::new(types::F64)],
                         &[x_lane, y_lane],
                     )[0],
-                    _ => unreachable!("{:?}", lane_layout.ty),
+                    _ => unreachable!("{:?}", lane_ty),
                 }
             });
         };
@@ -393,8 +369,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_round, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| {
-                match lane_layout.ty.kind() {
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+                match lane_ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
                         "roundf",
                         vec![AbiParam::new(types::F32)],
@@ -407,33 +383,33 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                         vec![AbiParam::new(types::F64)],
                         &[lane],
                     )[0],
-                    _ => unreachable!("{:?}", lane_layout.ty),
+                    _ => unreachable!("{:?}", lane_ty),
                 }
             });
         };
         simd_ceil, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
                 fx.bcx.ins().ceil(lane)
             });
         };
         simd_floor, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
                 fx.bcx.ins().floor(lane)
             });
         };
         simd_trunc, (c a) {
             validate_simd_type(fx, intrinsic, span, a.layout().ty);
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| {
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| {
                 fx.bcx.ins().trunc(lane)
             });
         };
 
         simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
-                if lane_layout.ty.is_floating_point() {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
                 } else {
                     fx.bcx.ins().iadd(a, b)
@@ -443,8 +419,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| {
-                if lane_layout.ty.is_floating_point() {
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
                 } else {
                     fx.bcx.ins().imul(a, b)
@@ -464,23 +440,23 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_and, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b));
         };
 
         simd_reduce_or, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b));
         };
 
         simd_reduce_xor, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
         };
 
         simd_reduce_min, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
-                let lt = match layout.ty.kind() {
+            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
+                let lt = match ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
                     ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::LessThan, a, b),
@@ -492,8 +468,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
         simd_reduce_max, (c v) {
             validate_simd_type(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, None, ret, &|fx, layout, a, b| {
-                let gt = match layout.ty.kind() {
+            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
+                let gt = match ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
                     ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::GreaterThan, a, b),