From 99136301583c6c88e41ac517b9b4b37dadf1ec83 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 14:54:43 +0100 Subject: [PATCH 01/18] Reduce usage of subst types in the intrinsic code Using the arguments often saves a layout_of call --- src/intrinsics/mod.rs | 114 ++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 59 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index f4703b22ecbc..da9aa45069b8 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -41,19 +41,11 @@ macro intrinsic_arg { } } -macro intrinsic_substs { - ($substs:expr, $index:expr,) => {}, - ($substs:expr, $index:expr, $first:ident $(,$rest:ident)*) => { - let $first = $substs.type_at($index); - intrinsic_substs!($substs, $index+1, $($rest),*); - } -} - macro intrinsic_match { ($fx:expr, $intrinsic:expr, $substs:expr, $args:expr, _ => $unknown:block; $( - $($($name:tt).*)|+ $(if $cond:expr)?, $(<$($subst:ident),*>)? ($($a:ident $arg:ident),*) $content:block; + $($($name:tt).*)|+ $(if $cond:expr)?, ($($a:ident $arg:ident),*) $content:block; )*) => { let _ = $substs; // Silence warning when substs is unused. match $intrinsic { @@ -61,9 +53,6 @@ macro intrinsic_match { $(intrinsic_pat!($($name).*))|* $(if $cond)? => { #[allow(unused_parens, non_snake_case)] { - $( - intrinsic_substs!($substs, 0, $($subst),*); - )? if let [$($arg),*] = $args { let ($($arg,)*) = ( $(intrinsic_arg!($a $fx, $arg),)* @@ -492,7 +481,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( breakpoint, () { fx.bcx.ins().debugtrap(); }; - copy | copy_nonoverlapping, (v src, v dst, v count) { + copy | copy_nonoverlapping, (v src, v dst, v count) { + let elem_ty = substs.type_at(0); let elem_size: u64 = fx.layout_of(elem_ty).size.bytes(); assert_eq!(args.len(), 3); let byte_amount = if elem_size != 1 { @@ -510,7 +500,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( } }; // NOTE: the volatile variants have src and dst swapped - volatile_copy_memory | volatile_copy_nonoverlapping_memory, (v dst, v src, v count) { + volatile_copy_memory | volatile_copy_nonoverlapping_memory, (v dst, v src, v count) { + let elem_ty = substs.type_at(0); let elem_size: u64 = fx.layout_of(elem_ty).size.bytes(); assert_eq!(args.len(), 3); let byte_amount = if elem_size != 1 { @@ -528,8 +519,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount); } }; - size_of_val, (c ptr) { - let layout = fx.layout_of(T); + size_of_val, (c ptr) { + let layout = fx.layout_of(substs.type_at(0)); let size = if layout.is_unsized() { let (_ptr, info) = ptr.load_scalar_pair(fx); let (size, _align) = crate::unsize::size_and_align_of_dst(fx, layout, info); @@ -542,8 +533,8 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( }; ret.write_cvalue(fx, CValue::by_val(size, usize_layout)); }; - min_align_of_val, (c ptr) { - let layout = fx.layout_of(T); + min_align_of_val, (c ptr) { + let layout = fx.layout_of(substs.type_at(0)); let align = if layout.is_unsized() { let (_ptr, info) = ptr.load_scalar_pair(fx); let (_size, align) = crate::unsize::size_and_align_of_dst(fx, layout, info); @@ -589,7 +580,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ); ret.write_cvalue(fx, res); }; - saturating_add | saturating_sub, (c lhs, c rhs) { + saturating_add | saturating_sub, (c lhs, c rhs) { assert_eq!(lhs.layout().ty, rhs.layout().ty); let bin_op = match intrinsic { sym::saturating_add => BinOp::Add, @@ -597,7 +588,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( _ => unreachable!(), }; - let signed = type_sign(T); + let signed = type_sign(lhs.layout().ty); let checked_res = crate::num::codegen_checked_int_binop( fx, @@ -607,7 +598,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ); let (val, has_overflow) = checked_res.load_scalar_pair(fx); - let clif_ty = fx.clif_type(T).unwrap(); + let clif_ty = fx.clif_type(lhs.layout().ty).unwrap(); let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed); @@ -629,17 +620,19 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( _ => unreachable!(), }; - let res = CValue::by_val(val, fx.layout_of(T)); + let res = CValue::by_val(val, lhs.layout()); ret.write_cvalue(fx, res); }; - rotate_left, (v x, v y) { - let layout = fx.layout_of(T); + rotate_left, (c x, v y) { + let layout = x.layout(); + let x = x.load_scalar(fx); let res = fx.bcx.ins().rotl(x, y); ret.write_cvalue(fx, CValue::by_val(res, layout)); }; - rotate_right, (v x, v y) { - let layout = fx.layout_of(T); + rotate_right, (c x, v y) { + let layout = x.layout(); + let x = x.load_scalar(fx); let res = fx.bcx.ins().rotr(x, y); ret.write_cvalue(fx, CValue::by_val(res, layout)); }; @@ -675,29 +668,33 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( // FIXME use emit_small_memset fx.bcx.call_memset(fx.target_config, dst_ptr, val, count); }; - ctlz | ctlz_nonzero, (v arg) { + ctlz | ctlz_nonzero, (c arg) { + let val = arg.load_scalar(fx); // FIXME trap on `ctlz_nonzero` with zero arg. - let res = fx.bcx.ins().clz(arg); - let res = CValue::by_val(res, fx.layout_of(T)); + let res = fx.bcx.ins().clz(val); + let res = CValue::by_val(res, arg.layout()); ret.write_cvalue(fx, res); }; - cttz | cttz_nonzero, (v arg) { + cttz | cttz_nonzero, (c arg) { + let val = arg.load_scalar(fx); // FIXME trap on `cttz_nonzero` with zero arg. - let res = fx.bcx.ins().ctz(arg); - let res = CValue::by_val(res, fx.layout_of(T)); + let res = fx.bcx.ins().ctz(val); + let res = CValue::by_val(res, arg.layout()); ret.write_cvalue(fx, res); }; - ctpop, (v arg) { - let res = fx.bcx.ins().popcnt(arg); - let res = CValue::by_val(res, fx.layout_of(T)); + ctpop, (c arg) { + let val = arg.load_scalar(fx); + let res = fx.bcx.ins().popcnt(val); + let res = CValue::by_val(res, arg.layout()); ret.write_cvalue(fx, res); }; - bitreverse, (v arg) { - let res = fx.bcx.ins().bitrev(arg); - let res = CValue::by_val(res, fx.layout_of(T)); + bitreverse, (c arg) { + let val = arg.load_scalar(fx); + let res = fx.bcx.ins().bitrev(val); + let res = CValue::by_val(res, arg.layout()); ret.write_cvalue(fx, res); }; - bswap, (v arg) { + bswap, (c arg) { // FIXME(CraneStation/cranelift#794) add bswap instruction to cranelift fn swap(bcx: &mut FunctionBuilder<'_>, v: Value) -> Value { match bcx.func.dfg.value_type(v) { @@ -773,15 +770,16 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ty => unreachable!("bswap {}", ty), } } - let res = CValue::by_val(swap(&mut fx.bcx, arg), fx.layout_of(T)); + let val = arg.load_scalar(fx); + let res = CValue::by_val(swap(&mut fx.bcx, val), arg.layout()); ret.write_cvalue(fx, res); }; - assert_inhabited | assert_zero_valid | assert_uninit_valid, () { - let layout = fx.layout_of(T); + assert_inhabited | assert_zero_valid | assert_uninit_valid, () { + let layout = fx.layout_of(substs.type_at(0)); if layout.abi.is_uninhabited() { with_no_trimmed_paths(|| crate::base::codegen_panic( fx, - &format!("attempted to instantiate uninhabited type `{}`", T), + &format!("attempted to instantiate uninhabited type `{}`", layout.ty), span, )); return; @@ -790,7 +788,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( if intrinsic == sym::assert_zero_valid && !layout.might_permit_raw_init(fx, /*zero:*/ true) { with_no_trimmed_paths(|| crate::base::codegen_panic( fx, - &format!("attempted to zero-initialize type `{}`, which is invalid", T), + &format!("attempted to zero-initialize type `{}`, which is invalid", layout.ty), span, )); return; @@ -799,7 +797,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( if intrinsic == sym::assert_uninit_valid && !layout.might_permit_raw_init(fx, /*zero:*/ false) { with_no_trimmed_paths(|| crate::base::codegen_panic( fx, - &format!("attempted to leave type `{}` uninitialized, which is invalid", T), + &format!("attempted to leave type `{}` uninitialized, which is invalid", layout.ty), span, )); return; @@ -832,10 +830,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); }; - ptr_offset_from, (v ptr, v base) { + ptr_offset_from, (v ptr, v base) { + let ty = substs.type_at(0); let isize_layout = fx.layout_of(fx.tcx.types.isize); - let pointee_size: u64 = fx.layout_of(T).size.bytes(); + let pointee_size: u64 = fx.layout_of(ty).size.bytes(); let diff = fx.bcx.ins().isub(ptr, base); // FIXME this can be an exact division. let val = CValue::by_val(fx.bcx.ins().sdiv_imm(diff, pointee_size as i64), isize_layout); @@ -864,13 +863,14 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( // FIXME use a compiler fence once Cranelift supports it fx.bcx.ins().fence(); }; - _ if intrinsic.as_str().starts_with("atomic_load"), (v ptr) { - validate_atomic_type!(fx, intrinsic, span, T); - let ty = fx.clif_type(T).unwrap(); + _ if intrinsic.as_str().starts_with("atomic_load"), (v ptr) { + let ty = substs.type_at(0); + validate_atomic_type!(fx, intrinsic, span, ty); + let clif_ty = fx.clif_type(ty).unwrap(); - let val = fx.bcx.ins().atomic_load(ty, MemFlags::trusted(), ptr); + let val = fx.bcx.ins().atomic_load(clif_ty, MemFlags::trusted(), ptr); - let val = CValue::by_val(val, fx.layout_of(T)); + let val = CValue::by_val(val, fx.layout_of(ty)); ret.write_cvalue(fx, val); }; _ if intrinsic.as_str().starts_with("atomic_store"), (v ptr, c val) { @@ -1101,18 +1101,14 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ret.write_cvalue(fx, CValue::by_val(res, ret.layout())); }; - raw_eq, (v lhs_ref, v rhs_ref) { - fn type_by_size(size: Size) -> Option { - Type::int(size.bits().try_into().ok()?) - } - - let size = fx.layout_of(T).layout.size; + raw_eq, (v lhs_ref, v rhs_ref) { + let size = fx.layout_of(substs.type_at(0)).layout.size; // FIXME add and use emit_small_memcmp let is_eq_value = if size == Size::ZERO { // No bytes means they're trivially equal fx.bcx.ins().iconst(types::I8, 1) - } else if let Some(clty) = type_by_size(size) { + } else if let Some(clty) = size.bits().try_into().ok().and_then(Type::int) { // Can't use `trusted` for these loads; they could be unaligned. let mut flags = MemFlags::new(); flags.set_notrap(); From c5b969583ff12cfd0cd85e2923753101024b72ad Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 15:17:42 +0100 Subject: [PATCH 02/18] Split codegen_intrinsic_call function This should reduce compile times of cg_clif --- src/intrinsics/mod.rs | 49 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index da9aa45069b8..517deba58050 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -73,7 +73,7 @@ macro intrinsic_match { } macro call_intrinsic_match { - ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $destination:expr, $args:expr, $( + ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $args:expr, $( $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, )*) => { match $intrinsic { @@ -87,19 +87,13 @@ macro call_intrinsic_match { let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty); $ret.write_cvalue($fx, res); - if let Some((_, dest)) = $destination { - let ret_block = $fx.get_block(dest); - $fx.bcx.ins().jump(ret_block, &[]); - return; - } else { - unreachable!(); - } + return true; } else { bug!("wrong number of args for intrinsic {:?}", $intrinsic); } } )* - _ => {} + _ => false, } } } @@ -397,7 +391,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( span: Span, ) { let intrinsic = fx.tcx.item_name(instance.def_id()); - let substs = instance.substs; let ret = match destination { Some((place, _)) => place, @@ -420,13 +413,27 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( self::simd::codegen_simd_intrinsic_call(fx, instance, args, ret, span); let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); - return; + } else if codegen_float_intrinsic_call(fx, instance, args, ret) { + let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1); + fx.bcx.ins().jump(ret_block, &[]); + } else { + codegen_regular_intrinsic_call(fx, instance, args, ret, span, destination); } +} - let usize_layout = fx.layout_of(fx.tcx.types.usize); +fn codegen_float_intrinsic_call<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + instance: Instance<'tcx>, + args: &[mir::Operand<'tcx>], + ret: CPlace<'tcx>, +) -> bool { + let def_id = instance.def_id(); + let substs = instance.substs; + + let intrinsic = fx.tcx.item_name(def_id); call_intrinsic_match! { - fx, intrinsic, substs, ret, destination, args, + fx, intrinsic, substs, ret, args, expf32(flt) -> f32 => expf, expf64(flt) -> f64 => exp, exp2f32(flt) -> f32 => exp2f, @@ -467,6 +474,22 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( cosf32(flt) -> f32 => cosf, cosf64(flt) -> f64 => cos, } +} + +fn codegen_regular_intrinsic_call<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + instance: Instance<'tcx>, + args: &[mir::Operand<'tcx>], + ret: CPlace<'tcx>, + span: Span, + destination: Option<(CPlace<'tcx>, BasicBlock)>, +) { + let def_id = instance.def_id(); + let substs = instance.substs; + + let intrinsic = fx.tcx.item_name(def_id); + + let usize_layout = fx.layout_of(fx.tcx.types.usize); intrinsic_match! { fx, intrinsic, substs, args, From 70cc24254500f783ddd08e65e6abcf21e27c0c27 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 15:22:46 +0100 Subject: [PATCH 03/18] Remove a couple of duplicate calls --- src/intrinsics/mod.rs | 25 ++++++++++--------------- src/intrinsics/simd.rs | 11 +++++------ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 517deba58050..cfe3e7bb9201 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -9,7 +9,8 @@ pub(crate) use cpuid::codegen_cpuid_call; pub(crate) use llvm::codegen_llvm_intrinsic_call; use rustc_middle::ty::print::with_no_trimmed_paths; -use rustc_span::symbol::{kw, sym}; +use rustc_middle::ty::subst::SubstsRef; +use rustc_span::symbol::{kw, sym, Symbol}; use crate::prelude::*; use cranelift_codegen::ir::AtomicRmwOp; @@ -391,6 +392,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( span: Span, ) { let intrinsic = fx.tcx.item_name(instance.def_id()); + let substs = instance.substs; let ret = match destination { Some((place, _)) => place, @@ -410,28 +412,24 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( }; if intrinsic.as_str().starts_with("simd_") { - self::simd::codegen_simd_intrinsic_call(fx, instance, args, ret, span); + self::simd::codegen_simd_intrinsic_call(fx, intrinsic, substs, args, ret, span); let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); - } else if codegen_float_intrinsic_call(fx, instance, args, ret) { + } else if codegen_float_intrinsic_call(fx, intrinsic, substs, args, ret) { let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); } else { - codegen_regular_intrinsic_call(fx, instance, args, ret, span, destination); + codegen_regular_intrinsic_call(fx, instance, intrinsic, substs, args, ret, span, destination); } } fn codegen_float_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, - instance: Instance<'tcx>, + intrinsic: Symbol, + substs: SubstsRef<'tcx>, args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, ) -> bool { - let def_id = instance.def_id(); - let substs = instance.substs; - - let intrinsic = fx.tcx.item_name(def_id); - call_intrinsic_match! { fx, intrinsic, substs, ret, args, expf32(flt) -> f32 => expf, @@ -479,16 +477,13 @@ fn codegen_float_intrinsic_call<'tcx>( fn codegen_regular_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, instance: Instance<'tcx>, + intrinsic: Symbol, + substs: SubstsRef<'tcx>, args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, span: Span, destination: Option<(CPlace<'tcx>, BasicBlock)>, ) { - let def_id = instance.def_id(); - let substs = instance.substs; - - let intrinsic = fx.tcx.item_name(def_id); - let usize_layout = fx.layout_of(fx.tcx.types.usize); intrinsic_match! { diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 6c0631d9ecbd..d8dcf5d0ab98 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -1,20 +1,19 @@ //! Codegen `extern "platform-intrinsic"` intrinsics. +use rustc_middle::ty::subst::SubstsRef; +use rustc_span::Symbol; + use super::*; use crate::prelude::*; pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, - instance: Instance<'tcx>, + intrinsic: Symbol, + substs: SubstsRef<'tcx>, args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, span: Span, ) { - let def_id = instance.def_id(); - let substs = instance.substs; - - let intrinsic = fx.tcx.item_name(def_id); - intrinsic_match! { fx, intrinsic, substs, args, _ => { From 409e3eb2cbc7a18e1a9e6bc607766ef18cd79dfc Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 15:24:10 +0100 Subject: [PATCH 04/18] Remove unnecessary argument --- src/intrinsics/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index cfe3e7bb9201..24e9ed338223 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -74,13 +74,12 @@ macro intrinsic_match { } macro call_intrinsic_match { - ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $args:expr, $( + ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, )*) => { match $intrinsic { $( sym::$name => { - assert!($substs.is_noop()); if let [$(ref $arg),*] = *$args { let ($($arg,)*) = ( $(codegen_operand($fx, $arg),)* @@ -415,7 +414,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( self::simd::codegen_simd_intrinsic_call(fx, intrinsic, substs, args, ret, span); let ret_block = fx.get_block(destination.expect("SIMD intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); - } else if codegen_float_intrinsic_call(fx, intrinsic, substs, args, ret) { + } else if codegen_float_intrinsic_call(fx, intrinsic, args, ret) { let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); } else { @@ -426,12 +425,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( fn codegen_float_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: Symbol, - substs: SubstsRef<'tcx>, args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, ) -> bool { call_intrinsic_match! { - fx, intrinsic, substs, ret, args, + fx, intrinsic, ret, args, expf32(flt) -> f32 => expf, expf64(flt) -> f64 => exp, exp2f32(flt) -> f32 => exp2f, From 046e094842b4c1b046aad66750838304c017796b Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 15:31:44 +0100 Subject: [PATCH 05/18] Only use a single bug!() invocation in call_intrinsic_match This reduces code size --- src/intrinsics/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 24e9ed338223..29b30631d0fc 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -88,13 +88,13 @@ macro call_intrinsic_match { $ret.write_cvalue($fx, res); return true; - } else { - bug!("wrong number of args for intrinsic {:?}", $intrinsic); } } )* - _ => false, + _ => return false, } + + bug!("wrong number of args for intrinsic {:?}", $intrinsic); } } From a1a164083ea9cdf8f3d6f053cdfb6b3355787c44 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 16:44:54 +0100 Subject: [PATCH 06/18] Move call_intrinsic_match macro into codegen_float_intrinsic_call --- src/intrinsics/mod.rs | 50 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 29b30631d0fc..27e3b1b11f1d 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -73,31 +73,6 @@ macro intrinsic_match { } } -macro call_intrinsic_match { - ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( - $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, - )*) => { - match $intrinsic { - $( - sym::$name => { - if let [$(ref $arg),*] = *$args { - let ($($arg,)*) = ( - $(codegen_operand($fx, $arg),)* - ); - let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty); - $ret.write_cvalue($fx, res); - - return true; - } - } - )* - _ => return false, - } - - bug!("wrong number of args for intrinsic {:?}", $intrinsic); - } -} - macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) { match $ty.kind() { ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {} @@ -428,6 +403,31 @@ fn codegen_float_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, ) -> bool { + macro call_intrinsic_match { + ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( + $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, + )*) => { + match $intrinsic { + $( + sym::$name => { + if let [$(ref $arg),*] = *$args { + let ($($arg,)*) = ( + $(codegen_operand($fx, $arg),)* + ); + let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty); + $ret.write_cvalue($fx, res); + + return true; + } + } + )* + _ => return false, + } + + bug!("wrong number of args for intrinsic {:?}", $intrinsic); + } + } + call_intrinsic_match! { fx, intrinsic, ret, args, expf32(flt) -> f32 => expf, From 300974714c96524806b44e36c5d6a7d0e854fc3e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:11:28 +0100 Subject: [PATCH 07/18] Dedup write_cvalue calls in codegen_float_intrinsic_call Also directly use an array instead of going through a tuple. This reduces the amount of llvm ir lines for this function by almost half from 3086 to 1662. --- src/intrinsics/mod.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 27e3b1b11f1d..0d667847b9aa 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -393,7 +393,16 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( let ret_block = fx.get_block(destination.expect("Float intrinsics don't diverge").1); fx.bcx.ins().jump(ret_block, &[]); } else { - codegen_regular_intrinsic_call(fx, instance, intrinsic, substs, args, ret, span, destination); + codegen_regular_intrinsic_call( + fx, + instance, + intrinsic, + substs, + args, + ret, + span, + destination, + ); } } @@ -407,24 +416,27 @@ fn codegen_float_intrinsic_call<'tcx>( ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, )*) => { - match $intrinsic { + let res = match $intrinsic { $( sym::$name => { if let [$(ref $arg),*] = *$args { - let ($($arg,)*) = ( - $(codegen_operand($fx, $arg),)* - ); - let res = $fx.easy_call(stringify!($func), &[$($arg),*], $fx.tcx.types.$ty); - $ret.write_cvalue($fx, res); - - return true; + let args = [$(codegen_operand($fx, $arg),)*]; + Some($fx.easy_call(stringify!($func), &args, $fx.tcx.types.$ty)) + } else { + None } } )* _ => return false, + }; + + if let Some(res) = res { + $ret.write_cvalue($fx, res); + } else { + bug!("wrong number of args for intrinsic {:?}", $intrinsic); } - bug!("wrong number of args for intrinsic {:?}", $intrinsic); + true } } From baad993daead9ddc127dd897035f6616f4e367f6 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:19:11 +0100 Subject: [PATCH 08/18] Dedup codegen_operand calls in codegen_float_intrinsic_call This reduces the amount of llvm ir lines for this function by a little over half from 1662 to 781. --- src/intrinsics/mod.rs | 105 +++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 0d667847b9aa..8da6c7ae9eb8 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -414,73 +414,84 @@ fn codegen_float_intrinsic_call<'tcx>( ) -> bool { macro call_intrinsic_match { ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( - $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, + $name:ident($arg_count:literal) -> $ty:ident => $func:ident, )*) => { - let res = match $intrinsic { + let (name, arg_count, ty) = match $intrinsic { $( - sym::$name => { - if let [$(ref $arg),*] = *$args { - let args = [$(codegen_operand($fx, $arg),)*]; - Some($fx.easy_call(stringify!($func), &args, $fx.tcx.types.$ty)) - } else { - None - } - } + sym::$name => (stringify!($func), $arg_count, $fx.tcx.types.$ty), )* _ => return false, }; - if let Some(res) = res { - $ret.write_cvalue($fx, res); - } else { + if $args.len() != arg_count { bug!("wrong number of args for intrinsic {:?}", $intrinsic); } + let (a, b, c); + let args = match $args { + [x] => { + a = [codegen_operand($fx, x)]; + &a as &[_] + } + [x, y] => { + b = [codegen_operand($fx, x), codegen_operand($fx, y)]; + &b + } + [x, y, z] => { + c = [codegen_operand($fx, x), codegen_operand($fx, y), codegen_operand($fx, z)]; + &c + } + _ => unreachable!(), + }; + + let res = $fx.easy_call(name, &args, ty); + $ret.write_cvalue($fx, res); + true } } call_intrinsic_match! { fx, intrinsic, ret, args, - expf32(flt) -> f32 => expf, - expf64(flt) -> f64 => exp, - exp2f32(flt) -> f32 => exp2f, - exp2f64(flt) -> f64 => exp2, - sqrtf32(flt) -> f32 => sqrtf, - sqrtf64(flt) -> f64 => sqrt, - powif32(a, x) -> f32 => __powisf2, // compiler-builtins - powif64(a, x) -> f64 => __powidf2, // compiler-builtins - powf32(a, x) -> f32 => powf, - powf64(a, x) -> f64 => pow, - logf32(flt) -> f32 => logf, - logf64(flt) -> f64 => log, - log2f32(flt) -> f32 => log2f, - log2f64(flt) -> f64 => log2, - log10f32(flt) -> f32 => log10f, - log10f64(flt) -> f64 => log10, - fabsf32(flt) -> f32 => fabsf, - fabsf64(flt) -> f64 => fabs, - fmaf32(x, y, z) -> f32 => fmaf, - fmaf64(x, y, z) -> f64 => fma, - copysignf32(x, y) -> f32 => copysignf, - copysignf64(x, y) -> f64 => copysign, + expf32(1) -> f32 => expf, + expf64(1) -> f64 => exp, + exp2f32(1) -> f32 => exp2f, + exp2f64(1) -> f64 => exp2, + sqrtf32(1) -> f32 => sqrtf, + sqrtf64(1) -> f64 => sqrt, + powif32(2) -> f32 => __powisf2, // compiler-builtins + powif64(2) -> f64 => __powidf2, // compiler-builtins + powf32(2) -> f32 => powf, + powf64(2) -> f64 => pow, + logf32(1) -> f32 => logf, + logf64(1) -> f64 => log, + log2f32(1) -> f32 => log2f, + log2f64(1) -> f64 => log2, + log10f32(1) -> f32 => log10f, + log10f64(1) -> f64 => log10, + fabsf32(1) -> f32 => fabsf, + fabsf64(1) -> f64 => fabs, + fmaf32(3) -> f32 => fmaf, + fmaf64(3) -> f64 => fma, + copysignf32(2) -> f32 => copysignf, + copysignf64(2) -> f64 => copysign, // rounding variants // FIXME use clif insts - floorf32(flt) -> f32 => floorf, - floorf64(flt) -> f64 => floor, - ceilf32(flt) -> f32 => ceilf, - ceilf64(flt) -> f64 => ceil, - truncf32(flt) -> f32 => truncf, - truncf64(flt) -> f64 => trunc, - roundf32(flt) -> f32 => roundf, - roundf64(flt) -> f64 => round, + floorf32(1) -> f32 => floorf, + floorf64(1) -> f64 => floor, + ceilf32(1) -> f32 => ceilf, + ceilf64(1) -> f64 => ceil, + truncf32(1) -> f32 => truncf, + truncf64(1) -> f64 => trunc, + roundf32(1) -> f32 => roundf, + roundf64(1) -> f64 => round, // trigonometry - sinf32(flt) -> f32 => sinf, - sinf64(flt) -> f64 => sin, - cosf32(flt) -> f32 => cosf, - cosf64(flt) -> f64 => cos, + sinf32(1) -> f32 => sinf, + sinf64(1) -> f64 => sin, + cosf32(1) -> f32 => cosf, + cosf64(1) -> f64 => cos, } } From 9e6d8c1b244213c0a7677504ffeced9cc9c97e27 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:22:23 +0100 Subject: [PATCH 09/18] Remove the call_intrinsic_match macro --- src/intrinsics/mod.rs | 134 ++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 76 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 8da6c7ae9eb8..bd6ef41ef66c 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -412,87 +412,69 @@ fn codegen_float_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, ) -> bool { - macro call_intrinsic_match { - ($fx:expr, $intrinsic:expr, $ret:expr, $args:expr, $( - $name:ident($arg_count:literal) -> $ty:ident => $func:ident, - )*) => { - let (name, arg_count, ty) = match $intrinsic { - $( - sym::$name => (stringify!($func), $arg_count, $fx.tcx.types.$ty), - )* - _ => return false, - }; + let (name, arg_count, ty) = match intrinsic { + sym::expf32 => ("expf", 1, fx.tcx.types.f32), + sym::expf64 => ("exp", 1, fx.tcx.types.f64), + sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32), + sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64), + sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32), + sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64), + sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32), // compiler-builtins + sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64), // compiler-builtins + sym::powf32 => ("powf", 2, fx.tcx.types.f32), + sym::powf64 => ("pow", 2, fx.tcx.types.f64), + sym::logf32 => ("logf", 1, fx.tcx.types.f32), + sym::logf64 => ("log", 1, fx.tcx.types.f64), + sym::log2f32 => ("log2f", 1, fx.tcx.types.f32), + sym::log2f64 => ("log2", 1, fx.tcx.types.f64), + sym::log10f32 => ("log10f", 1, fx.tcx.types.f32), + sym::log10f64 => ("log10", 1, fx.tcx.types.f64), + sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32), + sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64), + sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32), + sym::fmaf64 => ("fma", 3, fx.tcx.types.f64), + sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32), + sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64), + sym::floorf32 => ("floorf", 1, fx.tcx.types.f32), + sym::floorf64 => ("floor", 1, fx.tcx.types.f64), + sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32), + sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64), + sym::truncf32 => ("truncf", 1, fx.tcx.types.f32), + sym::truncf64 => ("trunc", 1, fx.tcx.types.f64), + sym::roundf32 => ("roundf", 1, fx.tcx.types.f32), + sym::roundf64 => ("round", 1, fx.tcx.types.f64), + sym::sinf32 => ("sinf", 1, fx.tcx.types.f32), + sym::sinf64 => ("sin", 1, fx.tcx.types.f64), + sym::cosf32 => ("cosf", 1, fx.tcx.types.f32), + sym::cosf64 => ("cos", 1, fx.tcx.types.f64), + _ => return false, + }; - if $args.len() != arg_count { - bug!("wrong number of args for intrinsic {:?}", $intrinsic); - } + if args.len() != arg_count { + bug!("wrong number of args for intrinsic {:?}", intrinsic); + } - let (a, b, c); - let args = match $args { - [x] => { - a = [codegen_operand($fx, x)]; - &a as &[_] - } - [x, y] => { - b = [codegen_operand($fx, x), codegen_operand($fx, y)]; - &b - } - [x, y, z] => { - c = [codegen_operand($fx, x), codegen_operand($fx, y), codegen_operand($fx, z)]; - &c - } - _ => unreachable!(), - }; - - let res = $fx.easy_call(name, &args, ty); - $ret.write_cvalue($fx, res); - - true + let (a, b, c); + let args = match args { + [x] => { + a = [codegen_operand(fx, x)]; + &a as &[_] } - } + [x, y] => { + b = [codegen_operand(fx, x), codegen_operand(fx, y)]; + &b + } + [x, y, z] => { + c = [codegen_operand(fx, x), codegen_operand(fx, y), codegen_operand(fx, z)]; + &c + } + _ => unreachable!(), + }; - call_intrinsic_match! { - fx, intrinsic, ret, args, - expf32(1) -> f32 => expf, - expf64(1) -> f64 => exp, - exp2f32(1) -> f32 => exp2f, - exp2f64(1) -> f64 => exp2, - sqrtf32(1) -> f32 => sqrtf, - sqrtf64(1) -> f64 => sqrt, - powif32(2) -> f32 => __powisf2, // compiler-builtins - powif64(2) -> f64 => __powidf2, // compiler-builtins - powf32(2) -> f32 => powf, - powf64(2) -> f64 => pow, - logf32(1) -> f32 => logf, - logf64(1) -> f64 => log, - log2f32(1) -> f32 => log2f, - log2f64(1) -> f64 => log2, - log10f32(1) -> f32 => log10f, - log10f64(1) -> f64 => log10, - fabsf32(1) -> f32 => fabsf, - fabsf64(1) -> f64 => fabs, - fmaf32(3) -> f32 => fmaf, - fmaf64(3) -> f64 => fma, - copysignf32(2) -> f32 => copysignf, - copysignf64(2) -> f64 => copysign, + let res = fx.easy_call(name, &args, ty); + ret.write_cvalue(fx, res); - // rounding variants - // FIXME use clif insts - floorf32(1) -> f32 => floorf, - floorf64(1) -> f64 => floor, - ceilf32(1) -> f32 => ceilf, - ceilf64(1) -> f64 => ceil, - truncf32(1) -> f32 => truncf, - truncf64(1) -> f64 => trunc, - roundf32(1) -> f32 => roundf, - roundf64(1) -> f64 => round, - - // trigonometry - sinf32(1) -> f32 => sinf, - sinf64(1) -> f64 => sin, - cosf32(1) -> f32 => cosf, - cosf64(1) -> f64 => cos, - } + true } fn codegen_regular_intrinsic_call<'tcx>( From 9295b086f6e2a42a7739bd522051060b0c12f885 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:29:16 +0100 Subject: [PATCH 10/18] Turn validate_simd_type into a function This effectively outlines it, significantly reducing the size of the codegen_simd_intrinsic_call llvm ir from 10419 lines to 6378 lines. --- src/intrinsics/mod.rs | 8 ++--- src/intrinsics/simd.rs | 80 +++++++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index bd6ef41ef66c..f305942a8c3b 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -91,11 +91,11 @@ macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) { } } -macro validate_simd_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) { - if !$ty.is_simd() { - $fx.tcx.sess.span_err($span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", $intrinsic, $ty)); +fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) { + if !ty.is_simd() { + fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty)); // Prevent verifier error - crate::trap::trap_unreachable($fx, "compilation should not have succeeded"); + crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); return; } } diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index d8dcf5d0ab98..9e42ff587bd7 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -21,7 +21,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_cast, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap(); @@ -34,27 +34,27 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_eq, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!(fx, Equal|Equal(x, y) -> ret); }; simd_ne, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret); }; simd_lt, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!(fx, UnsignedLessThan|SignedLessThan|LessThan(x, y) -> ret); }; simd_le, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual|LessThanOrEqual(x, y) -> ret); }; simd_gt, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan|GreaterThan(x, y) -> ret); }; simd_ge, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_cmp!( fx, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual|GreaterThanOrEqual @@ -64,7 +64,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( // simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U _ if intrinsic.as_str().starts_with("simd_shuffle"), (c x, c y, o idx) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); // If this intrinsic is the older "simd_shuffleN" form, simply parse the integer. // If there is no suffix, use the index array length. @@ -166,7 +166,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_extract, (c v, o idx) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) { idx_const } else { @@ -194,7 +194,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_neg, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { let ret_lane = match lane_layout.ty.kind() { ty::Int(_) => fx.bcx.ins().ineg(lane), @@ -206,7 +206,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_fabs, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { let ret_lane = fx.bcx.ins().fabs(lane); CValue::by_val(ret_lane, ret_lane_layout) @@ -214,7 +214,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_fsqrt, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { let ret_lane = fx.bcx.ins().sqrt(lane); CValue::by_val(ret_lane, ret_lane_layout) @@ -222,23 +222,23 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_add, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret); }; simd_sub, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret); }; simd_mul, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret); }; simd_div, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret); }; simd_rem, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { let res_lane = match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane), @@ -261,28 +261,28 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }); }; simd_shl, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, ishl(x, y) -> ret); }; simd_shr, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, ushr|sshr(x, y) -> ret); }; simd_and, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, band(x, y) -> ret); }; simd_or, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, bor(x, y) -> ret); }; simd_xor, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, bxor(x, y) -> ret); }; simd_fma, (c a, c b, c c) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); assert_eq!(a.layout(), b.layout()); assert_eq!(a.layout(), c.layout()); let layout = a.layout(); @@ -305,16 +305,16 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_fmin, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_flt_binop!(fx, fmin(x, y) -> ret); }; simd_fmax, (c x, c y) { - validate_simd_type!(fx, intrinsic, span, x.layout().ty); + validate_simd_type(fx, intrinsic, span, x.layout().ty); simd_flt_binop!(fx, fmax(x, y) -> ret); }; simd_round, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { let res_lane = match lane_layout.ty.kind() { ty::Float(FloatTy::F32) => fx.lib_call( @@ -335,21 +335,21 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }); }; simd_ceil, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { let ret_lane = fx.bcx.ins().ceil(lane); CValue::by_val(ret_lane, ret_lane_layout) }); }; simd_floor, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { let ret_lane = fx.bcx.ins().floor(lane); CValue::by_val(ret_lane, ret_lane_layout) }); }; simd_trunc, (c a) { - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { let ret_lane = fx.bcx.ins().trunc(lane); CValue::by_val(ret_lane, ret_lane_layout) @@ -357,7 +357,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fadd(a, b) @@ -368,7 +368,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fmul(a, b) @@ -379,32 +379,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_reduce_all, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b)); }; simd_reduce_any, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b)); }; simd_reduce_and, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b)); }; simd_reduce_or, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b)); }; simd_reduce_xor, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b)); }; simd_reduce_min, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, None, ret, |fx, layout, a, b| { let lt = match layout.ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b), @@ -417,7 +417,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_reduce_max, (c v) { - validate_simd_type!(fx, intrinsic, span, v.layout().ty); + validate_simd_type(fx, intrinsic, span, v.layout().ty); simd_reduce(fx, v, None, ret, |fx, layout, a, b| { let gt = match layout.ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b), @@ -430,8 +430,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_select, (c m, c a, c b) { - validate_simd_type!(fx, intrinsic, span, m.layout().ty); - validate_simd_type!(fx, intrinsic, span, a.layout().ty); + validate_simd_type(fx, intrinsic, span, m.layout().ty); + validate_simd_type(fx, intrinsic, span, a.layout().ty); assert_eq!(a.layout(), b.layout()); let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); From 4e3a8d5fb90dca1dd8462ea45b73d23ba0603f76 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:30:01 +0100 Subject: [PATCH 11/18] Move validate_simd_type from intrinsics to intrinsics::simd --- src/intrinsics/mod.rs | 9 --------- src/intrinsics/simd.rs | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index f305942a8c3b..36490c370009 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -91,15 +91,6 @@ macro validate_atomic_type($fx:ident, $intrinsic:ident, $span:ident, $ty:expr) { } } -fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) { - if !ty.is_simd() { - fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty)); - // Prevent verifier error - crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); - return; - } -} - pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option { let (element, count) = match layout.abi { Abi::Vector { element, count } => (element, count), diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 9e42ff587bd7..8bc4dd8e615b 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -6,6 +6,15 @@ use rustc_span::Symbol; use super::*; use crate::prelude::*; +fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: Span, ty: Ty<'_>) { + if !ty.is_simd() { + fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty)); + // Prevent verifier error + crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); + return; + } +} + pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: Symbol, From 8ace43e65012a5e4a3e07d399a2a5832e18cf917 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:34:55 +0100 Subject: [PATCH 12/18] Move a couple of macros to intrinsics::simd --- src/intrinsics/mod.rs | 115 ----------------------------------------- src/intrinsics/simd.rs | 115 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 115 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 36490c370009..d6b35bba9e7f 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -234,121 +234,6 @@ fn bool_to_zero_or_max_uint<'tcx>( CValue::by_val(res, layout) } -macro simd_cmp { - ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { - let vector_ty = clif_vector_type($fx.tcx, $x.layout()); - - if let Some(vector_ty) = vector_ty { - let x = $x.load_scalar($fx); - let y = $y.load_scalar($fx); - let val = if vector_ty.lane_type().is_float() { - $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y) - } else { - $fx.bcx.ins().icmp(IntCC::$cc, x, y) - }; - - // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1. - let val = $fx.bcx.ins().raw_bitcast(vector_ty, val); - - $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout())); - } else { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) - }, - ); - } - }, - ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { - // FIXME use vector icmp when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) - }, - ); - }, -} - -macro simd_int_binop { - ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_int_binop!($fx, $op|$op($x, $y) -> $ret); - }, - ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); - }, -} - -macro simd_int_flt_binop { - ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret); - }, - ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); - }, -} - -macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); -} - pub(crate) fn codegen_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, instance: Instance<'tcx>, diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 8bc4dd8e615b..181b45a87409 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -15,6 +15,121 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: } } +macro simd_cmp { + ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { + let vector_ty = clif_vector_type($fx.tcx, $x.layout()); + + if let Some(vector_ty) = vector_ty { + let x = $x.load_scalar($fx); + let y = $y.load_scalar($fx); + let val = if vector_ty.lane_type().is_float() { + $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y) + } else { + $fx.bcx.ins().icmp(IntCC::$cc, x, y) + }; + + // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1. + let val = $fx.bcx.ins().raw_bitcast(vector_ty, val); + + $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout())); + } else { + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + }, + ); + } + }, + ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { + // FIXME use vector icmp when possible + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + }, + ); + }, +} + +macro simd_int_binop { + ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_int_binop!($fx, $op|$op($x, $y) -> $ret); + }, + ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); + }, +} + +macro simd_int_flt_binop { + ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret); + }, + ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); + }, +} + +macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); +} + pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: Symbol, From 78e2d4a275caec70a0b64bdc97084bacc3610076 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:39:00 +0100 Subject: [PATCH 13/18] Remove support for vector icmp for now Real simd support will need an overhaul in the future anyway. For now it only complicates the code. --- src/intrinsics/simd.rs | 46 ++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 181b45a87409..443e2954e511 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -17,37 +17,21 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: macro simd_cmp { ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { - let vector_ty = clif_vector_type($fx.tcx, $x.layout()); - - if let Some(vector_ty) = vector_ty { - let x = $x.load_scalar($fx); - let y = $y.load_scalar($fx); - let val = if vector_ty.lane_type().is_float() { - $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y) - } else { - $fx.bcx.ins().icmp(IntCC::$cc, x, y) - }; - - // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1. - let val = $fx.bcx.ins().raw_bitcast(vector_ty, val); - - $ret.write_cvalue($fx, CValue::by_val(val, $ret.layout())); - } else { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) - }, - ); - } + // FIXME use vector icmp when possible + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + }, + ); }, ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { // FIXME use vector icmp when possible From d4d2b24d5530c50aa80985938fe13e51e6db8750 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 17:44:55 +0100 Subject: [PATCH 14/18] Slightly simplify some macros by removing an extra case for when signedness doesn't matter This is slightly more verbose when invoking the macro. --- src/intrinsics/simd.rs | 146 +++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 86 deletions(-) diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 443e2954e511..bea99346b0a8 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -15,90 +15,64 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: } } -macro simd_cmp { - ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { - // FIXME use vector icmp when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) - }, - ); - }, - ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => { - // FIXME use vector icmp when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) - }, - ); - }, +macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) { + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + }, + ); } -macro simd_int_binop { - ($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_int_binop!($fx, $op|$op($x, $y) -> $ret); - }, - ($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); - }, +macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) { + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); } -macro simd_int_flt_binop { - ($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret); - }, - ($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); - }, +macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) { + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + $fx, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); } macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { + // FIXME use vector instructions when possible simd_pair_for_each_lane( $fx, $x, @@ -143,11 +117,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_eq, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_cmp!(fx, Equal|Equal(x, y) -> ret); + simd_cmp!(fx, Equal|Equal|Equal(x, y) -> ret); }; simd_ne, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret); + simd_cmp!(fx, NotEqual|NotEqual|NotEqual(x, y) -> ret); }; simd_lt, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); @@ -331,15 +305,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_add, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret); + simd_int_flt_binop!(fx, iadd|iadd|fadd(x, y) -> ret); }; simd_sub, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret); + simd_int_flt_binop!(fx, isub|isub|fsub(x, y) -> ret); }; simd_mul, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret); + simd_int_flt_binop!(fx, imul|imul|fmul(x, y) -> ret); }; simd_div, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); @@ -370,7 +344,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_shl, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_binop!(fx, ishl(x, y) -> ret); + simd_int_binop!(fx, ishl|ishl(x, y) -> ret); }; simd_shr, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); @@ -378,15 +352,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_and, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_binop!(fx, band(x, y) -> ret); + simd_int_binop!(fx, band|band(x, y) -> ret); }; simd_or, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_binop!(fx, bor(x, y) -> ret); + simd_int_binop!(fx, bor|bor(x, y) -> ret); }; simd_xor, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_int_binop!(fx, bxor(x, y) -> ret); + simd_int_binop!(fx, bxor|bxor(x, y) -> ret); }; simd_fma, (c a, c b, c c) { From 57d25ef60e7237e18092aea4081a11a4d5a28c1c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 18:32:27 +0100 Subject: [PATCH 15/18] Use simplified version of bool_to_zero_or_max_uint in simd_cmp --- src/intrinsics/simd.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index bea99346b0a8..f38a30011d39 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -29,7 +29,13 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + + let ty = fx.clif_type(res_lane_layout.ty).unwrap(); + + let res_lane = fx.bcx.ins().bint(ty, res_lane); + let res_lane = fx.bcx.ins().ineg(res_lane); + + CValue::by_val(res_lane, res_lane_layout) }, ); } From b60eced4057791f9a5c94ac5c9a1c26d015dd2b7 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 18:43:08 +0100 Subject: [PATCH 16/18] Return Value instead of CValue from the simd_for_each_lane closure --- src/intrinsics/llvm.rs | 14 ++++---- src/intrinsics/mod.rs | 10 +++--- src/intrinsics/simd.rs | 74 +++++++++++++++++------------------------- 3 files changed, 42 insertions(+), 56 deletions(-) diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index be3704ca2768..13c7cf677edd 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -83,22 +83,20 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }; "llvm.x86.sse2.psrli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| { - let res_lane = match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| { + match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), - }; - CValue::by_val(res_lane, res_lane_layout) + } }); }; "llvm.x86.sse2.pslli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| { - let res_lane = match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| { + match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), - }; - CValue::by_val(res_lane, res_lane_layout) + } }); }; "llvm.x86.sse2.storeu.dq", (v mem_addr, c a) { diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index d6b35bba9e7f..dee192a69af3 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -113,7 +113,7 @@ fn simd_for_each_lane<'tcx>( TyAndLayout<'tcx>, TyAndLayout<'tcx>, Value, - ) -> CValue<'tcx>, + ) -> Value, ) { let layout = val.layout(); @@ -127,6 +127,7 @@ fn simd_for_each_lane<'tcx>( let lane = val.value_lane(fx, lane_idx).load_scalar(fx); let res_lane = f(fx, lane_layout, ret_lane_layout, lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); } @@ -143,7 +144,7 @@ fn simd_pair_for_each_lane<'tcx>( TyAndLayout<'tcx>, Value, Value, - ) -> CValue<'tcx>, + ) -> Value, ) { assert_eq!(x.layout(), y.layout()); let layout = x.layout(); @@ -159,6 +160,7 @@ fn simd_pair_for_each_lane<'tcx>( let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx); let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); } @@ -215,7 +217,7 @@ fn bool_to_zero_or_max_uint<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, layout: TyAndLayout<'tcx>, val: Value, -) -> CValue<'tcx> { +) -> Value { let ty = fx.clif_type(layout.ty).unwrap(); let int_ty = match ty { @@ -231,7 +233,7 @@ fn bool_to_zero_or_max_uint<'tcx>( res = fx.bcx.ins().bitcast(ty, res); } - CValue::by_val(res, layout) + res } pub(crate) fn codegen_intrinsic_call<'tcx>( diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index f38a30011d39..0ab48aaea1ad 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -33,9 +33,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) let ty = fx.clif_type(res_lane_layout.ty).unwrap(); let res_lane = fx.bcx.ins().bint(ty, res_lane); - let res_lane = fx.bcx.ins().ineg(res_lane); - - CValue::by_val(res_lane, res_lane_layout) + fx.bcx.ins().ineg(res_lane) }, ); } @@ -47,13 +45,12 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r $x, $y, $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { + |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) + } }, ); } @@ -65,14 +62,13 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $x, $y, $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { + |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) + } }, ); } @@ -84,12 +80,11 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { $x, $y, $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { + |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + match lane_layout.ty.kind() { ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) + } }, ); } @@ -116,8 +111,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let from_signed = type_sign(lane_layout.ty); let to_signed = type_sign(ret_lane_layout.ty); - let ret_lane = clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed); - CValue::by_val(ret_lane, ret_lane_layout) + clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed) }); }; @@ -283,29 +277,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_neg, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { - let ret_lane = match lane_layout.ty.kind() { + simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| { + match lane_layout.ty.kind() { ty::Int(_) => fx.bcx.ins().ineg(lane), ty::Float(_) => fx.bcx.ins().fneg(lane), _ => unreachable!(), - }; - CValue::by_val(ret_lane, ret_lane_layout) + } }); }; simd_fabs, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { - let ret_lane = fx.bcx.ins().fabs(lane); - CValue::by_val(ret_lane, ret_lane_layout) + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + fx.bcx.ins().fabs(lane) }); }; simd_fsqrt, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { - let ret_lane = fx.bcx.ins().sqrt(lane); - CValue::by_val(ret_lane, ret_lane_layout) + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + fx.bcx.ins().sqrt(lane) }); }; @@ -327,8 +318,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_rem, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { + simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane), ty::Float(FloatTy::F32) => fx.lib_call( @@ -344,8 +335,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( &[x_lane, y_lane], )[0], _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) + } }); }; simd_shl, (c x, c y) { @@ -403,8 +393,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_round, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { - let res_lane = match lane_layout.ty.kind() { + simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| { + match lane_layout.ty.kind() { ty::Float(FloatTy::F32) => fx.lib_call( "roundf", vec![AbiParam::new(types::F32)], @@ -418,29 +408,25 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( &[lane], )[0], _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) + } }); }; simd_ceil, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { - let ret_lane = fx.bcx.ins().ceil(lane); - CValue::by_val(ret_lane, ret_lane_layout) + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + fx.bcx.ins().ceil(lane) }); }; simd_floor, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { - let ret_lane = fx.bcx.ins().floor(lane); - CValue::by_val(ret_lane, ret_lane_layout) + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + fx.bcx.ins().floor(lane) }); }; simd_trunc, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { - let ret_lane = fx.bcx.ins().trunc(lane); - CValue::by_val(ret_lane, ret_lane_layout) + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + fx.bcx.ins().trunc(lane) }); }; From 2633024850e9b7fa8aa9a856953312bccc3740bc Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 18:55:57 +0100 Subject: [PATCH 17/18] Don't monomorphize the simd helpers for each closure This halves the total amount of llvm ir lines for simd related functions from 18227 to 9604. --- src/intrinsics/llvm.rs | 6 +++--- src/intrinsics/mod.rs | 8 ++++---- src/intrinsics/simd.rs | 44 +++++++++++++++++++++--------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index 13c7cf677edd..8bcfbc945fbf 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -73,7 +73,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( kind => unreachable!("kind {:?}", kind), }; - simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| { let res_lane = match lane_layout.ty.kind() { ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), @@ -83,7 +83,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }; "llvm.x86.sse2.psrli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| { match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), @@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }; "llvm.x86.sse2.pslli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _res_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| { match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index dee192a69af3..473afd168279 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -108,7 +108,7 @@ fn simd_for_each_lane<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, ret: CPlace<'tcx>, - f: impl Fn( + f: &dyn Fn( &mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, TyAndLayout<'tcx>, @@ -138,7 +138,7 @@ fn simd_pair_for_each_lane<'tcx>( x: CValue<'tcx>, y: CValue<'tcx>, ret: CPlace<'tcx>, - f: impl Fn( + f: &dyn Fn( &mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, TyAndLayout<'tcx>, @@ -171,7 +171,7 @@ fn simd_reduce<'tcx>( val: CValue<'tcx>, acc: Option, ret: CPlace<'tcx>, - f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value, ) { let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); let lane_layout = fx.layout_of(lane_ty); @@ -192,7 +192,7 @@ fn simd_reduce_bool<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, ret: CPlace<'tcx>, - f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value, ) { let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); assert!(ret.layout().ty.is_bool()); diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 0ab48aaea1ad..dc04c7643b2a 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -22,7 +22,7 @@ macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) $x, $y, $ret, - |fx, lane_layout, res_lane_layout, x_lane, y_lane| { + &|fx, lane_layout, res_lane_layout, x_lane, y_lane| { let res_lane = match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), @@ -45,7 +45,7 @@ macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $r $x, $y, $ret, - |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), @@ -62,7 +62,7 @@ macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $x, $y, $ret, - |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), @@ -80,7 +80,7 @@ macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { $x, $y, $ret, - |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { match lane_layout.ty.kind() { ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), _ => unreachable!("{:?}", lane_layout.ty), @@ -105,7 +105,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_cast, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| { let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap(); let from_signed = type_sign(lane_layout.ty); @@ -277,7 +277,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_neg, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| { match lane_layout.ty.kind() { ty::Int(_) => fx.bcx.ins().ineg(lane), ty::Float(_) => fx.bcx.ins().fneg(lane), @@ -288,14 +288,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_fabs, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { fx.bcx.ins().fabs(lane) }); }; simd_fsqrt, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { fx.bcx.ins().sqrt(lane) }); }; @@ -318,7 +318,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_rem, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { + simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { match lane_layout.ty.kind() { ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane), @@ -393,7 +393,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_round, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| { match lane_layout.ty.kind() { ty::Float(FloatTy::F32) => fx.lib_call( "roundf", @@ -413,26 +413,26 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_ceil, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { fx.bcx.ins().ceil(lane) }); }; simd_floor, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { fx.bcx.ins().floor(lane) }); }; simd_trunc, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, |fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { fx.bcx.ins().trunc(lane) }); }; simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { + simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fadd(a, b) } else { @@ -443,7 +443,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { + simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fmul(a, b) } else { @@ -454,32 +454,32 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_all, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b)); + simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b)); }; simd_reduce_any, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b)); + simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b)); }; simd_reduce_and, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b)); }; simd_reduce_or, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b)); }; simd_reduce_xor, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b)); }; simd_reduce_min, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, |fx, layout, a, b| { + simd_reduce(fx, v, None, ret, &|fx, layout, a, b| { let lt = match layout.ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b), ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b), @@ -492,7 +492,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_max, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, |fx, layout, a, b| { + simd_reduce(fx, v, None, ret, &|fx, layout, a, b| { let gt = match layout.ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b), ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b), From b7cda373d585d024b120401b2b796181567e5ae9 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 9 Jan 2022 19:07:15 +0100 Subject: [PATCH 18/18] Pass Ty instead of TyAndLayout to the closure of various simd helpers This reduces the total amount of llvm ir lines for simd related functions from 9604 to 9467. --- src/intrinsics/llvm.rs | 12 ++-- src/intrinsics/mod.rs | 27 +++----- src/intrinsics/simd.rs | 146 +++++++++++++++++------------------------ 3 files changed, 75 insertions(+), 110 deletions(-) diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index 8bcfbc945fbf..20f8699d12ab 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -73,17 +73,17 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( kind => unreachable!("kind {:?}", kind), }; - simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { + simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| { + let res_lane = match lane_ty.kind() { ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), + _ => unreachable!("{:?}", lane_ty), }; - bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) + bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) }); }; "llvm.x86.sse2.psrli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), @@ -92,7 +92,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }; "llvm.x86.sse2.pslli.d", (c a, o imm8) { let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _res_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { match imm8.try_to_bits(Size::from_bytes(4)).unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) { imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), _ => fx.bcx.ins().iconst(types::I32, 0), diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 473afd168279..1e384668fc72 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -108,12 +108,7 @@ fn simd_for_each_lane<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, ret: CPlace<'tcx>, - f: &dyn Fn( - &mut FunctionCx<'_, '_, 'tcx>, - TyAndLayout<'tcx>, - TyAndLayout<'tcx>, - Value, - ) -> Value, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value) -> Value, ) { let layout = val.layout(); @@ -126,7 +121,7 @@ fn simd_for_each_lane<'tcx>( for lane_idx in 0..lane_count { let lane = val.value_lane(fx, lane_idx).load_scalar(fx); - let res_lane = f(fx, lane_layout, ret_lane_layout, lane); + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lane); let res_lane = CValue::by_val(res_lane, ret_lane_layout); ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); @@ -138,13 +133,7 @@ fn simd_pair_for_each_lane<'tcx>( x: CValue<'tcx>, y: CValue<'tcx>, ret: CPlace<'tcx>, - f: &dyn Fn( - &mut FunctionCx<'_, '_, 'tcx>, - TyAndLayout<'tcx>, - TyAndLayout<'tcx>, - Value, - Value, - ) -> Value, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value, ) { assert_eq!(x.layout(), y.layout()); let layout = x.layout(); @@ -159,7 +148,7 @@ fn simd_pair_for_each_lane<'tcx>( let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx); let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx); - let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane); + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane); let res_lane = CValue::by_val(res_lane, ret_lane_layout); ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); @@ -171,7 +160,7 @@ fn simd_reduce<'tcx>( val: CValue<'tcx>, acc: Option, ret: CPlace<'tcx>, - f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Value, Value) -> Value, ) { let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); let lane_layout = fx.layout_of(lane_ty); @@ -181,7 +170,7 @@ fn simd_reduce<'tcx>( if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) }; for lane_idx in start_lane..lane_count { let lane = val.value_lane(fx, lane_idx).load_scalar(fx); - res_val = f(fx, lane_layout, res_val, lane); + res_val = f(fx, lane_layout.ty, res_val, lane); } let res = CValue::by_val(res_val, lane_layout); ret.write_cvalue(fx, res); @@ -215,10 +204,10 @@ fn simd_reduce_bool<'tcx>( fn bool_to_zero_or_max_uint<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, - layout: TyAndLayout<'tcx>, + ty: Ty<'tcx>, val: Value, ) -> Value { - let ty = fx.clif_type(layout.ty).unwrap(); + let ty = fx.clif_type(ty).unwrap(); let int_ty = match ty { types::F32 => types::I32, diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index dc04c7643b2a..106a190096db 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -17,76 +17,52 @@ fn validate_simd_type(fx: &mut FunctionCx<'_, '_, '_>, intrinsic: Symbol, span: macro simd_cmp($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) { // FIXME use vector instructions when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - &|fx, lane_layout, res_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; + simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| { + let res_lane = match lane_ty.kind() { + ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane), + _ => unreachable!("{:?}", lane_ty), + }; - let ty = fx.clif_type(res_lane_layout.ty).unwrap(); + let ty = fx.clif_type(res_lane_ty).unwrap(); - let res_lane = fx.bcx.ins().bint(ty, res_lane); - fx.bcx.ins().ineg(res_lane) - }, - ); + let res_lane = fx.bcx.ins().bint(ty, res_lane); + fx.bcx.ins().ineg(res_lane) + }); } macro simd_int_binop($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) { // FIXME use vector instructions when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { - match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - } - }, - ); + simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { + match lane_ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + _ => unreachable!("{:?}", lane_ty), + } + }); } macro simd_int_flt_binop($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) { // FIXME use vector instructions when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { - match lane_layout.ty.kind() { - ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), - ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), - ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - } - }, - ); + simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { + match lane_ty.kind() { + ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane), + ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane), + _ => unreachable!("{:?}", lane_ty), + } + }); } macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { // FIXME use vector instructions when possible - simd_pair_for_each_lane( - $fx, - $x, - $y, - $ret, - &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { - match lane_layout.ty.kind() { - ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - } - }, - ); + simd_pair_for_each_lane($fx, $x, $y, $ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { + match lane_ty.kind() { + ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), + _ => unreachable!("{:?}", lane_ty), + } + }); } pub(super) fn codegen_simd_intrinsic_call<'tcx>( @@ -105,13 +81,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_cast, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, lane_layout, ret_lane_layout, lane| { - let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap(); + simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| { + let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap(); - let from_signed = type_sign(lane_layout.ty); - let to_signed = type_sign(ret_lane_layout.ty); + let from_signed = type_sign(lane_ty); + let to_signed = type_sign(ret_lane_ty); - clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed) + clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed) }); }; @@ -277,8 +253,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_neg, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| { - match lane_layout.ty.kind() { + simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| { + match lane_ty.kind() { ty::Int(_) => fx.bcx.ins().ineg(lane), ty::Float(_) => fx.bcx.ins().fneg(lane), _ => unreachable!(), @@ -288,14 +264,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_fabs, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| { fx.bcx.ins().fabs(lane) }); }; simd_fsqrt, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| { fx.bcx.ins().sqrt(lane) }); }; @@ -318,8 +294,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; simd_rem, (c x, c y) { validate_simd_type(fx, intrinsic, span, x.layout().ty); - simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_layout, _ret_lane_layout, x_lane, y_lane| { - match lane_layout.ty.kind() { + simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| { + match lane_ty.kind() { ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane), ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane), ty::Float(FloatTy::F32) => fx.lib_call( @@ -334,7 +310,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( vec![AbiParam::new(types::F64)], &[x_lane, y_lane], )[0], - _ => unreachable!("{:?}", lane_layout.ty), + _ => unreachable!("{:?}", lane_ty), } }); }; @@ -393,8 +369,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_round, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, lane_layout, _ret_lane_layout, lane| { - match lane_layout.ty.kind() { + simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| { + match lane_ty.kind() { ty::Float(FloatTy::F32) => fx.lib_call( "roundf", vec![AbiParam::new(types::F32)], @@ -407,33 +383,33 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( vec![AbiParam::new(types::F64)], &[lane], )[0], - _ => unreachable!("{:?}", lane_layout.ty), + _ => unreachable!("{:?}", lane_ty), } }); }; simd_ceil, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| { fx.bcx.ins().ceil(lane) }); }; simd_floor, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| { fx.bcx.ins().floor(lane) }); }; simd_trunc, (c a) { validate_simd_type(fx, intrinsic, span, a.layout().ty); - simd_for_each_lane(fx, a, ret, &|fx, _lane_layout, _ret_lane_layout, lane| { + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _ret_lane_ty, lane| { fx.bcx.ins().trunc(lane) }); }; simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| { - if lane_layout.ty.is_floating_point() { + simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| { + if lane_ty.is_floating_point() { fx.bcx.ins().fadd(a, b) } else { fx.bcx.ins().iadd(a, b) @@ -443,8 +419,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, Some(acc), ret, &|fx, lane_layout, a, b| { - if lane_layout.ty.is_floating_point() { + simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| { + if lane_ty.is_floating_point() { fx.bcx.ins().fmul(a, b) } else { fx.bcx.ins().imul(a, b) @@ -464,23 +440,23 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_and, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().band(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b)); }; simd_reduce_or, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bor(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b)); }; simd_reduce_xor, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, &|fx, _layout, a, b| fx.bcx.ins().bxor(a, b)); + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b)); }; simd_reduce_min, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, &|fx, layout, a, b| { - let lt = match layout.ty.kind() { + simd_reduce(fx, v, None, ret, &|fx, ty, a, b| { + let lt = match ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b), ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b), ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::LessThan, a, b), @@ -492,8 +468,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_max, (c v) { validate_simd_type(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, None, ret, &|fx, layout, a, b| { - let gt = match layout.ty.kind() { + simd_reduce(fx, v, None, ret, &|fx, ty, a, b| { + let gt = match ty.kind() { ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b), ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b), ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::GreaterThan, a, b),