Merge pull request #1277 from bjorn3/simd_improvements
Implement a couple of portable simd intrinsics
This commit is contained in:
commit
fa7660440b
5 changed files with 190 additions and 218 deletions
|
|
@ -1,4 +1,4 @@
|
|||
From 82f597cf81b169b0e72a576ac8751f598c059c48 Mon Sep 17 00:00:00 2001
|
||||
From b742f03694b920cc14400727d54424e8e1b60928 Mon Sep 17 00:00:00 2001
|
||||
From: bjorn3 <bjorn3@users.noreply.github.com>
|
||||
Date: Thu, 18 Nov 2021 19:28:40 +0100
|
||||
Subject: [PATCH] Disable unsupported tests
|
||||
|
|
@ -6,170 +6,11 @@ Subject: [PATCH] Disable unsupported tests
|
|||
---
|
||||
crates/core_simd/src/elements/int.rs | 8 ++++++++
|
||||
crates/core_simd/src/elements/uint.rs | 4 ++++
|
||||
crates/core_simd/src/masks/full_masks.rs | 9 +++++++++
|
||||
crates/core_simd/src/masks/full_masks.rs | 6 ++++++
|
||||
crates/core_simd/src/vector.rs | 2 ++
|
||||
crates/core_simd/tests/masks.rs | 2 ++
|
||||
5 files changed, 25 insertions(+)
|
||||
crates/core_simd/tests/masks.rs | 3 ---
|
||||
5 files changed, 20 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
|
||||
index 9b8c37e..ea95f08 100644
|
||||
--- a/crates/core_simd/src/elements/int.rs
|
||||
+++ b/crates/core_simd/src/elements/int.rs
|
||||
@@ -11,6 +11,7 @@ pub trait SimdInt: Copy + Sealed {
|
||||
/// Scalar type contained by this SIMD vector type.
|
||||
type Scalar;
|
||||
|
||||
+ /*
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
/// # Examples
|
||||
@@ -45,6 +46,7 @@ pub trait SimdInt: Copy + Sealed {
|
||||
/// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
|
||||
/// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
|
||||
fn saturating_sub(self, second: Self) -> Self;
|
||||
+ */
|
||||
|
||||
/// Lanewise absolute value, implemented in Rust.
|
||||
/// Every lane becomes its absolute value.
|
||||
@@ -61,6 +63,7 @@ pub trait SimdInt: Copy + Sealed {
|
||||
/// ```
|
||||
fn abs(self) -> Self;
|
||||
|
||||
+ /*
|
||||
/// Lanewise saturating absolute value, implemented in Rust.
|
||||
/// As abs(), except the MIN value becomes MAX instead of itself.
|
||||
///
|
||||
@@ -96,6 +99,7 @@ pub trait SimdInt: Copy + Sealed {
|
||||
/// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
|
||||
/// ```
|
||||
fn saturating_neg(self) -> Self;
|
||||
+ */
|
||||
|
||||
/// Returns true for each positive lane and false if it is zero or negative.
|
||||
fn is_positive(self) -> Self::Mask;
|
||||
@@ -199,6 +203,7 @@ macro_rules! impl_trait {
|
||||
type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
|
||||
type Scalar = $ty;
|
||||
|
||||
+ /*
|
||||
#[inline]
|
||||
fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
@@ -210,6 +215,7 @@ macro_rules! impl_trait {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_sub(self, second) }
|
||||
}
|
||||
+ */
|
||||
|
||||
#[inline]
|
||||
fn abs(self) -> Self {
|
||||
@@ -218,6 +224,7 @@ macro_rules! impl_trait {
|
||||
(self^m) - m
|
||||
}
|
||||
|
||||
+ /*
|
||||
#[inline]
|
||||
fn saturating_abs(self) -> Self {
|
||||
// arith shift for -1 or 0 mask based on sign bit, giving 2s complement
|
||||
@@ -230,6 +237,7 @@ macro_rules! impl_trait {
|
||||
fn saturating_neg(self) -> Self {
|
||||
Self::splat(0).saturating_sub(self)
|
||||
}
|
||||
+ */
|
||||
|
||||
#[inline]
|
||||
fn is_positive(self) -> Self::Mask {
|
||||
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
|
||||
index 21e7e76..0d6dee2 100644
|
||||
--- a/crates/core_simd/src/elements/uint.rs
|
||||
+++ b/crates/core_simd/src/elements/uint.rs
|
||||
@@ -6,6 +6,7 @@ pub trait SimdUint: Copy + Sealed {
|
||||
/// Scalar type contained by this SIMD vector type.
|
||||
type Scalar;
|
||||
|
||||
+ /*
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
/// # Examples
|
||||
@@ -40,6 +41,7 @@ pub trait SimdUint: Copy + Sealed {
|
||||
/// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
|
||||
/// assert_eq!(sat, Simd::splat(0));
|
||||
fn saturating_sub(self, second: Self) -> Self;
|
||||
+ */
|
||||
|
||||
/// Returns the sum of the lanes of the vector, with wrapping addition.
|
||||
fn reduce_sum(self) -> Self::Scalar;
|
||||
@@ -78,6 +80,7 @@ macro_rules! impl_trait {
|
||||
{
|
||||
type Scalar = $ty;
|
||||
|
||||
+ /*
|
||||
#[inline]
|
||||
fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
@@ -89,6 +92,7 @@ macro_rules! impl_trait {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_sub(self, second) }
|
||||
}
|
||||
+ */
|
||||
|
||||
#[inline]
|
||||
fn reduce_sum(self) -> Self::Scalar {
|
||||
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
|
||||
index adf0fcb..5b10292 100644
|
||||
--- a/crates/core_simd/src/masks/full_masks.rs
|
||||
+++ b/crates/core_simd/src/masks/full_masks.rs
|
||||
@@ -150,6 +150,7 @@ where
|
||||
super::Mask<T, LANES>: ToBitMaskArray,
|
||||
[(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
|
||||
{
|
||||
+ /*
|
||||
assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
|
||||
|
||||
// Safety: N is the correct bitmask size
|
||||
@@ -170,6 +171,8 @@ where
|
||||
|
||||
bitmask
|
||||
}
|
||||
+ */
|
||||
+ panic!();
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
@@ -209,6 +212,7 @@ where
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
+ /*
|
||||
// Safety: U is required to be the appropriate bitmask type
|
||||
let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
|
||||
|
||||
@@ -218,6 +222,8 @@ where
|
||||
} else {
|
||||
bitmask
|
||||
}
|
||||
+ */
|
||||
+ panic!();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -225,6 +231,7 @@ where
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
+ /*
|
||||
// LLVM assumes bit order should match endianness
|
||||
let bitmask = if cfg!(target_endian = "big") {
|
||||
bitmask.reverse_bits(LANES)
|
||||
@@ -240,6 +247,8 @@ where
|
||||
Self::splat(false).to_int(),
|
||||
))
|
||||
}
|
||||
+ */
|
||||
+ panic!();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
|
||||
index e8e8f68..7173c24 100644
|
||||
--- a/crates/core_simd/src/vector.rs
|
||||
|
|
@ -190,25 +31,5 @@ index e8e8f68..7173c24 100644
|
|||
}
|
||||
|
||||
impl<T, const LANES: usize> Copy for Simd<T, LANES>
|
||||
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
|
||||
index 673d0db..0d68b01 100644
|
||||
--- a/crates/core_simd/tests/masks.rs
|
||||
+++ b/crates/core_simd/tests/masks.rs
|
||||
@@ -59,6 +59,7 @@ macro_rules! test_mask_api {
|
||||
assert!(!v.all());
|
||||
}
|
||||
|
||||
+ /*
|
||||
#[test]
|
||||
fn roundtrip_int_conversion() {
|
||||
let values = [true, false, false, true, false, false, true, false];
|
||||
@@ -99,6 +100,7 @@ macro_rules! test_mask_api {
|
||||
assert_eq!(bitmask, 0b01);
|
||||
assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask);
|
||||
}
|
||||
+ */
|
||||
|
||||
#[test]
|
||||
fn cast() {
|
||||
--
|
||||
2.25.1
|
||||
|
|
|
|||
|
|
@ -29,8 +29,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
|
|||
let mut res = fx.bcx.ins().iconst(types::I32, 0);
|
||||
|
||||
for lane in (0..lane_count).rev() {
|
||||
let a_lane =
|
||||
a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
|
||||
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
|
||||
|
||||
// cast float to int
|
||||
let a_lane = match lane_ty {
|
||||
|
|
|
|||
|
|
@ -84,6 +84,30 @@ fn simd_for_each_lane<'tcx>(
|
|||
}
|
||||
}
|
||||
|
||||
fn simd_pair_for_each_lane_typed<'tcx>(
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
x: CValue<'tcx>,
|
||||
y: CValue<'tcx>,
|
||||
ret: CPlace<'tcx>,
|
||||
f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, CValue<'tcx>, CValue<'tcx>) -> CValue<'tcx>,
|
||||
) {
|
||||
assert_eq!(x.layout(), y.layout());
|
||||
let layout = x.layout();
|
||||
|
||||
let (lane_count, _lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let (ret_lane_count, _ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
|
||||
assert_eq!(lane_count, ret_lane_count);
|
||||
|
||||
for lane_idx in 0..lane_count {
|
||||
let x_lane = x.value_lane(fx, lane_idx);
|
||||
let y_lane = y.value_lane(fx, lane_idx);
|
||||
|
||||
let res_lane = f(fx, x_lane, y_lane);
|
||||
|
||||
ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
|
||||
fn simd_pair_for_each_lane<'tcx>(
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
x: CValue<'tcx>,
|
||||
|
|
@ -507,37 +531,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let signed = type_sign(lhs.layout().ty);
|
||||
|
||||
let checked_res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs);
|
||||
|
||||
let (val, has_overflow) = checked_res.load_scalar_pair(fx);
|
||||
let clif_ty = fx.clif_type(lhs.layout().ty).unwrap();
|
||||
|
||||
let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
|
||||
|
||||
let val = match (intrinsic, signed) {
|
||||
(sym::saturating_add, false) => fx.bcx.ins().select(has_overflow, max, val),
|
||||
(sym::saturating_sub, false) => fx.bcx.ins().select(has_overflow, min, val),
|
||||
(sym::saturating_add, true) => {
|
||||
let rhs = rhs.load_scalar(fx);
|
||||
let rhs_ge_zero =
|
||||
fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
|
||||
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
|
||||
fx.bcx.ins().select(has_overflow, sat_val, val)
|
||||
}
|
||||
(sym::saturating_sub, true) => {
|
||||
let rhs = rhs.load_scalar(fx);
|
||||
let rhs_ge_zero =
|
||||
fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
|
||||
let sat_val = fx.bcx.ins().select(rhs_ge_zero, min, max);
|
||||
fx.bcx.ins().select(has_overflow, sat_val, val)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let res = CValue::by_val(val, lhs.layout());
|
||||
|
||||
let res = crate::num::codegen_saturating_int_binop(fx, bin_op, lhs, rhs);
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
sym::rotate_left => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
use rustc_middle::ty::subst::SubstsRef;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_target::abi::Endian;
|
||||
|
||||
use super::*;
|
||||
use crate::prelude::*;
|
||||
|
|
@ -162,6 +163,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// FIXME remove this case
|
||||
intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap()
|
||||
};
|
||||
|
||||
|
|
@ -650,8 +652,128 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
}
|
||||
}
|
||||
|
||||
// simd_saturating_*
|
||||
// simd_bitmask
|
||||
sym::simd_select_bitmask => {
|
||||
intrinsic_args!(fx, args => (m, a, b); intrinsic);
|
||||
|
||||
if !a.layout().ty.is_simd() {
|
||||
report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
|
||||
return;
|
||||
}
|
||||
assert_eq!(a.layout(), b.layout());
|
||||
|
||||
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
|
||||
let lane_layout = fx.layout_of(lane_ty);
|
||||
|
||||
let m = m.load_scalar(fx);
|
||||
|
||||
for lane in 0..lane_count {
|
||||
let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
|
||||
let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
|
||||
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
|
||||
let b_lane = b.value_lane(fx, lane).load_scalar(fx);
|
||||
|
||||
let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
|
||||
let res_lane =
|
||||
CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
|
||||
|
||||
ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
|
||||
sym::simd_bitmask => {
|
||||
intrinsic_args!(fx, args => (a); intrinsic);
|
||||
|
||||
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
|
||||
let lane_clif_ty = fx.clif_type(lane_ty).unwrap();
|
||||
|
||||
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
|
||||
// vector mask and returns the most significant bit (MSB) of each lane in the form
|
||||
// of either:
|
||||
// * an unsigned integer
|
||||
// * an array of `u8`
|
||||
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
|
||||
//
|
||||
// The bit order of the result depends on the byte endianness, LSB-first for little
|
||||
// endian and MSB-first for big endian.
|
||||
let expected_int_bits = lane_count.max(8);
|
||||
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
|
||||
|
||||
match lane_ty.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {}
|
||||
_ => {
|
||||
fx.tcx.sess.span_fatal(
|
||||
span,
|
||||
&format!(
|
||||
"invalid monomorphization of `simd_bitmask` intrinsic: \
|
||||
vector argument `{}`'s element type `{}`, expected integer element \
|
||||
type",
|
||||
a.layout().ty,
|
||||
lane_ty
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let res_type =
|
||||
Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap();
|
||||
let mut res = fx.bcx.ins().iconst(res_type, 0);
|
||||
|
||||
let lanes = match fx.tcx.sess.target.endian {
|
||||
Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>,
|
||||
Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>,
|
||||
};
|
||||
for lane in lanes {
|
||||
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
|
||||
|
||||
// extract sign bit of an int
|
||||
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1));
|
||||
|
||||
// shift sign bit into result
|
||||
let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false);
|
||||
res = fx.bcx.ins().ishl_imm(res, 1);
|
||||
res = fx.bcx.ins().bor(res, a_lane_sign);
|
||||
}
|
||||
|
||||
match ret.layout().ty.kind() {
|
||||
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {}
|
||||
ty::Array(elem, len)
|
||||
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
|
||||
&& len.try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all())
|
||||
== Some(expected_bytes) => {}
|
||||
_ => {
|
||||
fx.tcx.sess.span_fatal(
|
||||
span,
|
||||
&format!(
|
||||
"invalid monomorphization of `simd_bitmask` intrinsic: \
|
||||
cannot return `{}`, expected `u{}` or `[u8; {}]`",
|
||||
ret.layout().ty,
|
||||
expected_int_bits,
|
||||
expected_bytes
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let res = CValue::by_val(res, ret.layout());
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
|
||||
sym::simd_saturating_add | sym::simd_saturating_sub => {
|
||||
intrinsic_args!(fx, args => (x, y); intrinsic);
|
||||
|
||||
let bin_op = match intrinsic {
|
||||
sym::simd_saturating_add => BinOp::Add,
|
||||
sym::simd_saturating_sub => BinOp::Sub,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// FIXME use vector instructions when possible
|
||||
simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
|
||||
crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane)
|
||||
});
|
||||
}
|
||||
|
||||
// simd_arith_offset
|
||||
// simd_scatter
|
||||
// simd_gather
|
||||
_ => {
|
||||
|
|
|
|||
36
src/num.rs
36
src/num.rs
|
|
@ -309,6 +309,42 @@ pub(crate) fn codegen_checked_int_binop<'tcx>(
|
|||
CValue::by_val_pair(res, has_overflow, out_layout)
|
||||
}
|
||||
|
||||
pub(crate) fn codegen_saturating_int_binop<'tcx>(
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
bin_op: BinOp,
|
||||
lhs: CValue<'tcx>,
|
||||
rhs: CValue<'tcx>,
|
||||
) -> CValue<'tcx> {
|
||||
assert_eq!(lhs.layout().ty, rhs.layout().ty);
|
||||
|
||||
let signed = type_sign(lhs.layout().ty);
|
||||
let clif_ty = fx.clif_type(lhs.layout().ty).unwrap();
|
||||
let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
|
||||
|
||||
let checked_res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs);
|
||||
let (val, has_overflow) = checked_res.load_scalar_pair(fx);
|
||||
|
||||
let val = match (bin_op, signed) {
|
||||
(BinOp::Add, false) => fx.bcx.ins().select(has_overflow, max, val),
|
||||
(BinOp::Sub, false) => fx.bcx.ins().select(has_overflow, min, val),
|
||||
(BinOp::Add, true) => {
|
||||
let rhs = rhs.load_scalar(fx);
|
||||
let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
|
||||
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
|
||||
fx.bcx.ins().select(has_overflow, sat_val, val)
|
||||
}
|
||||
(BinOp::Sub, true) => {
|
||||
let rhs = rhs.load_scalar(fx);
|
||||
let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
|
||||
let sat_val = fx.bcx.ins().select(rhs_ge_zero, min, max);
|
||||
fx.bcx.ins().select(has_overflow, sat_val, val)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
CValue::by_val(val, lhs.layout())
|
||||
}
|
||||
|
||||
pub(crate) fn codegen_float_binop<'tcx>(
|
||||
fx: &mut FunctionCx<'_, '_, 'tcx>,
|
||||
bin_op: BinOp,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue