Auto merge of #151794 - JonathanBrouwer:rollup-rGbYGX2, r=JonathanBrouwer
Rollup of 12 pull requests Successful merges: - rust-lang/rust#150491 (resolve: Mark items under exported ambiguous imports as exported) - rust-lang/rust#150720 (Do not suggest `derive` if there is already an impl) - rust-lang/rust#150968 (compiler-builtins: Remove the no-f16-f128 feature) - rust-lang/rust#151493 ([RFC] rustc_parse: improve the error diagnostic for "missing let in let chain") - rust-lang/rust#151660 (Bump `std`'s `backtrace`'s `rustc-demangle`) - rust-lang/rust#151696 (Borrowck: Simplify SCC annotation computation, placeholder rewriting) - rust-lang/rust#151704 (Implement `set_output_kind` for Emscripten linker) - rust-lang/rust#151706 (Remove Fuchsia from target OS list in unix.rs for sleep) - rust-lang/rust#151769 (fix undefined behavior in VecDeque::splice) - rust-lang/rust#151779 (stdarch subtree update) - rust-lang/rust#151449 ([rustdoc] Add regression test for rust-lang/rust#151411) - rust-lang/rust#151773 (clean up checks for constant promotion of integer division/remainder operations)
This commit is contained in:
commit
ba284f468c
72 changed files with 7314 additions and 3873 deletions
|
|
@ -62,57 +62,23 @@ impl scc::Annotations<RegionVid> for SccAnnotations<'_, '_, RegionTracker> {
|
|||
}
|
||||
|
||||
#[derive(Copy, Debug, Clone, PartialEq, Eq)]
|
||||
enum PlaceholderReachability {
|
||||
/// This SCC reaches no placeholders.
|
||||
NoPlaceholders,
|
||||
/// This SCC reaches at least one placeholder.
|
||||
Placeholders {
|
||||
/// The largest-universed placeholder we can reach
|
||||
max_universe: (UniverseIndex, RegionVid),
|
||||
struct PlaceholderReachability {
|
||||
/// The largest-universed placeholder we can reach
|
||||
max_universe: (UniverseIndex, RegionVid),
|
||||
|
||||
/// The placeholder with the smallest ID
|
||||
min_placeholder: RegionVid,
|
||||
/// The placeholder with the smallest ID
|
||||
min_placeholder: RegionVid,
|
||||
|
||||
/// The placeholder with the largest ID
|
||||
max_placeholder: RegionVid,
|
||||
},
|
||||
/// The placeholder with the largest ID
|
||||
max_placeholder: RegionVid,
|
||||
}
|
||||
|
||||
impl PlaceholderReachability {
|
||||
/// Merge the reachable placeholders of two graph components.
|
||||
fn merge(self, other: PlaceholderReachability) -> PlaceholderReachability {
|
||||
use PlaceholderReachability::*;
|
||||
match (self, other) {
|
||||
(NoPlaceholders, NoPlaceholders) => NoPlaceholders,
|
||||
(NoPlaceholders, p @ Placeholders { .. })
|
||||
| (p @ Placeholders { .. }, NoPlaceholders) => p,
|
||||
(
|
||||
Placeholders {
|
||||
min_placeholder: min_pl,
|
||||
max_placeholder: max_pl,
|
||||
max_universe: max_u,
|
||||
},
|
||||
Placeholders { min_placeholder, max_placeholder, max_universe },
|
||||
) => Placeholders {
|
||||
min_placeholder: min_pl.min(min_placeholder),
|
||||
max_placeholder: max_pl.max(max_placeholder),
|
||||
max_universe: max_u.max(max_universe),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn max_universe(&self) -> Option<(UniverseIndex, RegionVid)> {
|
||||
match self {
|
||||
Self::NoPlaceholders => None,
|
||||
Self::Placeholders { max_universe, .. } => Some(*max_universe),
|
||||
}
|
||||
}
|
||||
|
||||
/// If we have reached placeholders, determine if they can
|
||||
/// be named from this universe.
|
||||
fn can_be_named_by(&self, from: UniverseIndex) -> bool {
|
||||
self.max_universe()
|
||||
.is_none_or(|(max_placeholder_universe, _)| from.can_name(max_placeholder_universe))
|
||||
fn merge(&mut self, other: &Self) {
|
||||
self.max_universe = self.max_universe.max(other.max_universe);
|
||||
self.min_placeholder = self.min_placeholder.min(other.min_placeholder);
|
||||
self.max_placeholder = self.max_placeholder.max(other.max_placeholder);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +86,7 @@ impl PlaceholderReachability {
|
|||
/// the values of its elements. This annotates a single SCC.
|
||||
#[derive(Copy, Debug, Clone)]
|
||||
pub(crate) struct RegionTracker {
|
||||
reachable_placeholders: PlaceholderReachability,
|
||||
reachable_placeholders: Option<PlaceholderReachability>,
|
||||
|
||||
/// The largest universe nameable from this SCC.
|
||||
/// It is the smallest nameable universes of all
|
||||
|
|
@ -135,13 +101,13 @@ impl RegionTracker {
|
|||
pub(crate) fn new(rvid: RegionVid, definition: &RegionDefinition<'_>) -> Self {
|
||||
let reachable_placeholders =
|
||||
if matches!(definition.origin, NllRegionVariableOrigin::Placeholder(_)) {
|
||||
PlaceholderReachability::Placeholders {
|
||||
Some(PlaceholderReachability {
|
||||
max_universe: (definition.universe, rvid),
|
||||
min_placeholder: rvid,
|
||||
max_placeholder: rvid,
|
||||
}
|
||||
})
|
||||
} else {
|
||||
PlaceholderReachability::NoPlaceholders
|
||||
None
|
||||
};
|
||||
|
||||
Self {
|
||||
|
|
@ -159,43 +125,46 @@ impl RegionTracker {
|
|||
}
|
||||
|
||||
pub(crate) fn max_placeholder_universe_reached(self) -> UniverseIndex {
|
||||
if let Some((universe, _)) = self.reachable_placeholders.max_universe() {
|
||||
universe
|
||||
} else {
|
||||
UniverseIndex::ROOT
|
||||
}
|
||||
self.reachable_placeholders.map(|pls| pls.max_universe.0).unwrap_or(UniverseIndex::ROOT)
|
||||
}
|
||||
|
||||
/// Can all reachable placeholders be named from `from`?
|
||||
/// True vacuously in case no placeholders were reached.
|
||||
fn placeholders_can_be_named_by(&self, from: UniverseIndex) -> bool {
|
||||
self.reachable_placeholders.is_none_or(|pls| from.can_name(pls.max_universe.0))
|
||||
}
|
||||
|
||||
/// Determine if we can name all the placeholders in `other`.
|
||||
pub(crate) fn can_name_all_placeholders(&self, other: Self) -> bool {
|
||||
other.reachable_placeholders.can_be_named_by(self.max_nameable_universe.0)
|
||||
// HACK: We first check whether we can name the highest existential universe
|
||||
// of `other`. This only exists to avoid errors in case that scc already
|
||||
// depends on a placeholder it cannot name itself.
|
||||
self.max_nameable_universe().can_name(other.max_nameable_universe())
|
||||
|| other.placeholders_can_be_named_by(self.max_nameable_universe.0)
|
||||
}
|
||||
|
||||
/// If this SCC reaches a placeholder it can't name, return it.
|
||||
fn unnameable_placeholder(&self) -> Option<(UniverseIndex, RegionVid)> {
|
||||
self.reachable_placeholders.max_universe().filter(|&(placeholder_universe, _)| {
|
||||
!self.max_nameable_universe().can_name(placeholder_universe)
|
||||
})
|
||||
self.reachable_placeholders
|
||||
.filter(|pls| !self.max_nameable_universe().can_name(pls.max_universe.0))
|
||||
.map(|pls| pls.max_universe)
|
||||
}
|
||||
}
|
||||
|
||||
impl scc::Annotation for RegionTracker {
|
||||
fn merge_scc(self, other: Self) -> Self {
|
||||
fn update_scc(&mut self, other: &Self) {
|
||||
trace!("{:?} << {:?}", self.representative, other.representative);
|
||||
|
||||
Self {
|
||||
representative: self.representative.min(other.representative),
|
||||
max_nameable_universe: self.max_nameable_universe.min(other.max_nameable_universe),
|
||||
reachable_placeholders: self.reachable_placeholders.merge(other.reachable_placeholders),
|
||||
}
|
||||
self.representative = self.representative.min(other.representative);
|
||||
self.update_reachable(other);
|
||||
}
|
||||
|
||||
fn merge_reached(self, other: Self) -> Self {
|
||||
Self {
|
||||
max_nameable_universe: self.max_nameable_universe.min(other.max_nameable_universe),
|
||||
reachable_placeholders: self.reachable_placeholders.merge(other.reachable_placeholders),
|
||||
representative: self.representative,
|
||||
}
|
||||
fn update_reachable(&mut self, other: &Self) {
|
||||
self.max_nameable_universe = self.max_nameable_universe.min(other.max_nameable_universe);
|
||||
match (self.reachable_placeholders.as_mut(), other.reachable_placeholders.as_ref()) {
|
||||
(None, None) | (Some(_), None) => (),
|
||||
(None, Some(theirs)) => self.reachable_placeholders = Some(*theirs),
|
||||
(Some(ours), Some(theirs)) => ours.merge(theirs),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1208,10 +1208,23 @@ impl<'a> Linker for EmLinker<'a> {
|
|||
|
||||
fn set_output_kind(
|
||||
&mut self,
|
||||
_output_kind: LinkOutputKind,
|
||||
output_kind: LinkOutputKind,
|
||||
_crate_type: CrateType,
|
||||
_out_filename: &Path,
|
||||
) {
|
||||
match output_kind {
|
||||
LinkOutputKind::DynamicNoPicExe | LinkOutputKind::DynamicPicExe => {
|
||||
self.cmd.arg("-sMAIN_MODULE=2");
|
||||
}
|
||||
LinkOutputKind::DynamicDylib | LinkOutputKind::StaticDylib => {
|
||||
self.cmd.arg("-sSIDE_MODULE=2");
|
||||
}
|
||||
// -fno-pie is the default on Emscripten.
|
||||
LinkOutputKind::StaticNoPicExe | LinkOutputKind::StaticPicExe => {}
|
||||
LinkOutputKind::WasiReactorExe => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn link_dylib_by_name(&mut self, name: &str, _verbatim: bool, _as_needed: bool) {
|
||||
|
|
|
|||
|
|
@ -27,26 +27,18 @@ mod tests;
|
|||
/// the max/min element of the SCC, or all of the above.
|
||||
///
|
||||
/// Concretely, the both merge operations must commute, e.g. where `merge`
|
||||
/// is `merge_scc` and `merge_reached`: `a.merge(b) == b.merge(a)`
|
||||
/// is `update_scc` and `update_reached`: `a.merge(b) == b.merge(a)`
|
||||
///
|
||||
/// In general, what you want is probably always min/max according
|
||||
/// to some ordering, potentially with side constraints (min x such
|
||||
/// that P holds).
|
||||
pub trait Annotation: Debug + Copy {
|
||||
/// Merge two existing annotations into one during
|
||||
/// path compression.o
|
||||
fn merge_scc(self, other: Self) -> Self;
|
||||
/// path compression.
|
||||
fn update_scc(&mut self, other: &Self);
|
||||
|
||||
/// Merge a successor into this annotation.
|
||||
fn merge_reached(self, other: Self) -> Self;
|
||||
|
||||
fn update_scc(&mut self, other: Self) {
|
||||
*self = self.merge_scc(other)
|
||||
}
|
||||
|
||||
fn update_reachable(&mut self, other: Self) {
|
||||
*self = self.merge_reached(other)
|
||||
}
|
||||
fn update_reachable(&mut self, other: &Self);
|
||||
}
|
||||
|
||||
/// An accumulator for annotations.
|
||||
|
|
@ -70,12 +62,8 @@ impl<N: Idx, S: Idx + Ord> Annotations<N> for NoAnnotations<S> {
|
|||
|
||||
/// The empty annotation, which does nothing.
|
||||
impl Annotation for () {
|
||||
fn merge_reached(self, _other: Self) -> Self {
|
||||
()
|
||||
}
|
||||
fn merge_scc(self, _other: Self) -> Self {
|
||||
()
|
||||
}
|
||||
fn update_reachable(&mut self, _other: &Self) {}
|
||||
fn update_scc(&mut self, _other: &Self) {}
|
||||
}
|
||||
|
||||
/// Strongly connected components (SCC) of a graph. The type `N` is
|
||||
|
|
@ -614,7 +602,7 @@ where
|
|||
*min_depth = successor_min_depth;
|
||||
*min_cycle_root = successor_node;
|
||||
}
|
||||
current_component_annotation.update_scc(successor_annotation);
|
||||
current_component_annotation.update_scc(&successor_annotation);
|
||||
}
|
||||
// The starting node `node` is succeeded by a fully identified SCC
|
||||
// which is now added to the set under `scc_index`.
|
||||
|
|
@ -629,7 +617,7 @@ where
|
|||
// the `successors_stack` for later.
|
||||
trace!(?node, ?successor_scc_index);
|
||||
successors_stack.push(successor_scc_index);
|
||||
current_component_annotation.update_reachable(successor_annotation);
|
||||
current_component_annotation.update_reachable(&successor_annotation);
|
||||
}
|
||||
// `node` has no more (direct) successors; search recursively.
|
||||
None => {
|
||||
|
|
|
|||
|
|
@ -32,12 +32,12 @@ impl Maxes {
|
|||
}
|
||||
|
||||
impl Annotation for MaxReached {
|
||||
fn merge_scc(self, other: Self) -> Self {
|
||||
Self(std::cmp::max(other.0, self.0))
|
||||
fn update_scc(&mut self, other: &Self) {
|
||||
self.0 = self.0.max(other.0);
|
||||
}
|
||||
|
||||
fn merge_reached(self, other: Self) -> Self {
|
||||
Self(std::cmp::max(other.0, self.0))
|
||||
fn update_reachable(&mut self, other: &Self) {
|
||||
self.0 = self.0.max(other.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -75,13 +75,12 @@ impl Annotations<usize> for MinMaxes {
|
|||
}
|
||||
|
||||
impl Annotation for MinMaxIn {
|
||||
fn merge_scc(self, other: Self) -> Self {
|
||||
Self { min: std::cmp::min(self.min, other.min), max: std::cmp::max(self.max, other.max) }
|
||||
fn update_scc(&mut self, other: &Self) {
|
||||
self.min = self.min.min(other.min);
|
||||
self.max = self.max.max(other.max);
|
||||
}
|
||||
|
||||
fn merge_reached(self, _other: Self) -> Self {
|
||||
self
|
||||
}
|
||||
fn update_reachable(&mut self, _other: &Self) {}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -3281,6 +3281,63 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks if we can suggest a derive macro for the unmet trait bound.
|
||||
/// Returns Some(list_of_derives) if possible, or None if not.
|
||||
fn consider_suggesting_derives_for_ty(
|
||||
&self,
|
||||
trait_pred: ty::TraitPredicate<'tcx>,
|
||||
adt: ty::AdtDef<'tcx>,
|
||||
) -> Option<Vec<(String, Span, Symbol)>> {
|
||||
let diagnostic_name = self.tcx.get_diagnostic_name(trait_pred.def_id())?;
|
||||
|
||||
let can_derive = match diagnostic_name {
|
||||
sym::Default
|
||||
| sym::Eq
|
||||
| sym::PartialEq
|
||||
| sym::Ord
|
||||
| sym::PartialOrd
|
||||
| sym::Clone
|
||||
| sym::Copy
|
||||
| sym::Hash
|
||||
| sym::Debug => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
if !can_derive {
|
||||
return None;
|
||||
}
|
||||
|
||||
let trait_def_id = trait_pred.def_id();
|
||||
let self_ty = trait_pred.self_ty();
|
||||
|
||||
// We need to check if there is already a manual implementation of the trait
|
||||
// for this specific ADT to avoid suggesting `#[derive(..)]` that would conflict.
|
||||
if self.tcx.non_blanket_impls_for_ty(trait_def_id, self_ty).any(|impl_def_id| {
|
||||
self.tcx
|
||||
.type_of(impl_def_id)
|
||||
.instantiate_identity()
|
||||
.ty_adt_def()
|
||||
.is_some_and(|def| def.did() == adt.did())
|
||||
}) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut derives = Vec::new();
|
||||
let self_name = self_ty.to_string();
|
||||
let self_span = self.tcx.def_span(adt.did());
|
||||
|
||||
for super_trait in supertraits(self.tcx, ty::Binder::dummy(trait_pred.trait_ref)) {
|
||||
if let Some(parent_diagnostic_name) = self.tcx.get_diagnostic_name(super_trait.def_id())
|
||||
{
|
||||
derives.push((self_name.clone(), self_span, parent_diagnostic_name));
|
||||
}
|
||||
}
|
||||
|
||||
derives.push((self_name, self_span, diagnostic_name));
|
||||
|
||||
Some(derives)
|
||||
}
|
||||
|
||||
fn note_predicate_source_and_get_derives(
|
||||
&self,
|
||||
err: &mut Diag<'_>,
|
||||
|
|
@ -3298,35 +3355,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
|
|||
Some(adt) if adt.did().is_local() => adt,
|
||||
_ => continue,
|
||||
};
|
||||
if let Some(diagnostic_name) = self.tcx.get_diagnostic_name(trait_pred.def_id()) {
|
||||
let can_derive = match diagnostic_name {
|
||||
sym::Default
|
||||
| sym::Eq
|
||||
| sym::PartialEq
|
||||
| sym::Ord
|
||||
| sym::PartialOrd
|
||||
| sym::Clone
|
||||
| sym::Copy
|
||||
| sym::Hash
|
||||
| sym::Debug => true,
|
||||
_ => false,
|
||||
};
|
||||
if can_derive {
|
||||
let self_name = trait_pred.self_ty().to_string();
|
||||
let self_span = self.tcx.def_span(adt.did());
|
||||
for super_trait in
|
||||
supertraits(self.tcx, ty::Binder::dummy(trait_pred.trait_ref))
|
||||
{
|
||||
if let Some(parent_diagnostic_name) =
|
||||
self.tcx.get_diagnostic_name(super_trait.def_id())
|
||||
{
|
||||
derives.push((self_name.clone(), self_span, parent_diagnostic_name));
|
||||
}
|
||||
}
|
||||
derives.push((self_name, self_span, diagnostic_name));
|
||||
} else {
|
||||
traits.push(trait_pred.def_id());
|
||||
}
|
||||
if let Some(new_derives) = self.consider_suggesting_derives_for_ty(trait_pred, adt) {
|
||||
derives.extend(new_derives);
|
||||
} else {
|
||||
traits.push(trait_pred.def_id());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -485,47 +485,33 @@ impl<'tcx> Validator<'_, 'tcx> {
|
|||
if lhs_ty.is_integral() {
|
||||
let sz = lhs_ty.primitive_size(self.tcx);
|
||||
// Integer division: the RHS must be a non-zero const.
|
||||
let rhs_val = match rhs {
|
||||
Operand::Constant(c)
|
||||
if self.should_evaluate_for_promotion_checks(c.const_) =>
|
||||
{
|
||||
c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
match rhs_val.map(|x| x.to_uint(sz)) {
|
||||
let rhs_val = if let Operand::Constant(rhs_c) = rhs
|
||||
&& self.should_evaluate_for_promotion_checks(rhs_c.const_)
|
||||
&& let Some(rhs_val) =
|
||||
rhs_c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
|
||||
// for the zero test, int vs uint does not matter
|
||||
Some(x) if x != 0 => {} // okay
|
||||
_ => return Err(Unpromotable), // value not known or 0 -- not okay
|
||||
}
|
||||
&& rhs_val.to_uint(sz) != 0
|
||||
{
|
||||
rhs_val
|
||||
} else {
|
||||
// value not known or 0 -- not okay
|
||||
return Err(Unpromotable);
|
||||
};
|
||||
// Furthermore, for signed division, we also have to exclude `int::MIN /
|
||||
// -1`.
|
||||
if lhs_ty.is_signed() {
|
||||
match rhs_val.map(|x| x.to_int(sz)) {
|
||||
Some(-1) | None => {
|
||||
// The RHS is -1 or unknown, so we have to be careful.
|
||||
// But is the LHS int::MIN?
|
||||
let lhs_val = match lhs {
|
||||
Operand::Constant(c)
|
||||
if self.should_evaluate_for_promotion_checks(
|
||||
c.const_,
|
||||
) =>
|
||||
{
|
||||
c.const_
|
||||
.try_eval_scalar_int(self.tcx, self.typing_env)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let lhs_min = sz.signed_int_min();
|
||||
match lhs_val.map(|x| x.to_int(sz)) {
|
||||
// okay
|
||||
Some(x) if x != lhs_min => {}
|
||||
|
||||
// value not known or int::MIN -- not okay
|
||||
_ => return Err(Unpromotable),
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
if lhs_ty.is_signed() && rhs_val.to_int(sz) == -1 {
|
||||
// The RHS is -1, so we have to be careful. But is the LHS int::MIN?
|
||||
if let Operand::Constant(lhs_c) = lhs
|
||||
&& self.should_evaluate_for_promotion_checks(lhs_c.const_)
|
||||
&& let Some(lhs_val) =
|
||||
lhs_c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
|
||||
&& let lhs_min = sz.signed_int_min()
|
||||
&& lhs_val.to_int(sz) != lhs_min
|
||||
{
|
||||
// okay
|
||||
} else {
|
||||
// value not known or int::MIN -- not okay
|
||||
return Err(Unpromotable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2760,9 +2760,13 @@ impl<'a> Parser<'a> {
|
|||
let (mut cond, _) =
|
||||
self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL | Restrictions::ALLOW_LET, attrs)?;
|
||||
|
||||
CondChecker::new(self, let_chains_policy).visit_expr(&mut cond);
|
||||
|
||||
Ok(cond)
|
||||
let mut checker = CondChecker::new(self, let_chains_policy);
|
||||
checker.visit_expr(&mut cond);
|
||||
Ok(if let Some(guar) = checker.found_incorrect_let_chain {
|
||||
self.mk_expr_err(cond.span, guar)
|
||||
} else {
|
||||
cond
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a `let $pat = $expr` pseudo-expression.
|
||||
|
|
@ -3484,13 +3488,19 @@ impl<'a> Parser<'a> {
|
|||
let if_span = self.prev_token.span;
|
||||
let mut cond = self.parse_match_guard_condition()?;
|
||||
|
||||
CondChecker::new(self, LetChainsPolicy::AlwaysAllowed).visit_expr(&mut cond);
|
||||
let mut checker = CondChecker::new(self, LetChainsPolicy::AlwaysAllowed);
|
||||
checker.visit_expr(&mut cond);
|
||||
|
||||
if has_let_expr(&cond) {
|
||||
let span = if_span.to(cond.span);
|
||||
self.psess.gated_spans.gate(sym::if_let_guard, span);
|
||||
}
|
||||
Ok(Some(cond))
|
||||
|
||||
Ok(Some(if let Some(guar) = checker.found_incorrect_let_chain {
|
||||
self.mk_expr_err(cond.span, guar)
|
||||
} else {
|
||||
cond
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_match_arm_pat_and_guard(&mut self) -> PResult<'a, (Pat, Option<Box<Expr>>)> {
|
||||
|
|
@ -3511,13 +3521,23 @@ impl<'a> Parser<'a> {
|
|||
let ast::PatKind::Paren(subpat) = pat.kind else { unreachable!() };
|
||||
let ast::PatKind::Guard(_, mut cond) = subpat.kind else { unreachable!() };
|
||||
self.psess.gated_spans.ungate_last(sym::guard_patterns, cond.span);
|
||||
CondChecker::new(self, LetChainsPolicy::AlwaysAllowed).visit_expr(&mut cond);
|
||||
let mut checker = CondChecker::new(self, LetChainsPolicy::AlwaysAllowed);
|
||||
checker.visit_expr(&mut cond);
|
||||
|
||||
let right = self.prev_token.span;
|
||||
self.dcx().emit_err(errors::ParenthesesInMatchPat {
|
||||
span: vec![left, right],
|
||||
sugg: errors::ParenthesesInMatchPatSugg { left, right },
|
||||
});
|
||||
Ok((self.mk_pat(span, ast::PatKind::Wild), Some(cond)))
|
||||
|
||||
Ok((
|
||||
self.mk_pat(span, ast::PatKind::Wild),
|
||||
(if let Some(guar) = checker.found_incorrect_let_chain {
|
||||
Some(self.mk_expr_err(cond.span, guar))
|
||||
} else {
|
||||
Some(cond)
|
||||
}),
|
||||
))
|
||||
} else {
|
||||
Ok((pat, self.parse_match_arm_guard()?))
|
||||
}
|
||||
|
|
@ -4208,6 +4228,7 @@ struct CondChecker<'a> {
|
|||
forbid_let_reason: Option<ForbiddenLetReason>,
|
||||
missing_let: Option<errors::MaybeMissingLet>,
|
||||
comparison: Option<errors::MaybeComparison>,
|
||||
found_incorrect_let_chain: Option<ErrorGuaranteed>,
|
||||
}
|
||||
|
||||
impl<'a> CondChecker<'a> {
|
||||
|
|
@ -4218,6 +4239,7 @@ impl<'a> CondChecker<'a> {
|
|||
missing_let: None,
|
||||
comparison: None,
|
||||
let_chains_policy,
|
||||
found_incorrect_let_chain: None,
|
||||
depth: 0,
|
||||
}
|
||||
}
|
||||
|
|
@ -4236,12 +4258,19 @@ impl MutVisitor for CondChecker<'_> {
|
|||
NotSupportedOr(or_span) => {
|
||||
self.parser.dcx().emit_err(errors::OrInLetChain { span: or_span })
|
||||
}
|
||||
_ => self.parser.dcx().emit_err(errors::ExpectedExpressionFoundLet {
|
||||
span,
|
||||
reason,
|
||||
missing_let: self.missing_let,
|
||||
comparison: self.comparison,
|
||||
}),
|
||||
_ => {
|
||||
let guar =
|
||||
self.parser.dcx().emit_err(errors::ExpectedExpressionFoundLet {
|
||||
span,
|
||||
reason,
|
||||
missing_let: self.missing_let,
|
||||
comparison: self.comparison,
|
||||
});
|
||||
if let Some(_) = self.missing_let {
|
||||
self.found_incorrect_let_chain = Some(guar);
|
||||
}
|
||||
guar
|
||||
}
|
||||
};
|
||||
*recovered = Recovered::Yes(error);
|
||||
} else if self.depth > 1 {
|
||||
|
|
|
|||
|
|
@ -96,13 +96,10 @@ impl<'a, 'ra, 'tcx> EffectiveVisibilitiesVisitor<'a, 'ra, 'tcx> {
|
|||
// is the maximum value among visibilities of declarations corresponding to that def id.
|
||||
for (decl, eff_vis) in visitor.import_effective_visibilities.iter() {
|
||||
let DeclKind::Import { import, .. } = decl.kind else { unreachable!() };
|
||||
if !decl.is_ambiguity_recursive() {
|
||||
if let Some(node_id) = import.id() {
|
||||
r.effective_visibilities.update_eff_vis(r.local_def_id(node_id), eff_vis, r.tcx)
|
||||
}
|
||||
} else if decl.ambiguity.get().is_some()
|
||||
&& eff_vis.is_public_at_level(Level::Reexported)
|
||||
{
|
||||
if let Some(node_id) = import.id() {
|
||||
r.effective_visibilities.update_eff_vis(r.local_def_id(node_id), eff_vis, r.tcx)
|
||||
}
|
||||
if decl.ambiguity.get().is_some() && eff_vis.is_public_at_level(Level::Reexported) {
|
||||
exported_ambiguities.insert(*decl);
|
||||
}
|
||||
}
|
||||
|
|
@ -123,31 +120,13 @@ impl<'a, 'ra, 'tcx> EffectiveVisibilitiesVisitor<'a, 'ra, 'tcx> {
|
|||
// Set the given effective visibility level to `Level::Direct` and
|
||||
// sets the rest of the `use` chain to `Level::Reexported` until
|
||||
// we hit the actual exported item.
|
||||
//
|
||||
// If the binding is ambiguous, put the root ambiguity binding and all reexports
|
||||
// leading to it into the table. They are used by the `ambiguous_glob_reexports`
|
||||
// lint. For all bindings added to the table this way `is_ambiguity` returns true.
|
||||
let is_ambiguity =
|
||||
|decl: Decl<'ra>, warn: bool| decl.ambiguity.get().is_some() && !warn;
|
||||
let mut parent_id = ParentId::Def(module_id);
|
||||
let mut warn_ambiguity = decl.warn_ambiguity.get();
|
||||
while let DeclKind::Import { source_decl, .. } = decl.kind {
|
||||
self.update_import(decl, parent_id);
|
||||
|
||||
if is_ambiguity(decl, warn_ambiguity) {
|
||||
// Stop at the root ambiguity, further bindings in the chain should not
|
||||
// be reexported because the root ambiguity blocks any access to them.
|
||||
// (Those further bindings are most likely not ambiguities themselves.)
|
||||
break;
|
||||
}
|
||||
|
||||
parent_id = ParentId::Import(decl);
|
||||
decl = source_decl;
|
||||
warn_ambiguity |= source_decl.warn_ambiguity.get();
|
||||
}
|
||||
if !is_ambiguity(decl, warn_ambiguity)
|
||||
&& let Some(def_id) = decl.res().opt_def_id().and_then(|id| id.as_local())
|
||||
{
|
||||
if let Some(def_id) = decl.res().opt_def_id().and_then(|id| id.as_local()) {
|
||||
self.update_def(def_id, decl.vis().expect_local(), parent_id);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ pub(crate) fn target() -> Target {
|
|||
pre_link_args,
|
||||
post_link_args,
|
||||
relocation_model: RelocModel::Pic,
|
||||
crt_static_respected: true,
|
||||
crt_static_default: true,
|
||||
panic_strategy: PanicStrategy::Unwind,
|
||||
no_default_libraries: false,
|
||||
families: cvs!["unix", "wasm"],
|
||||
|
|
|
|||
|
|
@ -274,9 +274,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.26"
|
||||
version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
|
||||
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
|
||||
dependencies = [
|
||||
"rustc-std-workspace-core",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ compiler_builtins = { path = "../compiler-builtins/compiler-builtins", features
|
|||
[features]
|
||||
compiler-builtins-mem = ['compiler_builtins/mem']
|
||||
compiler-builtins-c = ["compiler_builtins/c"]
|
||||
compiler-builtins-no-f16-f128 = ["compiler_builtins/no-f16-f128"]
|
||||
# Choose algorithms that are optimized for binary size instead of runtime performance
|
||||
optimize_for_size = ["core/optimize_for_size"]
|
||||
|
||||
|
|
|
|||
|
|
@ -143,7 +143,11 @@ impl<T, A: Allocator> Drain<'_, T, A> {
|
|||
|
||||
let new_tail_start = tail_start + additional;
|
||||
unsafe {
|
||||
deque.wrap_copy(tail_start, new_tail_start, self.tail_len);
|
||||
deque.wrap_copy(
|
||||
deque.to_physical_idx(tail_start),
|
||||
deque.to_physical_idx(new_tail_start),
|
||||
self.tail_len,
|
||||
);
|
||||
}
|
||||
self.drain_len += additional;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2336,3 +2336,14 @@ fn test_splice_forget() {
|
|||
std::mem::forget(v.splice(2..4, a));
|
||||
assert_eq!(v, &[1, 2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_splice_wrapping() {
|
||||
let mut vec = VecDeque::with_capacity(10);
|
||||
vec.push_front(7u8);
|
||||
vec.push_back(9);
|
||||
|
||||
vec.splice(1..1, [8]);
|
||||
|
||||
assert_eq!(Vec::from(vec), [7, 8, 9]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,6 @@ c = ["dep:cc"]
|
|||
# the generic versions on all platforms.
|
||||
no-asm = []
|
||||
|
||||
# Workaround for codegen backends which haven't yet implemented `f16` and
|
||||
# `f128` support. Disabled any intrinsics which use those types.
|
||||
no-f16-f128 = []
|
||||
|
||||
# Flag this library as the unstable compiler-builtins lib
|
||||
compiler-builtins = []
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ utest-macros = { git = "https://github.com/japaric/utest" }
|
|||
default = ["mangled-names"]
|
||||
c = ["compiler_builtins/c"]
|
||||
no-asm = ["compiler_builtins/no-asm"]
|
||||
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
|
||||
mem = ["compiler_builtins/mem"]
|
||||
mangled-names = ["compiler_builtins/mangled-names"]
|
||||
# Skip tests that rely on f128 symbols being available on the system
|
||||
|
|
|
|||
|
|
@ -36,8 +36,6 @@ else
|
|||
"${test_builtins[@]}" --features c --release
|
||||
"${test_builtins[@]}" --features no-asm
|
||||
"${test_builtins[@]}" --features no-asm --release
|
||||
"${test_builtins[@]}" --features no-f16-f128
|
||||
"${test_builtins[@]}" --features no-f16-f128 --release
|
||||
"${test_builtins[@]}" --benches
|
||||
"${test_builtins[@]}" --benches --release
|
||||
|
||||
|
|
@ -63,8 +61,6 @@ symcheck+=(-- build-and-check)
|
|||
"${symcheck[@]}" "$target" -- -p compiler_builtins --features c --release
|
||||
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm
|
||||
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm --release
|
||||
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128
|
||||
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 --release
|
||||
|
||||
run_intrinsics_test() {
|
||||
build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml)
|
||||
|
|
|
|||
|
|
@ -45,10 +45,6 @@ c = ["dep:cc"]
|
|||
# the generic versions on all platforms.
|
||||
no-asm = []
|
||||
|
||||
# Workaround for codegen backends which haven't yet implemented `f16` and
|
||||
# `f128` support. Disabled any intrinsics which use those types.
|
||||
no-f16-f128 = []
|
||||
|
||||
# Flag this library as the unstable compiler-builtins lib
|
||||
compiler-builtins = []
|
||||
|
||||
|
|
|
|||
|
|
@ -95,16 +95,13 @@ pub fn configure_aliases(target: &Target) {
|
|||
* * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
|
||||
*/
|
||||
|
||||
// If the feature is set, disable both of these types.
|
||||
let no_f16_f128 = target.cargo_features.iter().any(|s| s == "no-f16-f128");
|
||||
|
||||
println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
|
||||
if target.reliable_f16 && !no_f16_f128 {
|
||||
if target.reliable_f16 {
|
||||
println!("cargo::rustc-cfg=f16_enabled");
|
||||
}
|
||||
|
||||
println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
|
||||
if target.reliable_f128 && !no_f16_f128 {
|
||||
if target.reliable_f128 {
|
||||
println!("cargo::rustc-cfg=f128_enabled");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -143,16 +143,13 @@ fn emit_f16_f128_cfg(cfg: &Config) {
|
|||
|
||||
/* See the compiler-builtins configure file for info about the meaning of these options */
|
||||
|
||||
// If the feature is set, disable both of these types.
|
||||
let no_f16_f128 = cfg.cargo_features.iter().any(|s| s == "no-f16-f128");
|
||||
|
||||
println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
|
||||
if cfg.reliable_f16 && !no_f16_f128 {
|
||||
if cfg.reliable_f16 {
|
||||
println!("cargo:rustc-cfg=f16_enabled");
|
||||
}
|
||||
|
||||
println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
|
||||
if cfg.reliable_f128 && !no_f16_f128 {
|
||||
if cfg.reliable_f128 {
|
||||
println!("cargo:rustc-cfg=f128_enabled");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ hashbrown = { version = "0.16.1", default-features = false, features = [
|
|||
std_detect = { path = "../std_detect", public = true }
|
||||
|
||||
# Dependencies of the `backtrace` crate
|
||||
rustc-demangle = { version = "0.1.24", features = ['rustc-dep-of-std'] }
|
||||
rustc-demangle = { version = "0.1.27", features = ['rustc-dep-of-std'] }
|
||||
|
||||
[target.'cfg(not(all(windows, target_env = "msvc", not(target_vendor = "uwp"))))'.dependencies]
|
||||
miniz_oxide = { version = "0.8.0", optional = true, default-features = false }
|
||||
|
|
@ -115,7 +115,6 @@ backtrace-trace-only = []
|
|||
panic-unwind = ["dep:panic_unwind"]
|
||||
compiler-builtins-c = ["alloc/compiler-builtins-c"]
|
||||
compiler-builtins-mem = ["alloc/compiler-builtins-mem"]
|
||||
compiler-builtins-no-f16-f128 = ["alloc/compiler-builtins-no-f16-f128"]
|
||||
llvm-libunwind = ["unwind/llvm-libunwind"]
|
||||
system-llvm-libunwind = ["unwind/system-llvm-libunwind"]
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ cfg_select! {
|
|||
target_os = "illumos",
|
||||
target_os = "dragonfly",
|
||||
target_os = "hurd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "vxworks",
|
||||
target_os = "wasi",
|
||||
target_vendor = "apple",
|
||||
|
|
@ -131,7 +130,6 @@ cfg_select! {
|
|||
target_os = "illumos",
|
||||
target_os = "dragonfly",
|
||||
target_os = "hurd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "vxworks",
|
||||
target_os = "wasi",
|
||||
target_vendor = "apple",
|
||||
|
|
|
|||
|
|
@ -542,7 +542,6 @@ pub fn sleep(dur: Duration) {
|
|||
target_os = "illumos",
|
||||
target_os = "dragonfly",
|
||||
target_os = "hurd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "vxworks",
|
||||
target_os = "wasi",
|
||||
) => {
|
||||
|
|
@ -640,7 +639,6 @@ pub fn sleep(dur: Duration) {
|
|||
target_os = "illumos",
|
||||
target_os = "dragonfly",
|
||||
target_os = "hurd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "vxworks",
|
||||
target_os = "wasi",
|
||||
))]
|
||||
|
|
|
|||
|
|
@ -316,7 +316,6 @@ pub fn sleep(dur: Duration) {
|
|||
/// | Illumos | [clock_nanosleep] (Monotonic Clock)] |
|
||||
/// | Dragonfly | [clock_nanosleep] (Monotonic Clock)] |
|
||||
/// | Hurd | [clock_nanosleep] (Monotonic Clock)] |
|
||||
/// | Fuchsia | [clock_nanosleep] (Monotonic Clock)] |
|
||||
/// | Vxworks | [clock_nanosleep] (Monotonic Clock)] |
|
||||
/// | Apple | `mach_wait_until` |
|
||||
/// | Other | `sleep_until` uses [`sleep`] and does not issue a syscall itself |
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -3363,6 +3363,7 @@ unsafe extern "C" {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use crate::core_arch::simd::*;
|
||||
use crate::hint::black_box;
|
||||
use crate::ptr;
|
||||
use stdarch_test::simd_test;
|
||||
|
|
@ -3458,7 +3459,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_max_pd() {
|
||||
fn test_mm256_max_pd() {
|
||||
let a = _mm256_setr_pd(1., 4., 5., 8.);
|
||||
let b = _mm256_setr_pd(2., 3., 6., 7.);
|
||||
let r = _mm256_max_pd(a, b);
|
||||
|
|
@ -3468,23 +3469,22 @@ mod tests {
|
|||
// > value in the second operand (source operand) is returned.
|
||||
let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
|
||||
let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
|
||||
let wu: [u64; 4] = transmute(w);
|
||||
let xu: [u64; 4] = transmute(x);
|
||||
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
|
||||
assert_eq!(xu, [0u64; 4]);
|
||||
let wu = _mm256_castpd_si256(w).as_u64x4();
|
||||
let xu = _mm256_castpd_si256(x).as_u64x4();
|
||||
assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
|
||||
assert_eq!(xu, u64x4::splat(0u64));
|
||||
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
|
||||
// > second operand (source operand), either a NaN or a valid
|
||||
// > floating-point value, is written to the result.
|
||||
let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
|
||||
let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
|
||||
let yf: [f64; 4] = transmute(y);
|
||||
let zf: [f64; 4] = transmute(z);
|
||||
assert_eq!(yf, [0.0; 4]);
|
||||
assert_eq_m256d(y, _mm256_set1_pd(0.0));
|
||||
let zf = *z.as_f64x4().as_array();
|
||||
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_max_ps() {
|
||||
fn test_mm256_max_ps() {
|
||||
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
|
||||
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
|
||||
let r = _mm256_max_ps(a, b);
|
||||
|
|
@ -3494,23 +3494,22 @@ mod tests {
|
|||
// > value in the second operand (source operand) is returned.
|
||||
let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
|
||||
let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
|
||||
let wu: [u32; 8] = transmute(w);
|
||||
let xu: [u32; 8] = transmute(x);
|
||||
assert_eq!(wu, [0x8000_0000u32; 8]);
|
||||
assert_eq!(xu, [0u32; 8]);
|
||||
let wu = _mm256_castps_si256(w).as_u32x8();
|
||||
let xu = _mm256_castps_si256(x).as_u32x8();
|
||||
assert_eq!(wu, u32x8::splat(0x8000_0000u32));
|
||||
assert_eq!(xu, u32x8::splat(0u32));
|
||||
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
|
||||
// > second operand (source operand), either a NaN or a valid
|
||||
// > floating-point value, is written to the result.
|
||||
let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
|
||||
let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
|
||||
let yf: [f32; 8] = transmute(y);
|
||||
let zf: [f32; 8] = transmute(z);
|
||||
assert_eq!(yf, [0.0; 8]);
|
||||
assert_eq_m256(y, _mm256_set1_ps(0.0));
|
||||
let zf = *z.as_f32x8().as_array();
|
||||
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_min_pd() {
|
||||
fn test_mm256_min_pd() {
|
||||
let a = _mm256_setr_pd(1., 4., 5., 8.);
|
||||
let b = _mm256_setr_pd(2., 3., 6., 7.);
|
||||
let r = _mm256_min_pd(a, b);
|
||||
|
|
@ -3520,23 +3519,22 @@ mod tests {
|
|||
// > value in the second operand (source operand) is returned.
|
||||
let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
|
||||
let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
|
||||
let wu: [u64; 4] = transmute(w);
|
||||
let xu: [u64; 4] = transmute(x);
|
||||
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
|
||||
assert_eq!(xu, [0u64; 4]);
|
||||
let wu = _mm256_castpd_si256(w).as_u64x4();
|
||||
let xu = _mm256_castpd_si256(x).as_u64x4();
|
||||
assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
|
||||
assert_eq!(xu, u64x4::splat(0u64));
|
||||
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
|
||||
// > second operand (source operand), either a NaN or a valid
|
||||
// > floating-point value, is written to the result.
|
||||
let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
|
||||
let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
|
||||
let yf: [f64; 4] = transmute(y);
|
||||
let zf: [f64; 4] = transmute(z);
|
||||
assert_eq!(yf, [0.0; 4]);
|
||||
assert_eq_m256d(y, _mm256_set1_pd(0.0));
|
||||
let zf = *z.as_f64x4().as_array();
|
||||
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_min_ps() {
|
||||
fn test_mm256_min_ps() {
|
||||
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
|
||||
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
|
||||
let r = _mm256_min_ps(a, b);
|
||||
|
|
@ -3546,18 +3544,17 @@ mod tests {
|
|||
// > value in the second operand (source operand) is returned.
|
||||
let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
|
||||
let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
|
||||
let wu: [u32; 8] = transmute(w);
|
||||
let xu: [u32; 8] = transmute(x);
|
||||
assert_eq!(wu, [0x8000_0000u32; 8]);
|
||||
assert_eq!(xu, [0u32; 8]);
|
||||
let wu = _mm256_castps_si256(w).as_u32x8();
|
||||
let xu = _mm256_castps_si256(x).as_u32x8();
|
||||
assert_eq!(wu, u32x8::splat(0x8000_0000u32));
|
||||
assert_eq!(xu, u32x8::splat(0u32));
|
||||
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
|
||||
// > second operand (source operand), either a NaN or a valid
|
||||
// > floating-point value, is written to the result.
|
||||
let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
|
||||
let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
|
||||
let yf: [f32; 8] = transmute(y);
|
||||
let zf: [f32; 8] = transmute(z);
|
||||
assert_eq!(yf, [0.0; 8]);
|
||||
assert_eq_m256(y, _mm256_set1_ps(0.0));
|
||||
let zf = *z.as_f32x8().as_array();
|
||||
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
|
||||
}
|
||||
|
||||
|
|
@ -4241,183 +4238,203 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_load_pd() {
|
||||
const fn test_mm256_load_pd() {
|
||||
let a = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
let p = ptr::addr_of!(a) as *const f64;
|
||||
let r = _mm256_load_pd(p);
|
||||
let r = unsafe { _mm256_load_pd(p) };
|
||||
let e = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_store_pd() {
|
||||
const fn test_mm256_store_pd() {
|
||||
let a = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
let mut r = _mm256_undefined_pd();
|
||||
_mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
|
||||
unsafe {
|
||||
_mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
|
||||
}
|
||||
assert_eq_m256d(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_load_ps() {
|
||||
const fn test_mm256_load_ps() {
|
||||
let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
|
||||
let p = ptr::addr_of!(a) as *const f32;
|
||||
let r = _mm256_load_ps(p);
|
||||
let r = unsafe { _mm256_load_ps(p) };
|
||||
let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_store_ps() {
|
||||
const fn test_mm256_store_ps() {
|
||||
let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
|
||||
let mut r = _mm256_undefined_ps();
|
||||
_mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
|
||||
unsafe {
|
||||
_mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
|
||||
}
|
||||
assert_eq_m256(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu_pd() {
|
||||
const fn test_mm256_loadu_pd() {
|
||||
let a = &[1.0f64, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let r = _mm256_loadu_pd(black_box(p));
|
||||
let r = unsafe { _mm256_loadu_pd(black_box(p)) };
|
||||
let e = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu_pd() {
|
||||
const fn test_mm256_storeu_pd() {
|
||||
let a = _mm256_set1_pd(9.);
|
||||
let mut r = _mm256_undefined_pd();
|
||||
_mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
|
||||
unsafe {
|
||||
_mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
|
||||
}
|
||||
assert_eq_m256d(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu_ps() {
|
||||
const fn test_mm256_loadu_ps() {
|
||||
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
|
||||
let p = a.as_ptr();
|
||||
let r = _mm256_loadu_ps(black_box(p));
|
||||
let r = unsafe { _mm256_loadu_ps(black_box(p)) };
|
||||
let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu_ps() {
|
||||
const fn test_mm256_storeu_ps() {
|
||||
let a = _mm256_set1_ps(9.);
|
||||
let mut r = _mm256_undefined_ps();
|
||||
_mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
|
||||
unsafe {
|
||||
_mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
|
||||
}
|
||||
assert_eq_m256(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_load_si256() {
|
||||
const fn test_mm256_load_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let p = ptr::addr_of!(a);
|
||||
let r = _mm256_load_si256(p);
|
||||
let r = unsafe { _mm256_load_si256(p) };
|
||||
let e = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_store_si256() {
|
||||
const fn test_mm256_store_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm256_store_si256(ptr::addr_of_mut!(r), a);
|
||||
unsafe {
|
||||
_mm256_store_si256(ptr::addr_of_mut!(r), a);
|
||||
}
|
||||
assert_eq_m256i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu_si256() {
|
||||
const fn test_mm256_loadu_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let p = ptr::addr_of!(a);
|
||||
let r = _mm256_loadu_si256(black_box(p));
|
||||
let r = unsafe { _mm256_loadu_si256(black_box(p)) };
|
||||
let e = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu_si256() {
|
||||
const fn test_mm256_storeu_si256() {
|
||||
let a = _mm256_set1_epi8(9);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm256_storeu_si256(ptr::addr_of_mut!(r), a);
|
||||
unsafe {
|
||||
_mm256_storeu_si256(ptr::addr_of_mut!(r), a);
|
||||
}
|
||||
assert_eq_m256i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_maskload_pd() {
|
||||
const fn test_mm256_maskload_pd() {
|
||||
let a = &[1.0f64, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let mask = _mm256_setr_epi64x(0, !0, 0, !0);
|
||||
let r = _mm256_maskload_pd(black_box(p), mask);
|
||||
let r = unsafe { _mm256_maskload_pd(black_box(p), mask) };
|
||||
let e = _mm256_setr_pd(0., 2., 0., 4.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_maskstore_pd() {
|
||||
const fn test_mm256_maskstore_pd() {
|
||||
let mut r = _mm256_set1_pd(0.);
|
||||
let mask = _mm256_setr_epi64x(0, !0, 0, !0);
|
||||
let a = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
_mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
|
||||
unsafe {
|
||||
_mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
|
||||
}
|
||||
let e = _mm256_setr_pd(0., 2., 0., 4.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm_maskload_pd() {
|
||||
const fn test_mm_maskload_pd() {
|
||||
let a = &[1.0f64, 2.];
|
||||
let p = a.as_ptr();
|
||||
let mask = _mm_setr_epi64x(0, !0);
|
||||
let r = _mm_maskload_pd(black_box(p), mask);
|
||||
let r = unsafe { _mm_maskload_pd(black_box(p), mask) };
|
||||
let e = _mm_setr_pd(0., 2.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm_maskstore_pd() {
|
||||
const fn test_mm_maskstore_pd() {
|
||||
let mut r = _mm_set1_pd(0.);
|
||||
let mask = _mm_setr_epi64x(0, !0);
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
_mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
|
||||
unsafe {
|
||||
_mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
|
||||
}
|
||||
let e = _mm_setr_pd(0., 2.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_maskload_ps() {
|
||||
const fn test_mm256_maskload_ps() {
|
||||
let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
|
||||
let p = a.as_ptr();
|
||||
let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
|
||||
let r = _mm256_maskload_ps(black_box(p), mask);
|
||||
let r = unsafe { _mm256_maskload_ps(black_box(p), mask) };
|
||||
let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_maskstore_ps() {
|
||||
const fn test_mm256_maskstore_ps() {
|
||||
let mut r = _mm256_set1_ps(0.);
|
||||
let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
|
||||
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
_mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
|
||||
unsafe {
|
||||
_mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
|
||||
}
|
||||
let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm_maskload_ps() {
|
||||
const fn test_mm_maskload_ps() {
|
||||
let a = &[1.0f32, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let mask = _mm_setr_epi32(0, !0, 0, !0);
|
||||
let r = _mm_maskload_ps(black_box(p), mask);
|
||||
let r = unsafe { _mm_maskload_ps(black_box(p), mask) };
|
||||
let e = _mm_setr_ps(0., 2., 0., 4.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm_maskstore_ps() {
|
||||
const fn test_mm_maskstore_ps() {
|
||||
let mut r = _mm_set1_ps(0.);
|
||||
let mask = _mm_setr_epi32(0, !0, 0, !0);
|
||||
let a = _mm_setr_ps(1., 2., 3., 4.);
|
||||
_mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
|
||||
unsafe {
|
||||
_mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
|
||||
}
|
||||
let e = _mm_setr_ps(0., 2., 0., 4.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
|
@ -4447,7 +4464,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_lddqu_si256() {
|
||||
fn test_mm256_lddqu_si256() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_setr_epi8(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
|
|
@ -4456,7 +4473,7 @@ mod tests {
|
|||
25, 26, 27, 28, 29, 30, 31, 32,
|
||||
);
|
||||
let p = ptr::addr_of!(a);
|
||||
let r = _mm256_lddqu_si256(black_box(p));
|
||||
let r = unsafe { _mm256_lddqu_si256(black_box(p)) };
|
||||
#[rustfmt::skip]
|
||||
let e = _mm256_setr_epi8(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
|
|
@ -4469,17 +4486,19 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
unsafe fn test_mm256_stream_si256() {
|
||||
fn test_mm256_stream_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm256_stream_si256(ptr::addr_of_mut!(r), a);
|
||||
unsafe {
|
||||
_mm256_stream_si256(ptr::addr_of_mut!(r), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
assert_eq_m256i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
unsafe fn test_mm256_stream_pd() {
|
||||
fn test_mm256_stream_pd() {
|
||||
#[repr(align(32))]
|
||||
struct Memory {
|
||||
pub data: [f64; 4],
|
||||
|
|
@ -4487,7 +4506,9 @@ mod tests {
|
|||
let a = _mm256_set1_pd(7.0);
|
||||
let mut mem = Memory { data: [-1.0; 4] };
|
||||
|
||||
_mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
unsafe {
|
||||
_mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
for i in 0..4 {
|
||||
assert_eq!(mem.data[i], get_m256d(a, i));
|
||||
|
|
@ -4496,7 +4517,7 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
unsafe fn test_mm256_stream_ps() {
|
||||
fn test_mm256_stream_ps() {
|
||||
#[repr(align(32))]
|
||||
struct Memory {
|
||||
pub data: [f32; 8],
|
||||
|
|
@ -4504,7 +4525,9 @@ mod tests {
|
|||
let a = _mm256_set1_ps(7.0);
|
||||
let mut mem = Memory { data: [-1.0; 8] };
|
||||
|
||||
_mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
unsafe {
|
||||
_mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
for i in 0..8 {
|
||||
assert_eq!(mem.data[i], get_m256(a, i));
|
||||
|
|
@ -5141,29 +5164,29 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu2_m128() {
|
||||
const fn test_mm256_loadu2_m128() {
|
||||
let hi = &[5., 6., 7., 8.];
|
||||
let hiaddr = hi.as_ptr();
|
||||
let lo = &[1., 2., 3., 4.];
|
||||
let loaddr = lo.as_ptr();
|
||||
let r = _mm256_loadu2_m128(hiaddr, loaddr);
|
||||
let r = unsafe { _mm256_loadu2_m128(hiaddr, loaddr) };
|
||||
let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu2_m128d() {
|
||||
const fn test_mm256_loadu2_m128d() {
|
||||
let hi = &[3., 4.];
|
||||
let hiaddr = hi.as_ptr();
|
||||
let lo = &[1., 2.];
|
||||
let loaddr = lo.as_ptr();
|
||||
let r = _mm256_loadu2_m128d(hiaddr, loaddr);
|
||||
let r = unsafe { _mm256_loadu2_m128d(hiaddr, loaddr) };
|
||||
let e = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_loadu2_m128i() {
|
||||
const fn test_mm256_loadu2_m128i() {
|
||||
#[rustfmt::skip]
|
||||
let hi = _mm_setr_epi8(
|
||||
17, 18, 19, 20, 21, 22, 23, 24,
|
||||
|
|
@ -5174,7 +5197,9 @@ mod tests {
|
|||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16,
|
||||
);
|
||||
let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
|
||||
let r = unsafe {
|
||||
_mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _)
|
||||
};
|
||||
#[rustfmt::skip]
|
||||
let e = _mm256_setr_epi8(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
|
|
@ -5186,35 +5211,39 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu2_m128() {
|
||||
const fn test_mm256_storeu2_m128() {
|
||||
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
let mut hi = _mm_undefined_ps();
|
||||
let mut lo = _mm_undefined_ps();
|
||||
_mm256_storeu2_m128(
|
||||
ptr::addr_of_mut!(hi) as *mut f32,
|
||||
ptr::addr_of_mut!(lo) as *mut f32,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm256_storeu2_m128(
|
||||
ptr::addr_of_mut!(hi) as *mut f32,
|
||||
ptr::addr_of_mut!(lo) as *mut f32,
|
||||
a,
|
||||
);
|
||||
}
|
||||
assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
|
||||
assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu2_m128d() {
|
||||
const fn test_mm256_storeu2_m128d() {
|
||||
let a = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
let mut hi = _mm_undefined_pd();
|
||||
let mut lo = _mm_undefined_pd();
|
||||
_mm256_storeu2_m128d(
|
||||
ptr::addr_of_mut!(hi) as *mut f64,
|
||||
ptr::addr_of_mut!(lo) as *mut f64,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm256_storeu2_m128d(
|
||||
ptr::addr_of_mut!(hi) as *mut f64,
|
||||
ptr::addr_of_mut!(lo) as *mut f64,
|
||||
a,
|
||||
);
|
||||
}
|
||||
assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
|
||||
assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
const unsafe fn test_mm256_storeu2_m128i() {
|
||||
const fn test_mm256_storeu2_m128i() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_setr_epi8(
|
||||
1, 2, 3, 4, 5, 6, 7, 8,
|
||||
|
|
@ -5224,7 +5253,9 @@ mod tests {
|
|||
);
|
||||
let mut hi = _mm_undefined_si128();
|
||||
let mut lo = _mm_undefined_si128();
|
||||
_mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
|
||||
unsafe {
|
||||
_mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
|
||||
}
|
||||
#[rustfmt::skip]
|
||||
let e_hi = _mm_setr_epi8(
|
||||
17, 18, 19, 20, 21, 22, 23, 24,
|
||||
|
|
|
|||
|
|
@ -4672,81 +4672,89 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm_maskload_epi32() {
|
||||
const fn test_mm_maskload_epi32() {
|
||||
let nums = [1, 2, 3, 4];
|
||||
let a = &nums as *const i32;
|
||||
let mask = _mm_setr_epi32(-1, 0, 0, -1);
|
||||
let r = _mm_maskload_epi32(a, mask);
|
||||
let r = unsafe { _mm_maskload_epi32(a, mask) };
|
||||
let e = _mm_setr_epi32(1, 0, 0, 4);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm256_maskload_epi32() {
|
||||
const fn test_mm256_maskload_epi32() {
|
||||
let nums = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let a = &nums as *const i32;
|
||||
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
|
||||
let r = _mm256_maskload_epi32(a, mask);
|
||||
let r = unsafe { _mm256_maskload_epi32(a, mask) };
|
||||
let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm_maskload_epi64() {
|
||||
const fn test_mm_maskload_epi64() {
|
||||
let nums = [1_i64, 2_i64];
|
||||
let a = &nums as *const i64;
|
||||
let mask = _mm_setr_epi64x(0, -1);
|
||||
let r = _mm_maskload_epi64(a, mask);
|
||||
let r = unsafe { _mm_maskload_epi64(a, mask) };
|
||||
let e = _mm_setr_epi64x(0, 2);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm256_maskload_epi64() {
|
||||
const fn test_mm256_maskload_epi64() {
|
||||
let nums = [1_i64, 2_i64, 3_i64, 4_i64];
|
||||
let a = &nums as *const i64;
|
||||
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
|
||||
let r = _mm256_maskload_epi64(a, mask);
|
||||
let r = unsafe { _mm256_maskload_epi64(a, mask) };
|
||||
let e = _mm256_setr_epi64x(0, 2, 3, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm_maskstore_epi32() {
|
||||
const fn test_mm_maskstore_epi32() {
|
||||
let a = _mm_setr_epi32(1, 2, 3, 4);
|
||||
let mut arr = [-1, -1, -1, -1];
|
||||
let mask = _mm_setr_epi32(-1, 0, 0, -1);
|
||||
_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
|
||||
unsafe {
|
||||
_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
|
||||
}
|
||||
let e = [1, -1, -1, 4];
|
||||
assert_eq!(arr, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm256_maskstore_epi32() {
|
||||
const fn test_mm256_maskstore_epi32() {
|
||||
let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
|
||||
let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
|
||||
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
|
||||
_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
|
||||
unsafe {
|
||||
_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
|
||||
}
|
||||
let e = [1, -1, -1, 42, -1, 6, 7, -1];
|
||||
assert_eq!(arr, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm_maskstore_epi64() {
|
||||
const fn test_mm_maskstore_epi64() {
|
||||
let a = _mm_setr_epi64x(1_i64, 2_i64);
|
||||
let mut arr = [-1_i64, -1_i64];
|
||||
let mask = _mm_setr_epi64x(0, -1);
|
||||
_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
|
||||
unsafe {
|
||||
_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
|
||||
}
|
||||
let e = [-1, 2];
|
||||
assert_eq!(arr, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
const unsafe fn test_mm256_maskstore_epi64() {
|
||||
const fn test_mm256_maskstore_epi64() {
|
||||
let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
|
||||
let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
|
||||
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
|
||||
_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
|
||||
unsafe {
|
||||
_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
|
||||
}
|
||||
let e = [-1, 2, 3, -1];
|
||||
assert_eq!(arr, e);
|
||||
}
|
||||
|
|
@ -5301,9 +5309,9 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_stream_load_si256() {
|
||||
fn test_mm256_stream_load_si256() {
|
||||
let a = _mm256_set_epi64x(5, 6, 7, 8);
|
||||
let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m256i(a, r);
|
||||
}
|
||||
|
||||
|
|
@ -5506,88 +5514,98 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_epi32() {
|
||||
fn test_mm_i32gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_epi32() {
|
||||
fn test_mm_mask_i32gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_mask_i32gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm_setr_epi32(-1, -1, -1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i32gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm_setr_epi32(-1, -1, -1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_epi32() {
|
||||
fn test_mm256_i32gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r =
|
||||
_mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
|
||||
let r = unsafe {
|
||||
_mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
|
||||
};
|
||||
assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_epi32() {
|
||||
fn test_mm256_mask_i32gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_mask_i32gather_epi32::<4>(
|
||||
_mm256_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
|
||||
_mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i32gather_epi32::<4>(
|
||||
_mm256_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
|
||||
_mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_ps() {
|
||||
fn test_mm_i32gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_ps() {
|
||||
fn test_mm_mask_i32gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_mask_i32gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i32gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_ps() {
|
||||
fn test_mm256_i32gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r =
|
||||
_mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
|
||||
let r = unsafe {
|
||||
_mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
|
||||
};
|
||||
assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_ps() {
|
||||
fn test_mm256_mask_i32gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_mask_i32gather_ps::<4>(
|
||||
_mm256_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
|
||||
_mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i32gather_ps::<4>(
|
||||
_mm256_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
|
||||
_mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256(
|
||||
r,
|
||||
_mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
|
||||
|
|
@ -5595,254 +5613,282 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_epi64() {
|
||||
fn test_mm_i32gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
|
||||
let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_epi64() {
|
||||
fn test_mm_mask_i32gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_mask_i32gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(16, 16, 16, 16),
|
||||
_mm_setr_epi64x(-1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i32gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(16, 16, 16, 16),
|
||||
_mm_setr_epi64x(-1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_epi64() {
|
||||
fn test_mm256_i32gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_epi64() {
|
||||
fn test_mm256_mask_i32gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_mask_i32gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm256_setr_epi64x(-1, -1, -1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i32gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm256_setr_epi64x(-1, -1, -1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i32gather_pd() {
|
||||
fn test_mm_i32gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
|
||||
let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i32gather_pd() {
|
||||
fn test_mm_mask_i32gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_mask_i32gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(16, 16, 16, 16),
|
||||
_mm_setr_pd(-1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i32gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(16, 16, 16, 16),
|
||||
_mm_setr_pd(-1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i32gather_pd() {
|
||||
fn test_mm256_i32gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
|
||||
let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i32gather_pd() {
|
||||
fn test_mm256_mask_i32gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_mask_i32gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i32gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi32(0, 16, 64, 96),
|
||||
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_epi32() {
|
||||
fn test_mm_i64gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_epi32() {
|
||||
fn test_mm_mask_i64gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(0, 16),
|
||||
_mm_setr_epi32(-1, 0, -1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(0, 16),
|
||||
_mm_setr_epi32(-1, 0, -1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_epi32() {
|
||||
fn test_mm256_i64gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
let r =
|
||||
unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_epi32() {
|
||||
fn test_mm256_mask_i64gather_epi32() {
|
||||
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm256_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm_setr_epi32(-1, -1, -1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i64gather_epi32::<4>(
|
||||
_mm_set1_epi32(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm_setr_epi32(-1, -1, -1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_ps() {
|
||||
fn test_mm_i64gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_ps() {
|
||||
fn test_mm_mask_i64gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(0, 16),
|
||||
_mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(0, 16),
|
||||
_mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_ps() {
|
||||
fn test_mm256_i64gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
let r =
|
||||
unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_ps() {
|
||||
fn test_mm256_mask_i64gather_ps() {
|
||||
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
|
||||
// A multiplier of 4 is word-addressing for f32s
|
||||
let r = _mm256_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i64gather_ps::<4>(
|
||||
_mm_set1_ps(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_epi64() {
|
||||
fn test_mm_i64gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_epi64() {
|
||||
fn test_mm_mask_i64gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm_mask_i64gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(16, 16),
|
||||
_mm_setr_epi64x(-1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i64gather_epi64::<8>(
|
||||
_mm_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(16, 16),
|
||||
_mm_setr_epi64x(-1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_epi64() {
|
||||
fn test_mm256_i64gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
let r =
|
||||
unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_epi64() {
|
||||
fn test_mm256_mask_i64gather_epi64() {
|
||||
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
|
||||
// A multiplier of 8 is word-addressing for i64s
|
||||
let r = _mm256_mask_i64gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm256_setr_epi64x(-1, -1, -1, 0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i64gather_epi64::<8>(
|
||||
_mm256_set1_epi64x(256),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm256_setr_epi64x(-1, -1, -1, 0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_i64gather_pd() {
|
||||
fn test_mm_i64gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
|
||||
let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm_mask_i64gather_pd() {
|
||||
fn test_mm_mask_i64gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm_mask_i64gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(16, 16),
|
||||
_mm_setr_pd(-1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm_mask_i64gather_pd::<8>(
|
||||
_mm_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm_setr_epi64x(16, 16),
|
||||
_mm_setr_pd(-1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_i64gather_pd() {
|
||||
fn test_mm256_i64gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
|
||||
let r =
|
||||
unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_mask_i64gather_pd() {
|
||||
fn test_mm256_mask_i64gather_pd() {
|
||||
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
|
||||
// A multiplier of 8 is word-addressing for f64s
|
||||
let r = _mm256_mask_i64gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
|
||||
);
|
||||
let r = unsafe {
|
||||
_mm256_mask_i64gather_pd::<8>(
|
||||
_mm256_set1_pd(256.0),
|
||||
arr.as_ptr(),
|
||||
_mm256_setr_epi64x(0, 16, 64, 96),
|
||||
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
|
||||
)
|
||||
};
|
||||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -593,7 +593,7 @@ pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::simd::u16x4;
|
||||
use crate::core_arch::simd::{f32x4, f32x8, f32x16, u16x4, u16x8, u16x16, u16x32};
|
||||
use crate::{
|
||||
core_arch::x86::*,
|
||||
mem::{transmute, transmute_copy},
|
||||
|
|
@ -601,13 +601,13 @@ mod tests {
|
|||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_cvtne2ps_pbh() {
|
||||
fn test_mm_cvtne2ps_pbh() {
|
||||
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
|
||||
let a: __m128 = transmute(a_array);
|
||||
let b: __m128 = transmute(b_array);
|
||||
let a = f32x4::from_array(a_array).as_m128();
|
||||
let b = f32x4::from_array(b_array).as_m128();
|
||||
let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -623,7 +623,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_mask_cvtne2ps_pbh() {
|
||||
fn test_mm_mask_cvtne2ps_pbh() {
|
||||
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
|
||||
#[rustfmt::skip]
|
||||
|
|
@ -637,12 +637,12 @@ mod tests {
|
|||
0b0_10000000_1110000,
|
||||
0b0_10000100_1001001,
|
||||
];
|
||||
let src: __m128bh = transmute(src_array);
|
||||
let a: __m128 = transmute(a_array);
|
||||
let b: __m128 = transmute(b_array);
|
||||
let src = u16x8::from_array(src_array).as_m128bh();
|
||||
let a = f32x4::from_array(a_array).as_m128();
|
||||
let b = f32x4::from_array(b_array).as_m128();
|
||||
let k: __mmask8 = 0b1111_1111;
|
||||
let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -657,20 +657,20 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k = 0b0000_0000;
|
||||
let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
let expected_result = src_array;
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_maskz_cvtne2ps_pbh() {
|
||||
fn test_mm_maskz_cvtne2ps_pbh() {
|
||||
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
|
||||
let a: __m128 = transmute(a_array);
|
||||
let b: __m128 = transmute(b_array);
|
||||
let a = f32x4::from_array(a_array).as_m128();
|
||||
let b = f32x4::from_array(b_array).as_m128();
|
||||
let k: __mmask8 = 0b1111_1111;
|
||||
let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -685,7 +685,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k = 0b0011_1100;
|
||||
let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0,
|
||||
|
|
@ -701,7 +701,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_cvtne2ps_pbh() {
|
||||
fn test_mm256_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -723,10 +723,10 @@ mod tests {
|
|||
-1000.158_f32,
|
||||
-575.575_f32,
|
||||
];
|
||||
let a: __m256 = transmute(a_array);
|
||||
let b: __m256 = transmute(b_array);
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let b = f32x8::from_array(b_array).as_m256();
|
||||
let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -750,7 +750,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_mask_cvtne2ps_pbh() {
|
||||
fn test_mm256_mask_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -790,12 +790,12 @@ mod tests {
|
|||
0b0_10000000_1110000,
|
||||
0b0_10000100_1001001,
|
||||
];
|
||||
let src: __m256bh = transmute(src_array);
|
||||
let a: __m256 = transmute(a_array);
|
||||
let b: __m256 = transmute(b_array);
|
||||
let src = u16x16::from_array(src_array).as_m256bh();
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let b = f32x8::from_array(b_array).as_m256();
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -818,13 +818,13 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0;
|
||||
let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
let expected_result = src_array;
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_cvtne2ps_pbh() {
|
||||
fn test_mm256_maskz_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -846,11 +846,11 @@ mod tests {
|
|||
-1000.158_f32,
|
||||
-575.575_f32,
|
||||
];
|
||||
let a: __m256 = transmute(a_array);
|
||||
let b: __m256 = transmute(b_array);
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let b = f32x8::from_array(b_array).as_m256();
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -873,7 +873,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0b0110_1100_0011_0110;
|
||||
let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0,
|
||||
|
|
@ -897,7 +897,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_cvtne2ps_pbh() {
|
||||
fn test_mm512_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -935,10 +935,10 @@ mod tests {
|
|||
-1000.158_f32,
|
||||
-575.575_f32,
|
||||
];
|
||||
let a: __m512 = transmute(a_array);
|
||||
let b: __m512 = transmute(b_array);
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let b = f32x16::from_array(b_array).as_m512();
|
||||
let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
|
||||
let result: [u16; 32] = transmute(c.as_u16x32());
|
||||
let result = *c.as_u16x32().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 32] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -978,7 +978,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_mask_cvtne2ps_pbh() {
|
||||
fn test_mm512_mask_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1050,12 +1050,12 @@ mod tests {
|
|||
0b0_10000000_1110000,
|
||||
0b0_10000100_1001001,
|
||||
];
|
||||
let src: __m512bh = transmute(src_array);
|
||||
let a: __m512 = transmute(a_array);
|
||||
let b: __m512 = transmute(b_array);
|
||||
let src = u16x32::from_array(src_array).as_m512bh();
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let b = f32x16::from_array(b_array).as_m512();
|
||||
let k: __mmask32 = 0xffffffff;
|
||||
let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 32] = transmute(c.as_u16x32());
|
||||
let result = *c.as_u16x32().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 32] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -1094,13 +1094,13 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask32 = 0;
|
||||
let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
|
||||
let result: [u16; 32] = transmute(c.as_u16x32());
|
||||
let result = *c.as_u16x32().as_array();
|
||||
let expected_result = src_array;
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_maskz_cvtne2ps_pbh() {
|
||||
fn test_mm512_maskz_cvtne2ps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1138,11 +1138,11 @@ mod tests {
|
|||
-1000.158_f32,
|
||||
-575.575_f32,
|
||||
];
|
||||
let a: __m512 = transmute(a_array);
|
||||
let b: __m512 = transmute(b_array);
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let b = f32x16::from_array(b_array).as_m512();
|
||||
let k: __mmask32 = 0xffffffff;
|
||||
let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 32] = transmute(c.as_u16x32());
|
||||
let result = *c.as_u16x32().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 32] = [
|
||||
0b1_10000110_0110010,
|
||||
|
|
@ -1181,7 +1181,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
|
||||
let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
|
||||
let result: [u16; 32] = transmute(c.as_u16x32());
|
||||
let result = *c.as_u16x32().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 32] = [
|
||||
0,
|
||||
|
|
@ -1221,7 +1221,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_cvtneps_pbh() {
|
||||
fn test_mm256_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1233,9 +1233,9 @@ mod tests {
|
|||
1000.158_f32,
|
||||
575.575_f32,
|
||||
];
|
||||
let a: __m256 = transmute(a_array);
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let c: __m128bh = _mm256_cvtneps_pbh(a);
|
||||
let result: [u16; 8] = transmute(c.as_u16x8());
|
||||
let result = *c.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1251,7 +1251,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_mask_cvtneps_pbh() {
|
||||
fn test_mm256_mask_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1273,11 +1273,11 @@ mod tests {
|
|||
0b1_10001000_1111010,
|
||||
0b1_10001000_0010000,
|
||||
];
|
||||
let src: __m128bh = transmute(src_array);
|
||||
let a: __m256 = transmute(a_array);
|
||||
let src = u16x8::from_array(src_array).as_m128bh();
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let k: __mmask8 = 0xff;
|
||||
let b = _mm256_mask_cvtneps_pbh(src, k, a);
|
||||
let result: [u16; 8] = transmute(b.as_u16x8());
|
||||
let result = *b.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1292,13 +1292,13 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0x0;
|
||||
let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
|
||||
let result: [u16; 8] = transmute(b.as_u16x8());
|
||||
let result = *b.as_u16x8().as_array();
|
||||
let expected_result: [u16; 8] = src_array;
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_cvtneps_pbh() {
|
||||
fn test_mm256_maskz_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1310,10 +1310,10 @@ mod tests {
|
|||
1000.158_f32,
|
||||
575.575_f32,
|
||||
];
|
||||
let a: __m256 = transmute(a_array);
|
||||
let a = f32x8::from_array(a_array).as_m256();
|
||||
let k: __mmask8 = 0xff;
|
||||
let b = _mm256_maskz_cvtneps_pbh(k, a);
|
||||
let result: [u16; 8] = transmute(b.as_u16x8());
|
||||
let result = *b.as_u16x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 8] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1328,14 +1328,14 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0x6;
|
||||
let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
|
||||
let result: [u16; 8] = transmute(b.as_u16x8());
|
||||
let result = *b.as_u16x8().as_array();
|
||||
let expected_result: [u16; 8] =
|
||||
[0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_cvtneps_pbh() {
|
||||
fn test_mm512_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1355,9 +1355,9 @@ mod tests {
|
|||
1000.158_f32,
|
||||
575.575_f32,
|
||||
];
|
||||
let a: __m512 = transmute(a_array);
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let c: __m256bh = _mm512_cvtneps_pbh(a);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1381,7 +1381,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_mask_cvtneps_pbh() {
|
||||
fn test_mm512_mask_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1419,11 +1419,11 @@ mod tests {
|
|||
0b1_10001000_1111010,
|
||||
0b1_10001000_0010000,
|
||||
];
|
||||
let src: __m256bh = transmute(src_array);
|
||||
let a: __m512 = transmute(a_array);
|
||||
let src = u16x16::from_array(src_array).as_m256bh();
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1446,13 +1446,13 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0;
|
||||
let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
let expected_result = src_array;
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_maskz_cvtneps_pbh() {
|
||||
fn test_mm512_maskz_cvtneps_pbh() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
178.125_f32,
|
||||
|
|
@ -1472,10 +1472,10 @@ mod tests {
|
|||
1000.158_f32,
|
||||
575.575_f32,
|
||||
];
|
||||
let a: __m512 = transmute(a_array);
|
||||
let a = f32x16::from_array(a_array).as_m512();
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0b0_10000110_0110010,
|
||||
|
|
@ -1498,7 +1498,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0x653a;
|
||||
let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
|
||||
let result: [u16; 16] = transmute(c.as_u16x16());
|
||||
let result = *c.as_u16x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [u16; 16] = [
|
||||
0,
|
||||
|
|
@ -1522,74 +1522,74 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_dpbf16_ps() {
|
||||
fn test_mm_dpbf16_ps() {
|
||||
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
|
||||
let a1: __m128 = transmute(a_array);
|
||||
let b1: __m128 = transmute(b_array);
|
||||
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
|
||||
let a1 = f32x4::from_array(a_array).as_m128();
|
||||
let b1 = f32x4::from_array(b_array).as_m128();
|
||||
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
|
||||
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m128 = _mm_dpbf16_ps(src, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_mask_dpbf16_ps() {
|
||||
fn test_mm_mask_dpbf16_ps() {
|
||||
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
|
||||
let a1: __m128 = transmute(a_array);
|
||||
let b1: __m128 = transmute(b_array);
|
||||
let a1 = f32x4::from_array(a_array).as_m128();
|
||||
let b1 = f32x4::from_array(b_array).as_m128();
|
||||
let k: __mmask8 = 0xf3;
|
||||
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
|
||||
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
|
||||
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
|
||||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0xff;
|
||||
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
|
||||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0;
|
||||
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_maskz_dpbf16_ps() {
|
||||
fn test_mm_maskz_dpbf16_ps() {
|
||||
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
|
||||
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
|
||||
let a1: __m128 = transmute(a_array);
|
||||
let b1: __m128 = transmute(b_array);
|
||||
let a1 = f32x4::from_array(a_array).as_m128();
|
||||
let b1 = f32x4::from_array(b_array).as_m128();
|
||||
let k: __mmask8 = 0xf3;
|
||||
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
|
||||
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
|
||||
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
|
||||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0xff;
|
||||
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
|
||||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0;
|
||||
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 4] = transmute(c.as_f32x4());
|
||||
let result = *c.as_f32x4().as_array();
|
||||
let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_dpbf16_ps() {
|
||||
fn test_mm256_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1597,16 +1597,16 @@ mod tests {
|
|||
let b_array = [
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m256 = transmute(a_array);
|
||||
let b1: __m256 = transmute(b_array);
|
||||
let a1 = f32x8::from_array(a_array).as_m256();
|
||||
let b1 = f32x8::from_array(b_array).as_m256();
|
||||
#[rustfmt::skip]
|
||||
let src: __m256 = transmute([
|
||||
let src = f32x8::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
]).as_m256();
|
||||
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m256 = _mm256_dpbf16_ps(src, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1615,7 +1615,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_mask_dpbf16_ps() {
|
||||
fn test_mm256_mask_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1623,17 +1623,17 @@ mod tests {
|
|||
let b_array = [
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m256 = transmute(a_array);
|
||||
let b1: __m256 = transmute(b_array);
|
||||
let a1 = f32x8::from_array(a_array).as_m256();
|
||||
let b1 = f32x8::from_array(b_array).as_m256();
|
||||
let k: __mmask8 = 0x33;
|
||||
#[rustfmt::skip]
|
||||
let src: __m256 = transmute([
|
||||
let src = f32x8::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
]).as_m256();
|
||||
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
|
||||
|
|
@ -1641,7 +1641,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0xff;
|
||||
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1649,7 +1649,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0;
|
||||
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
|
|
@ -1658,7 +1658,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_dpbf16_ps() {
|
||||
fn test_mm256_maskz_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1666,17 +1666,17 @@ mod tests {
|
|||
let b_array = [
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m256 = transmute(a_array);
|
||||
let b1: __m256 = transmute(b_array);
|
||||
let a1 = f32x8::from_array(a_array).as_m256();
|
||||
let b1 = f32x8::from_array(b_array).as_m256();
|
||||
let k: __mmask8 = 0x33;
|
||||
#[rustfmt::skip]
|
||||
let src: __m256 = transmute([
|
||||
let src = f32x8::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
]).as_m256();
|
||||
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
-18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
|
||||
|
|
@ -1684,7 +1684,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0xff;
|
||||
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 8] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1692,13 +1692,13 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask8 = 0;
|
||||
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 8] = transmute(c.as_f32x8());
|
||||
let result = *c.as_f32x8().as_array();
|
||||
let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
|
||||
assert_eq!(result, expected_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_dpbf16_ps() {
|
||||
fn test_mm512_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1708,16 +1708,17 @@ mod tests {
|
|||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m512 = transmute(a_array);
|
||||
let b1: __m512 = transmute(b_array);
|
||||
let src: __m512 = transmute([
|
||||
let a1 = f32x16::from_array(a_array).as_m512();
|
||||
let b1 = f32x16::from_array(b_array).as_m512();
|
||||
let src = f32x16::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
|
||||
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
])
|
||||
.as_m512();
|
||||
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m512 = _mm512_dpbf16_ps(src, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1727,7 +1728,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_mask_dpbf16_ps() {
|
||||
fn test_mm512_mask_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1737,18 +1738,18 @@ mod tests {
|
|||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m512 = transmute(a_array);
|
||||
let b1: __m512 = transmute(b_array);
|
||||
let a1 = f32x16::from_array(a_array).as_m512();
|
||||
let b1 = f32x16::from_array(b_array).as_m512();
|
||||
let k: __mmask16 = 0x3333;
|
||||
#[rustfmt::skip]
|
||||
let src: __m512 = transmute([
|
||||
let src = f32x16::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
|
||||
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
]).as_m512();
|
||||
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
|
||||
|
|
@ -1757,7 +1758,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1766,7 +1767,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0;
|
||||
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
|
||||
|
|
@ -1776,7 +1777,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512f")]
|
||||
unsafe fn test_mm512_maskz_dpbf16_ps() {
|
||||
fn test_mm512_maskz_dpbf16_ps() {
|
||||
#[rustfmt::skip]
|
||||
let a_array = [
|
||||
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
|
||||
|
|
@ -1786,18 +1787,18 @@ mod tests {
|
|||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
|
||||
];
|
||||
let a1: __m512 = transmute(a_array);
|
||||
let b1: __m512 = transmute(b_array);
|
||||
let a1 = f32x16::from_array(a_array).as_m512();
|
||||
let b1 = f32x16::from_array(b_array).as_m512();
|
||||
let k: __mmask16 = 0x3333;
|
||||
#[rustfmt::skip]
|
||||
let src: __m512 = transmute([
|
||||
let src = f32x16::from_array([
|
||||
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
|
||||
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
|
||||
]);
|
||||
]).as_m512();
|
||||
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
|
||||
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
|
||||
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
-18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
|
||||
|
|
@ -1806,7 +1807,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0xffff;
|
||||
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
|
||||
|
|
@ -1815,7 +1816,7 @@ mod tests {
|
|||
assert_eq!(result, expected_result);
|
||||
let k: __mmask16 = 0;
|
||||
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
|
||||
let result: [f32; 16] = transmute(c.as_f32x16());
|
||||
let result = *c.as_f32x16().as_array();
|
||||
#[rustfmt::skip]
|
||||
let expected_result: [f32; 16] = [
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
|
||||
|
|
@ -1943,28 +1944,28 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_cvtneps_pbh() {
|
||||
fn test_mm_cvtneps_pbh() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a));
|
||||
let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_pbh(a)) };
|
||||
let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_mask_cvtneps_pbh() {
|
||||
fn test_mm_mask_cvtneps_pbh() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
|
||||
let k = 0b1010;
|
||||
let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a));
|
||||
let r: u16x4 = unsafe { transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a)) };
|
||||
let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bf16,avx512vl")]
|
||||
unsafe fn test_mm_maskz_cvtneps_pbh() {
|
||||
fn test_mm_maskz_cvtneps_pbh() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let k = 0b1010;
|
||||
let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a));
|
||||
let r: u16x4 = unsafe { transmute_copy(&_mm_maskz_cvtneps_pbh(k, a)) };
|
||||
let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17098,37 +17098,37 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_loadu_epi16() {
|
||||
const fn test_mm512_loadu_epi16() {
|
||||
#[rustfmt::skip]
|
||||
let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let r = _mm512_loadu_epi16(&a[0]);
|
||||
let r = unsafe { _mm512_loadu_epi16(&a[0]) };
|
||||
#[rustfmt::skip]
|
||||
let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_loadu_epi16() {
|
||||
const fn test_mm256_loadu_epi16() {
|
||||
let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let r = _mm256_loadu_epi16(&a[0]);
|
||||
let r = unsafe { _mm256_loadu_epi16(&a[0]) };
|
||||
let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_loadu_epi16() {
|
||||
const fn test_mm_loadu_epi16() {
|
||||
let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let r = _mm_loadu_epi16(&a[0]);
|
||||
let r = unsafe { _mm_loadu_epi16(&a[0]) };
|
||||
let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_loadu_epi8() {
|
||||
const fn test_mm512_loadu_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let r = _mm512_loadu_epi8(&a[0]);
|
||||
let r = unsafe { _mm512_loadu_epi8(&a[0]) };
|
||||
#[rustfmt::skip]
|
||||
let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
|
|
@ -17136,73 +17136,85 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_loadu_epi8() {
|
||||
const fn test_mm256_loadu_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let r = _mm256_loadu_epi8(&a[0]);
|
||||
let r = unsafe { _mm256_loadu_epi8(&a[0]) };
|
||||
#[rustfmt::skip]
|
||||
let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_loadu_epi8() {
|
||||
const fn test_mm_loadu_epi8() {
|
||||
let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let r = _mm_loadu_epi8(&a[0]);
|
||||
let r = unsafe { _mm_loadu_epi8(&a[0]) };
|
||||
let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_storeu_epi16() {
|
||||
const fn test_mm512_storeu_epi16() {
|
||||
let a = _mm512_set1_epi16(9);
|
||||
let mut r = _mm512_undefined_epi32();
|
||||
_mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
unsafe {
|
||||
_mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
}
|
||||
assert_eq_m512i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_storeu_epi16() {
|
||||
const fn test_mm256_storeu_epi16() {
|
||||
let a = _mm256_set1_epi16(9);
|
||||
let mut r = _mm256_set1_epi32(0);
|
||||
_mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
unsafe {
|
||||
_mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
}
|
||||
assert_eq_m256i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_storeu_epi16() {
|
||||
const fn test_mm_storeu_epi16() {
|
||||
let a = _mm_set1_epi16(9);
|
||||
let mut r = _mm_set1_epi32(0);
|
||||
_mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
unsafe {
|
||||
_mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
|
||||
}
|
||||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_storeu_epi8() {
|
||||
const fn test_mm512_storeu_epi8() {
|
||||
let a = _mm512_set1_epi8(9);
|
||||
let mut r = _mm512_undefined_epi32();
|
||||
_mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
unsafe {
|
||||
_mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
}
|
||||
assert_eq_m512i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_storeu_epi8() {
|
||||
const fn test_mm256_storeu_epi8() {
|
||||
let a = _mm256_set1_epi8(9);
|
||||
let mut r = _mm256_set1_epi32(0);
|
||||
_mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
unsafe {
|
||||
_mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
}
|
||||
assert_eq_m256i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_storeu_epi8() {
|
||||
const fn test_mm_storeu_epi8() {
|
||||
let a = _mm_set1_epi8(9);
|
||||
let mut r = _mm_set1_epi32(0);
|
||||
_mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
unsafe {
|
||||
_mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
|
||||
}
|
||||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_mask_loadu_epi16() {
|
||||
const fn test_mm512_mask_loadu_epi16() {
|
||||
let src = _mm512_set1_epi16(42);
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -17210,52 +17222,54 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
|
||||
let e = &[
|
||||
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
|
||||
];
|
||||
let e = _mm512_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_maskz_loadu_epi16() {
|
||||
const fn test_mm512_maskz_loadu_epi16() {
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
let r = _mm512_maskz_loadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
|
||||
let e = &[
|
||||
0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
|
||||
26, 0, 28, 0, 30, 0, 32,
|
||||
];
|
||||
let e = _mm512_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_mask_storeu_epi16() {
|
||||
const fn test_mm512_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 32];
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let a = _mm512_loadu_epi16(a.as_ptr());
|
||||
let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
_mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
|
||||
unsafe {
|
||||
_mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
|
||||
}
|
||||
let e = &[
|
||||
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
|
||||
];
|
||||
let e = _mm512_loadu_epi16(e.as_ptr());
|
||||
assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
|
||||
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_mask_loadu_epi8() {
|
||||
const fn test_mm512_mask_loadu_epi8() {
|
||||
let src = _mm512_set1_epi8(42);
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -17264,18 +17278,18 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
|
||||
let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
|
||||
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
|
||||
];
|
||||
let e = _mm512_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_maskz_loadu_epi8() {
|
||||
const fn test_mm512_maskz_loadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
|
|
@ -17283,77 +17297,81 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
|
||||
let r = _mm512_maskz_loadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
|
||||
let e = &[
|
||||
0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
|
||||
26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
|
||||
50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
];
|
||||
let e = _mm512_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_mm512_mask_storeu_epi8() {
|
||||
const fn test_mm512_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 64];
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
|
||||
];
|
||||
let a = _mm512_loadu_epi8(a.as_ptr());
|
||||
let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
|
||||
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
|
||||
_mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
|
||||
unsafe {
|
||||
_mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
|
||||
}
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
|
||||
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
|
||||
];
|
||||
let e = _mm512_loadu_epi8(e.as_ptr());
|
||||
assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
|
||||
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_mask_loadu_epi16() {
|
||||
const fn test_mm256_mask_loadu_epi16() {
|
||||
let src = _mm256_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
|
||||
let e = &[
|
||||
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
|
||||
];
|
||||
let e = _mm256_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_maskz_loadu_epi16() {
|
||||
const fn test_mm256_maskz_loadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm256_maskz_loadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
|
||||
let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
|
||||
let e = _mm256_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_mask_storeu_epi16() {
|
||||
const fn test_mm256_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 16];
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let a = _mm256_loadu_epi16(a.as_ptr());
|
||||
let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
|
||||
let m = 0b11101000_11001010;
|
||||
_mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
|
||||
unsafe {
|
||||
_mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
|
||||
}
|
||||
let e = &[
|
||||
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
|
||||
];
|
||||
let e = _mm256_loadu_epi16(e.as_ptr());
|
||||
assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
|
||||
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_mask_loadu_epi8() {
|
||||
const fn test_mm256_mask_loadu_epi8() {
|
||||
let src = _mm256_set1_epi8(42);
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -17361,122 +17379,124 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
|
||||
];
|
||||
let e = _mm256_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_maskz_loadu_epi8() {
|
||||
const fn test_mm256_maskz_loadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
let r = _mm256_maskz_loadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
|
||||
let e = &[
|
||||
0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
|
||||
26, 0, 28, 0, 30, 0, 32,
|
||||
];
|
||||
let e = _mm256_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm256_mask_storeu_epi8() {
|
||||
const fn test_mm256_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 32];
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let a = _mm256_loadu_epi8(a.as_ptr());
|
||||
let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
|
||||
let m = 0b10101010_11001100_11101000_11001010;
|
||||
_mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
|
||||
unsafe {
|
||||
_mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
|
||||
}
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
|
||||
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
|
||||
];
|
||||
let e = _mm256_loadu_epi8(e.as_ptr());
|
||||
assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
|
||||
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_mask_loadu_epi16() {
|
||||
const fn test_mm_mask_loadu_epi16() {
|
||||
let src = _mm_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11001010;
|
||||
let r = _mm_mask_loadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
|
||||
let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
|
||||
let e = _mm_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_maskz_loadu_epi16() {
|
||||
const fn test_mm_maskz_loadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11001010;
|
||||
let r = _mm_maskz_loadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
|
||||
let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
|
||||
let e = _mm_loadu_epi16(e.as_ptr());
|
||||
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_mask_storeu_epi16() {
|
||||
const fn test_mm_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 8];
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let a = _mm_loadu_epi16(a.as_ptr());
|
||||
let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
|
||||
let m = 0b11001010;
|
||||
_mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
|
||||
unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
|
||||
let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
|
||||
let e = _mm_loadu_epi16(e.as_ptr());
|
||||
assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
|
||||
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
|
||||
assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_mask_loadu_epi8() {
|
||||
const fn test_mm_mask_loadu_epi8() {
|
||||
let src = _mm_set1_epi8(42);
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm_mask_loadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
|
||||
];
|
||||
let e = _mm_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_maskz_loadu_epi8() {
|
||||
const fn test_mm_maskz_loadu_epi8() {
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm_maskz_loadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
|
||||
let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
|
||||
let e = _mm_loadu_epi8(e.as_ptr());
|
||||
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
const unsafe fn test_mm_mask_storeu_epi8() {
|
||||
const fn test_mm_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 16];
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let a = _mm_loadu_epi8(a.as_ptr());
|
||||
let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
|
||||
let m = 0b11101000_11001010;
|
||||
_mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
|
||||
unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
|
||||
let e = &[
|
||||
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
|
||||
];
|
||||
let e = _mm_loadu_epi8(e.as_ptr());
|
||||
assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
|
||||
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
|
||||
assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
|
|
@ -20714,36 +20734,40 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_store_mask64() {
|
||||
const fn test_store_mask64() {
|
||||
let a: __mmask64 =
|
||||
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
|
||||
let mut r = 0;
|
||||
_store_mask64(&mut r, a);
|
||||
unsafe {
|
||||
_store_mask64(&mut r, a);
|
||||
}
|
||||
assert_eq!(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_store_mask32() {
|
||||
const fn test_store_mask32() {
|
||||
let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
|
||||
let mut r = 0;
|
||||
_store_mask32(&mut r, a);
|
||||
unsafe {
|
||||
_store_mask32(&mut r, a);
|
||||
}
|
||||
assert_eq!(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_load_mask64() {
|
||||
const fn test_load_mask64() {
|
||||
let p: __mmask64 =
|
||||
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
|
||||
let r = _load_mask64(&p);
|
||||
let r = unsafe { _load_mask64(&p) };
|
||||
let e: __mmask64 =
|
||||
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_load_mask32() {
|
||||
const fn test_load_mask32() {
|
||||
let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
|
||||
let r = _load_mask32(&p);
|
||||
let r = unsafe { _load_mask32(&p) };
|
||||
let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
|
@ -21163,21 +21187,21 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_kortest_mask32_u8() {
|
||||
const fn test_kortest_mask32_u8() {
|
||||
let a: __mmask32 = 0b0110100101101001_0110100101101001;
|
||||
let b: __mmask32 = 0b1011011010110110_1011011010110110;
|
||||
let mut all_ones: u8 = 0;
|
||||
let r = _kortest_mask32_u8(a, b, &mut all_ones);
|
||||
let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
|
||||
assert_eq!(r, 0);
|
||||
assert_eq!(all_ones, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_kortest_mask64_u8() {
|
||||
const fn test_kortest_mask64_u8() {
|
||||
let a: __mmask64 = 0b0110100101101001_0110100101101001;
|
||||
let b: __mmask64 = 0b1011011010110110_1011011010110110;
|
||||
let mut all_ones: u8 = 0;
|
||||
let r = _kortest_mask64_u8(a, b, &mut all_ones);
|
||||
let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
|
||||
assert_eq!(r, 0);
|
||||
assert_eq!(all_ones, 0);
|
||||
}
|
||||
|
|
@ -21299,11 +21323,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_ktest_mask32_u8() {
|
||||
const fn test_ktest_mask32_u8() {
|
||||
let a: __mmask32 = 0b0110100100111100_0110100100111100;
|
||||
let b: __mmask32 = 0b1001011011000011_1001011011000011;
|
||||
let mut and_not: u8 = 0;
|
||||
let r = _ktest_mask32_u8(a, b, &mut and_not);
|
||||
let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
|
||||
assert_eq!(r, 1);
|
||||
assert_eq!(and_not, 0);
|
||||
}
|
||||
|
|
@ -21325,11 +21349,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
const unsafe fn test_ktest_mask64_u8() {
|
||||
const fn test_ktest_mask64_u8() {
|
||||
let a: __mmask64 = 0b0110100100111100_0110100100111100;
|
||||
let b: __mmask64 = 0b1001011011000011_1001011011000011;
|
||||
let mut and_not: u8 = 0;
|
||||
let r = _ktest_mask64_u8(a, b, &mut and_not);
|
||||
let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
|
||||
assert_eq!(r, 1);
|
||||
assert_eq!(and_not, 0);
|
||||
}
|
||||
|
|
@ -21951,32 +21975,38 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
|
||||
fn test_mm512_mask_cvtsepi16_storeu_epi8() {
|
||||
let a = _mm512_set1_epi16(i16::MAX);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm512_mask_cvtsepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm512_mask_cvtsepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
}
|
||||
let e = _mm256_set1_epi8(i8::MAX);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
|
||||
fn test_mm256_mask_cvtsepi16_storeu_epi8() {
|
||||
let a = _mm256_set1_epi16(i16::MAX);
|
||||
let mut r = _mm_undefined_si128();
|
||||
_mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
unsafe {
|
||||
_mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
}
|
||||
let e = _mm_set1_epi8(i8::MAX);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
|
||||
fn test_mm_mask_cvtsepi16_storeu_epi8() {
|
||||
let a = _mm_set1_epi16(i16::MAX);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
unsafe {
|
||||
_mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
}
|
||||
#[rustfmt::skip]
|
||||
let e = _mm_set_epi8(
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
|
@ -21986,63 +22016,75 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
|
||||
fn test_mm512_mask_cvtepi16_storeu_epi8() {
|
||||
let a = _mm512_set1_epi16(8);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm512_mask_cvtepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm512_mask_cvtepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
}
|
||||
let e = _mm256_set1_epi8(8);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
|
||||
fn test_mm256_mask_cvtepi16_storeu_epi8() {
|
||||
let a = _mm256_set1_epi16(8);
|
||||
let mut r = _mm_undefined_si128();
|
||||
_mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
unsafe {
|
||||
_mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
}
|
||||
let e = _mm_set1_epi8(8);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
|
||||
fn test_mm_mask_cvtepi16_storeu_epi8() {
|
||||
let a = _mm_set1_epi16(8);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
unsafe {
|
||||
_mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
}
|
||||
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
|
||||
fn test_mm512_mask_cvtusepi16_storeu_epi8() {
|
||||
let a = _mm512_set1_epi16(i16::MAX);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
_mm512_mask_cvtusepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm512_mask_cvtusepi16_storeu_epi8(
|
||||
&mut r as *mut _ as *mut i8,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
a,
|
||||
);
|
||||
}
|
||||
let e = _mm256_set1_epi8(u8::MAX as i8);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
|
||||
fn test_mm256_mask_cvtusepi16_storeu_epi8() {
|
||||
let a = _mm256_set1_epi16(i16::MAX);
|
||||
let mut r = _mm_undefined_si128();
|
||||
_mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
unsafe {
|
||||
_mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
|
||||
}
|
||||
let e = _mm_set1_epi8(u8::MAX as i8);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
|
||||
fn test_mm_mask_cvtusepi16_storeu_epi8() {
|
||||
let a = _mm_set1_epi16(i16::MAX);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
unsafe {
|
||||
_mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
|
||||
}
|
||||
#[rustfmt::skip]
|
||||
let e = _mm_set_epi8(
|
||||
0, 0, 0, 0,
|
||||
|
|
|
|||
|
|
@ -7401,27 +7401,25 @@ unsafe extern "C" {
|
|||
mod tests {
|
||||
use super::*;
|
||||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use crate::core_arch::x86::*;
|
||||
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::x86::*;
|
||||
use crate::mem::transmute;
|
||||
const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
|
||||
const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
|
||||
|
||||
const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
|
||||
const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
|
||||
const AND_64: f64 = f64::from_bits(0x1111111111111111);
|
||||
const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
|
||||
const OR_64: f64 = f64::from_bits(0x7777777777777777);
|
||||
const XOR_64: f64 = f64::from_bits(0x6666666666666666);
|
||||
|
||||
const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
|
||||
const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
|
||||
const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
|
||||
const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
|
||||
const OPRND1_32: f32 = f32::from_bits(0x33333333);
|
||||
const OPRND2_32: f32 = f32::from_bits(0x55555555);
|
||||
|
||||
const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
|
||||
const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
|
||||
|
||||
const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
|
||||
const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
|
||||
const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
|
||||
const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
|
||||
const AND_32: f32 = f32::from_bits(0x11111111);
|
||||
const ANDN_32: f32 = f32::from_bits(0x44444444);
|
||||
const OR_32: f32 = f32::from_bits(0x77777777);
|
||||
const XOR_32: f32 = f32::from_bits(0x66666666);
|
||||
|
||||
#[simd_test(enable = "avx512dq,avx512vl")]
|
||||
const fn test_mm_mask_and_pd() {
|
||||
|
|
@ -10023,11 +10021,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_kortest_mask8_u8() {
|
||||
const fn test_kortest_mask8_u8() {
|
||||
let a: __mmask8 = 0b01101001;
|
||||
let b: __mmask8 = 0b10110110;
|
||||
let mut all_ones: u8 = 0;
|
||||
let r = _kortest_mask8_u8(a, b, &mut all_ones);
|
||||
let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
|
||||
assert_eq!(r, 0);
|
||||
assert_eq!(all_ones, 1);
|
||||
}
|
||||
|
|
@ -10049,7 +10047,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_kshiftli_mask8() {
|
||||
const fn test_kshiftli_mask8() {
|
||||
let a: __mmask8 = 0b01101001;
|
||||
let r = _kshiftli_mask8::<3>(a);
|
||||
let e: __mmask8 = 0b01001000;
|
||||
|
|
@ -10089,11 +10087,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_ktest_mask8_u8() {
|
||||
const fn test_ktest_mask8_u8() {
|
||||
let a: __mmask8 = 0b01101001;
|
||||
let b: __mmask8 = 0b10010110;
|
||||
let mut and_not: u8 = 0;
|
||||
let r = _ktest_mask8_u8(a, b, &mut and_not);
|
||||
let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
|
||||
assert_eq!(r, 1);
|
||||
assert_eq!(and_not, 0);
|
||||
}
|
||||
|
|
@ -10115,11 +10113,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_ktest_mask16_u8() {
|
||||
const fn test_ktest_mask16_u8() {
|
||||
let a: __mmask16 = 0b0110100100111100;
|
||||
let b: __mmask16 = 0b1001011011000011;
|
||||
let mut and_not: u8 = 0;
|
||||
let r = _ktest_mask16_u8(a, b, &mut and_not);
|
||||
let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
|
||||
assert_eq!(r, 1);
|
||||
assert_eq!(and_not, 0);
|
||||
}
|
||||
|
|
@ -10141,18 +10139,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_load_mask8() {
|
||||
const fn test_load_mask8() {
|
||||
let a: __mmask8 = 0b01101001;
|
||||
let r = _load_mask8(&a);
|
||||
let r = unsafe { _load_mask8(&a) };
|
||||
let e: __mmask8 = 0b01101001;
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
const unsafe fn test_store_mask8() {
|
||||
const fn test_store_mask8() {
|
||||
let a: __mmask8 = 0b01101001;
|
||||
let mut r = 0;
|
||||
_store_mask8(&mut r, a);
|
||||
unsafe {
|
||||
_store_mask8(&mut r, a);
|
||||
}
|
||||
let e: __mmask8 = 0b01101001;
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -16932,7 +16932,6 @@ unsafe extern "C" {
|
|||
mod tests {
|
||||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use crate::core_arch::x86::*;
|
||||
use crate::mem::transmute;
|
||||
use crate::ptr::{addr_of, addr_of_mut};
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
|
|
@ -17569,72 +17568,72 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_load_ph() {
|
||||
const fn test_mm_load_ph() {
|
||||
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
let b = _mm_load_ph(addr_of!(a).cast());
|
||||
let b = unsafe { _mm_load_ph(addr_of!(a).cast()) };
|
||||
assert_eq_m128h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm256_load_ph() {
|
||||
const fn test_mm256_load_ph() {
|
||||
let a = _mm256_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
);
|
||||
let b = _mm256_load_ph(addr_of!(a).cast());
|
||||
let b = unsafe { _mm256_load_ph(addr_of!(a).cast()) };
|
||||
assert_eq_m256h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
const unsafe fn test_mm512_load_ph() {
|
||||
const fn test_mm512_load_ph() {
|
||||
let a = _mm512_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
|
||||
31.0, 32.0,
|
||||
);
|
||||
let b = _mm512_load_ph(addr_of!(a).cast());
|
||||
let b = unsafe { _mm512_load_ph(addr_of!(a).cast()) };
|
||||
assert_eq_m512h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_load_sh() {
|
||||
const fn test_mm_load_sh() {
|
||||
let a = _mm_set_sh(1.0);
|
||||
let b = _mm_load_sh(addr_of!(a).cast());
|
||||
let b = unsafe { _mm_load_sh(addr_of!(a).cast()) };
|
||||
assert_eq_m128h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_mask_load_sh() {
|
||||
fn test_mm_mask_load_sh() {
|
||||
let a = _mm_set_sh(1.0);
|
||||
let src = _mm_set_sh(2.);
|
||||
let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
|
||||
let b = unsafe { _mm_mask_load_sh(src, 1, addr_of!(a).cast()) };
|
||||
assert_eq_m128h(a, b);
|
||||
let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
|
||||
let b = unsafe { _mm_mask_load_sh(src, 0, addr_of!(a).cast()) };
|
||||
assert_eq_m128h(src, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_maskz_load_sh() {
|
||||
fn test_mm_maskz_load_sh() {
|
||||
let a = _mm_set_sh(1.0);
|
||||
let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
|
||||
let b = unsafe { _mm_maskz_load_sh(1, addr_of!(a).cast()) };
|
||||
assert_eq_m128h(a, b);
|
||||
let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
|
||||
let b = unsafe { _mm_maskz_load_sh(0, addr_of!(a).cast()) };
|
||||
assert_eq_m128h(_mm_setzero_ph(), b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_loadu_ph() {
|
||||
const fn test_mm_loadu_ph() {
|
||||
let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
|
||||
let r = _mm_loadu_ph(array.as_ptr());
|
||||
let r = unsafe { _mm_loadu_ph(array.as_ptr()) };
|
||||
let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm256_loadu_ph() {
|
||||
const fn test_mm256_loadu_ph() {
|
||||
let array = [
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
];
|
||||
let r = _mm256_loadu_ph(array.as_ptr());
|
||||
let r = unsafe { _mm256_loadu_ph(array.as_ptr()) };
|
||||
let e = _mm256_setr_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
);
|
||||
|
|
@ -17642,13 +17641,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
const unsafe fn test_mm512_loadu_ph() {
|
||||
const fn test_mm512_loadu_ph() {
|
||||
let array = [
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
|
||||
31.0, 32.0,
|
||||
];
|
||||
let r = _mm512_loadu_ph(array.as_ptr());
|
||||
let r = unsafe { _mm512_loadu_ph(array.as_ptr()) };
|
||||
let e = _mm512_setr_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
|
||||
|
|
@ -17686,81 +17685,99 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_store_ph() {
|
||||
const fn test_mm_store_ph() {
|
||||
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
let mut b = _mm_setzero_ph();
|
||||
_mm_store_ph(addr_of_mut!(b).cast(), a);
|
||||
unsafe {
|
||||
_mm_store_ph(addr_of_mut!(b).cast(), a);
|
||||
}
|
||||
assert_eq_m128h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm256_store_ph() {
|
||||
const fn test_mm256_store_ph() {
|
||||
let a = _mm256_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
);
|
||||
let mut b = _mm256_setzero_ph();
|
||||
_mm256_store_ph(addr_of_mut!(b).cast(), a);
|
||||
unsafe {
|
||||
_mm256_store_ph(addr_of_mut!(b).cast(), a);
|
||||
}
|
||||
assert_eq_m256h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
const unsafe fn test_mm512_store_ph() {
|
||||
const fn test_mm512_store_ph() {
|
||||
let a = _mm512_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
|
||||
31.0, 32.0,
|
||||
);
|
||||
let mut b = _mm512_setzero_ph();
|
||||
_mm512_store_ph(addr_of_mut!(b).cast(), a);
|
||||
unsafe {
|
||||
_mm512_store_ph(addr_of_mut!(b).cast(), a);
|
||||
}
|
||||
assert_eq_m512h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_store_sh() {
|
||||
const fn test_mm_store_sh() {
|
||||
let a = _mm_set_sh(1.0);
|
||||
let mut b = _mm_setzero_ph();
|
||||
_mm_store_sh(addr_of_mut!(b).cast(), a);
|
||||
unsafe {
|
||||
_mm_store_sh(addr_of_mut!(b).cast(), a);
|
||||
}
|
||||
assert_eq_m128h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_mask_store_sh() {
|
||||
fn test_mm_mask_store_sh() {
|
||||
let a = _mm_set_sh(1.0);
|
||||
let mut b = _mm_setzero_ph();
|
||||
_mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
|
||||
unsafe {
|
||||
_mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
|
||||
}
|
||||
assert_eq_m128h(_mm_setzero_ph(), b);
|
||||
_mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
|
||||
unsafe {
|
||||
_mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
|
||||
}
|
||||
assert_eq_m128h(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm_storeu_ph() {
|
||||
const fn test_mm_storeu_ph() {
|
||||
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
let mut array = [0.0; 8];
|
||||
_mm_storeu_ph(array.as_mut_ptr(), a);
|
||||
assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
|
||||
unsafe {
|
||||
_mm_storeu_ph(array.as_mut_ptr(), a);
|
||||
}
|
||||
assert_eq_m128h(a, unsafe { _mm_loadu_ph(array.as_ptr()) });
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const unsafe fn test_mm256_storeu_ph() {
|
||||
const fn test_mm256_storeu_ph() {
|
||||
let a = _mm256_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
);
|
||||
let mut array = [0.0; 16];
|
||||
_mm256_storeu_ph(array.as_mut_ptr(), a);
|
||||
assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
|
||||
unsafe {
|
||||
_mm256_storeu_ph(array.as_mut_ptr(), a);
|
||||
}
|
||||
assert_eq_m256h(a, unsafe { _mm256_loadu_ph(array.as_ptr()) });
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
const unsafe fn test_mm512_storeu_ph() {
|
||||
const fn test_mm512_storeu_ph() {
|
||||
let a = _mm512_set_ph(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
|
||||
31.0, 32.0,
|
||||
);
|
||||
let mut array = [0.0; 32];
|
||||
_mm512_storeu_ph(array.as_mut_ptr(), a);
|
||||
assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
|
||||
unsafe {
|
||||
_mm512_storeu_ph(array.as_mut_ptr(), a);
|
||||
}
|
||||
assert_eq_m512h(a, unsafe { _mm512_loadu_ph(array.as_ptr()) });
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
|
|
@ -23993,16 +24010,16 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
const fn test_mm256_reduce_mul_ph() {
|
||||
let a = _mm256_set1_ph(2.0);
|
||||
let a = _mm256_set1_ph(1.2);
|
||||
let r = _mm256_reduce_mul_ph(a);
|
||||
assert_eq!(r, 65536.0);
|
||||
assert_eq!(r, 18.5);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
const fn test_mm512_reduce_mul_ph() {
|
||||
let a = _mm512_set1_ph(2.0);
|
||||
let a = _mm512_set1_ph(1.2);
|
||||
let r = _mm512_reduce_mul_ph(a);
|
||||
assert_eq!(r, 16777216.0);
|
||||
assert_eq!(r, 342.3);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
|
|
|
|||
|
|
@ -3932,7 +3932,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_mask_expandloadu_epi16() {
|
||||
fn test_mm512_mask_expandloadu_epi16() {
|
||||
let src = _mm512_set1_epi16(42);
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -3940,7 +3940,7 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111;
|
||||
let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm512_mask_expandloadu_epi16(src, m, black_box(p)) };
|
||||
let e = _mm512_set_epi16(
|
||||
16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
|
||||
42, 42, 42, 42, 42, 4, 3, 2, 1,
|
||||
|
|
@ -3949,14 +3949,14 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_epi16() {
|
||||
fn test_mm512_maskz_expandloadu_epi16() {
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111;
|
||||
let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm512_maskz_expandloadu_epi16(m, black_box(p)) };
|
||||
let e = _mm512_set_epi16(
|
||||
16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 4, 3, 2, 1,
|
||||
|
|
@ -3965,49 +3965,49 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_epi16() {
|
||||
fn test_mm256_mask_expandloadu_epi16() {
|
||||
let src = _mm256_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm256_mask_expandloadu_epi16(src, m, black_box(p)) };
|
||||
let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_epi16() {
|
||||
fn test_mm256_maskz_expandloadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm256_maskz_expandloadu_epi16(m, black_box(p)) };
|
||||
let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_epi16() {
|
||||
fn test_mm_mask_expandloadu_epi16() {
|
||||
let src = _mm_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
|
||||
let r = unsafe { _mm_mask_expandloadu_epi16(src, m, black_box(p)) };
|
||||
let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_epi16() {
|
||||
fn test_mm_maskz_expandloadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
|
||||
let r = unsafe { _mm_maskz_expandloadu_epi16(m, black_box(p)) };
|
||||
let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_mask_expandloadu_epi8() {
|
||||
fn test_mm512_mask_expandloadu_epi8() {
|
||||
let src = _mm512_set1_epi8(42);
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -4016,7 +4016,7 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
|
||||
let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm512_mask_expandloadu_epi8(src, m, black_box(p)) };
|
||||
let e = _mm512_set_epi8(
|
||||
32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
|
||||
42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
|
||||
|
|
@ -4026,7 +4026,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_epi8() {
|
||||
fn test_mm512_maskz_expandloadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
|
|
@ -4034,7 +4034,7 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
|
||||
let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm512_maskz_expandloadu_epi8(m, black_box(p)) };
|
||||
let e = _mm512_set_epi8(
|
||||
32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
|
||||
|
|
@ -4044,7 +4044,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_epi8() {
|
||||
fn test_mm256_mask_expandloadu_epi8() {
|
||||
let src = _mm256_set1_epi8(42);
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -4052,7 +4052,7 @@ mod tests {
|
|||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111;
|
||||
let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm256_mask_expandloadu_epi8(src, m, black_box(p)) };
|
||||
let e = _mm256_set_epi8(
|
||||
16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
|
||||
42, 42, 42, 42, 42, 4, 3, 2, 1,
|
||||
|
|
@ -4061,14 +4061,14 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_epi8() {
|
||||
fn test_mm256_maskz_expandloadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010_11110000_00001111;
|
||||
let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm256_maskz_expandloadu_epi8(m, black_box(p)) };
|
||||
let e = _mm256_set_epi8(
|
||||
16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 4, 3, 2, 1,
|
||||
|
|
@ -4077,36 +4077,44 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_epi8() {
|
||||
fn test_mm_mask_expandloadu_epi8() {
|
||||
let src = _mm_set1_epi8(42);
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
|
||||
let r = unsafe { _mm_mask_expandloadu_epi8(src, m, black_box(p)) };
|
||||
let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_epi8() {
|
||||
fn test_mm_maskz_expandloadu_epi8() {
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
|
||||
let r = unsafe { _mm_maskz_expandloadu_epi8(m, black_box(p)) };
|
||||
let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_mask_compressstoreu_epi16() {
|
||||
fn test_mm512_mask_compressstoreu_epi16() {
|
||||
let a = _mm512_set_epi16(
|
||||
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
|
||||
10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
);
|
||||
let mut r = [0_i16; 32];
|
||||
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i16; 32]);
|
||||
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
|
||||
unsafe {
|
||||
_mm512_mask_compressstoreu_epi16(
|
||||
r.as_mut_ptr(),
|
||||
0b11110000_11001010_11111111_00000000,
|
||||
a,
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
&r,
|
||||
&[
|
||||
|
|
@ -4117,40 +4125,52 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_mask_compressstoreu_epi16() {
|
||||
fn test_mm256_mask_compressstoreu_epi16() {
|
||||
let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let mut r = [0_i16; 16];
|
||||
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i16; 16]);
|
||||
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
|
||||
unsafe {
|
||||
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
|
||||
}
|
||||
assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_mask_compressstoreu_epi16() {
|
||||
fn test_mm_mask_compressstoreu_epi16() {
|
||||
let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let mut r = [0_i16; 8];
|
||||
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i16; 8]);
|
||||
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
|
||||
unsafe {
|
||||
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
|
||||
}
|
||||
assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2")]
|
||||
unsafe fn test_mm512_mask_compressstoreu_epi8() {
|
||||
fn test_mm512_mask_compressstoreu_epi8() {
|
||||
let a = _mm512_set_epi8(
|
||||
64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
|
||||
42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
|
||||
20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
);
|
||||
let mut r = [0_i8; 64];
|
||||
_mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i8; 64]);
|
||||
_mm512_mask_compressstoreu_epi8(
|
||||
r.as_mut_ptr(),
|
||||
0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
|
||||
a,
|
||||
);
|
||||
unsafe {
|
||||
_mm512_mask_compressstoreu_epi8(
|
||||
r.as_mut_ptr(),
|
||||
0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
|
||||
a,
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
&r,
|
||||
&[
|
||||
|
|
@ -4162,15 +4182,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm256_mask_compressstoreu_epi8() {
|
||||
fn test_mm256_mask_compressstoreu_epi8() {
|
||||
let a = _mm256_set_epi8(
|
||||
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
|
||||
10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
);
|
||||
let mut r = [0_i8; 32];
|
||||
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i8; 32]);
|
||||
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
|
||||
unsafe {
|
||||
_mm256_mask_compressstoreu_epi8(
|
||||
r.as_mut_ptr(),
|
||||
0b11110000_11001010_11111111_00000000,
|
||||
a,
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
&r,
|
||||
&[
|
||||
|
|
@ -4181,12 +4209,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "avx512vbmi2,avx512vl")]
|
||||
unsafe fn test_mm_mask_compressstoreu_epi8() {
|
||||
fn test_mm_mask_compressstoreu_epi8() {
|
||||
let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let mut r = [0_i8; 16];
|
||||
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
unsafe {
|
||||
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
|
||||
}
|
||||
assert_eq!(&r, &[0_i8; 16]);
|
||||
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
|
||||
unsafe {
|
||||
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
|
||||
}
|
||||
assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -242,127 +242,127 @@ mod tests {
|
|||
const BF16_EIGHT: u16 = 0b0_10000010_0000000;
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_bcstnebf16_ps() {
|
||||
fn test_mm_bcstnebf16_ps() {
|
||||
let a = bf16::from_bits(BF16_ONE);
|
||||
let r = _mm_bcstnebf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_bcstnebf16_ps(addr_of!(a)) };
|
||||
let e = _mm_set_ps(1., 1., 1., 1.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_bcstnebf16_ps() {
|
||||
fn test_mm256_bcstnebf16_ps() {
|
||||
let a = bf16::from_bits(BF16_ONE);
|
||||
let r = _mm256_bcstnebf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_bcstnebf16_ps(addr_of!(a)) };
|
||||
let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_bcstnesh_ps() {
|
||||
fn test_mm_bcstnesh_ps() {
|
||||
let a = 1.0_f16;
|
||||
let r = _mm_bcstnesh_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_bcstnesh_ps(addr_of!(a)) };
|
||||
let e = _mm_set_ps(1., 1., 1., 1.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_bcstnesh_ps() {
|
||||
fn test_mm256_bcstnesh_ps() {
|
||||
let a = 1.0_f16;
|
||||
let r = _mm256_bcstnesh_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_bcstnesh_ps(addr_of!(a)) };
|
||||
let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_cvtneebf16_ps() {
|
||||
fn test_mm_cvtneebf16_ps() {
|
||||
let a = __m128bh([
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
]);
|
||||
let r = _mm_cvtneebf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_cvtneebf16_ps(addr_of!(a)) };
|
||||
let e = _mm_setr_ps(1., 3., 5., 7.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_cvtneebf16_ps() {
|
||||
fn test_mm256_cvtneebf16_ps() {
|
||||
let a = __m256bh([
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
]);
|
||||
let r = _mm256_cvtneebf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_cvtneebf16_ps(addr_of!(a)) };
|
||||
let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_cvtneeph_ps() {
|
||||
fn test_mm_cvtneeph_ps() {
|
||||
let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
|
||||
let r = _mm_cvtneeph_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_cvtneeph_ps(addr_of!(a)) };
|
||||
let e = _mm_setr_ps(1., 3., 5., 7.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_cvtneeph_ps() {
|
||||
fn test_mm256_cvtneeph_ps() {
|
||||
let a = __m256h([
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
]);
|
||||
let r = _mm256_cvtneeph_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_cvtneeph_ps(addr_of!(a)) };
|
||||
let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_cvtneobf16_ps() {
|
||||
fn test_mm_cvtneobf16_ps() {
|
||||
let a = __m128bh([
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
]);
|
||||
let r = _mm_cvtneobf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_cvtneobf16_ps(addr_of!(a)) };
|
||||
let e = _mm_setr_ps(2., 4., 6., 8.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_cvtneobf16_ps() {
|
||||
fn test_mm256_cvtneobf16_ps() {
|
||||
let a = __m256bh([
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
]);
|
||||
let r = _mm256_cvtneobf16_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_cvtneobf16_ps(addr_of!(a)) };
|
||||
let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_cvtneoph_ps() {
|
||||
fn test_mm_cvtneoph_ps() {
|
||||
let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
|
||||
let r = _mm_cvtneoph_ps(addr_of!(a));
|
||||
let r = unsafe { _mm_cvtneoph_ps(addr_of!(a)) };
|
||||
let e = _mm_setr_ps(2., 4., 6., 8.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_cvtneoph_ps() {
|
||||
fn test_mm256_cvtneoph_ps() {
|
||||
let a = __m256h([
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
]);
|
||||
let r = _mm256_cvtneoph_ps(addr_of!(a));
|
||||
let r = unsafe { _mm256_cvtneoph_ps(addr_of!(a)) };
|
||||
let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm_cvtneps_avx_pbh() {
|
||||
fn test_mm_cvtneps_avx_pbh() {
|
||||
let a = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(a));
|
||||
let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_avx_pbh(a)) };
|
||||
let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avxneconvert")]
|
||||
unsafe fn test_mm256_cvtneps_avx_pbh() {
|
||||
fn test_mm256_cvtneps_avx_pbh() {
|
||||
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
let r: u16x8 = transmute(_mm256_cvtneps_avx_pbh(a));
|
||||
let r: u16x8 = _mm256_cvtneps_avx_pbh(a).as_u16x8();
|
||||
let e = u16x8::new(
|
||||
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use crate::{core_arch::x86::*, mem::transmute};
|
||||
use crate::core_arch::x86::*;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
const F16_ONE: i16 = 0x3c00;
|
||||
|
|
|
|||
|
|
@ -77,12 +77,14 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "fxsr")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_fxsave() {
|
||||
fn test_fxsave() {
|
||||
let mut a = FxsaveArea::new();
|
||||
let mut b = FxsaveArea::new();
|
||||
|
||||
fxsr::_fxsave(a.ptr());
|
||||
fxsr::_fxrstor(a.ptr());
|
||||
fxsr::_fxsave(b.ptr());
|
||||
unsafe {
|
||||
fxsr::_fxsave(a.ptr());
|
||||
fxsr::_fxrstor(a.ptr());
|
||||
fxsr::_fxsave(b.ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -898,25 +898,25 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512f")]
|
||||
unsafe fn test_mm512_gf2p8mul_epi8() {
|
||||
fn test_mm512_gf2p8mul_epi8() {
|
||||
let (left, right, expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let left = load_m512i_word(&left, i);
|
||||
let right = load_m512i_word(&right, i);
|
||||
let expected = load_m512i_word(&expected, i);
|
||||
let left = unsafe { load_m512i_word(&left, i) };
|
||||
let right = unsafe { load_m512i_word(&right, i) };
|
||||
let expected = unsafe { load_m512i_word(&expected, i) };
|
||||
let result = _mm512_gf2p8mul_epi8(left, right);
|
||||
assert_eq_m512i(result, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
|
||||
fn test_mm512_maskz_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let left = load_m512i_word(&left, i);
|
||||
let right = load_m512i_word(&right, i);
|
||||
let left = unsafe { load_m512i_word(&left, i) };
|
||||
let right = unsafe { load_m512i_word(&right, i) };
|
||||
let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
|
||||
assert_eq_m512i(result_zero, _mm512_setzero_si512());
|
||||
let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
|
||||
|
|
@ -930,12 +930,12 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_mask_gf2p8mul_epi8() {
|
||||
fn test_mm512_mask_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let left = load_m512i_word(&left, i);
|
||||
let right = load_m512i_word(&right, i);
|
||||
let left = unsafe { load_m512i_word(&left, i) };
|
||||
let right = unsafe { load_m512i_word(&right, i) };
|
||||
let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
|
||||
assert_eq_m512i(result_left, left);
|
||||
let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
|
||||
|
|
@ -948,25 +948,25 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx")]
|
||||
unsafe fn test_mm256_gf2p8mul_epi8() {
|
||||
fn test_mm256_gf2p8mul_epi8() {
|
||||
let (left, right, expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let left = load_m256i_word(&left, i);
|
||||
let right = load_m256i_word(&right, i);
|
||||
let expected = load_m256i_word(&expected, i);
|
||||
let left = unsafe { load_m256i_word(&left, i) };
|
||||
let right = unsafe { load_m256i_word(&right, i) };
|
||||
let expected = unsafe { load_m256i_word(&expected, i) };
|
||||
let result = _mm256_gf2p8mul_epi8(left, right);
|
||||
assert_eq_m256i(result, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
|
||||
fn test_mm256_maskz_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let left = load_m256i_word(&left, i);
|
||||
let right = load_m256i_word(&right, i);
|
||||
let left = unsafe { load_m256i_word(&left, i) };
|
||||
let right = unsafe { load_m256i_word(&right, i) };
|
||||
let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
|
||||
assert_eq_m256i(result_zero, _mm256_setzero_si256());
|
||||
let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
|
||||
|
|
@ -980,12 +980,12 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_gf2p8mul_epi8() {
|
||||
fn test_mm256_mask_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let left = load_m256i_word(&left, i);
|
||||
let right = load_m256i_word(&right, i);
|
||||
let left = unsafe { load_m256i_word(&left, i) };
|
||||
let right = unsafe { load_m256i_word(&right, i) };
|
||||
let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
|
||||
assert_eq_m256i(result_left, left);
|
||||
let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
|
||||
|
|
@ -998,25 +998,25 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni")]
|
||||
unsafe fn test_mm_gf2p8mul_epi8() {
|
||||
fn test_mm_gf2p8mul_epi8() {
|
||||
let (left, right, expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let left = load_m128i_word(&left, i);
|
||||
let right = load_m128i_word(&right, i);
|
||||
let expected = load_m128i_word(&expected, i);
|
||||
let left = unsafe { load_m128i_word(&left, i) };
|
||||
let right = unsafe { load_m128i_word(&right, i) };
|
||||
let expected = unsafe { load_m128i_word(&expected, i) };
|
||||
let result = _mm_gf2p8mul_epi8(left, right);
|
||||
assert_eq_m128i(result, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_gf2p8mul_epi8() {
|
||||
fn test_mm_maskz_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let left = load_m128i_word(&left, i);
|
||||
let right = load_m128i_word(&right, i);
|
||||
let left = unsafe { load_m128i_word(&left, i) };
|
||||
let right = unsafe { load_m128i_word(&right, i) };
|
||||
let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
|
||||
assert_eq_m128i(result_zero, _mm_setzero_si128());
|
||||
let mask_bytes: __mmask16 = 0x0F_F0;
|
||||
|
|
@ -1030,12 +1030,12 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_gf2p8mul_epi8() {
|
||||
fn test_mm_mask_gf2p8mul_epi8() {
|
||||
let (left, right, _expected) = generate_byte_mul_test_data();
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let left = load_m128i_word(&left, i);
|
||||
let right = load_m128i_word(&right, i);
|
||||
let left = unsafe { load_m128i_word(&left, i) };
|
||||
let right = unsafe { load_m128i_word(&right, i) };
|
||||
let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
|
||||
assert_eq_m128i(result_left, left);
|
||||
let mask_bytes: __mmask16 = 0x0F_F0;
|
||||
|
|
@ -1048,7 +1048,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512f")]
|
||||
unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm512_gf2p8affine_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
let constant: i64 = 0;
|
||||
|
|
@ -1061,20 +1061,20 @@ mod tests {
|
|||
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let data = load_m512i_word(&bytes, i);
|
||||
let data = unsafe { load_m512i_word(&bytes, i) };
|
||||
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m512i(result, data);
|
||||
let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m512i(result, constant_reference);
|
||||
let data = load_m512i_word(&more_bytes, i);
|
||||
let data = unsafe { load_m512i_word(&more_bytes, i) };
|
||||
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m512i(result, data);
|
||||
let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m512i(result, constant_reference);
|
||||
|
||||
let matrix = load_m512i_word(&matrices, i);
|
||||
let vector = load_m512i_word(&vectors, i);
|
||||
let reference = load_m512i_word(&references, i);
|
||||
let matrix = unsafe { load_m512i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m512i_word(&vectors, i) };
|
||||
let reference = unsafe { load_m512i_word(&references, i) };
|
||||
|
||||
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
|
||||
assert_eq_m512i(result, reference);
|
||||
|
|
@ -1082,13 +1082,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let matrix = load_m512i_word(&matrices, i);
|
||||
let vector = load_m512i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m512i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m512i_word(&vectors, i) };
|
||||
let result_zero =
|
||||
_mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m512i(result_zero, _mm512_setzero_si512());
|
||||
|
|
@ -1104,13 +1104,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm512_mask_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let left = load_m512i_word(&vectors, i);
|
||||
let right = load_m512i_word(&matrices, i);
|
||||
let left = unsafe { load_m512i_word(&vectors, i) };
|
||||
let right = unsafe { load_m512i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m512i(result_left, left);
|
||||
|
|
@ -1125,7 +1125,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx")]
|
||||
unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm256_gf2p8affine_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
let constant: i64 = 0;
|
||||
|
|
@ -1138,20 +1138,20 @@ mod tests {
|
|||
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let data = load_m256i_word(&bytes, i);
|
||||
let data = unsafe { load_m256i_word(&bytes, i) };
|
||||
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m256i(result, data);
|
||||
let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m256i(result, constant_reference);
|
||||
let data = load_m256i_word(&more_bytes, i);
|
||||
let data = unsafe { load_m256i_word(&more_bytes, i) };
|
||||
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m256i(result, data);
|
||||
let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m256i(result, constant_reference);
|
||||
|
||||
let matrix = load_m256i_word(&matrices, i);
|
||||
let vector = load_m256i_word(&vectors, i);
|
||||
let reference = load_m256i_word(&references, i);
|
||||
let matrix = unsafe { load_m256i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m256i_word(&vectors, i) };
|
||||
let reference = unsafe { load_m256i_word(&references, i) };
|
||||
|
||||
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
|
||||
assert_eq_m256i(result, reference);
|
||||
|
|
@ -1159,13 +1159,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let matrix = load_m256i_word(&matrices, i);
|
||||
let vector = load_m256i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m256i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m256i_word(&vectors, i) };
|
||||
let result_zero =
|
||||
_mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m256i(result_zero, _mm256_setzero_si256());
|
||||
|
|
@ -1181,13 +1181,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm256_mask_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let left = load_m256i_word(&vectors, i);
|
||||
let right = load_m256i_word(&matrices, i);
|
||||
let left = unsafe { load_m256i_word(&vectors, i) };
|
||||
let right = unsafe { load_m256i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m256i(result_left, left);
|
||||
|
|
@ -1202,7 +1202,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni")]
|
||||
unsafe fn test_mm_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm_gf2p8affine_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
let constant: i64 = 0;
|
||||
|
|
@ -1215,20 +1215,20 @@ mod tests {
|
|||
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let data = load_m128i_word(&bytes, i);
|
||||
let data = unsafe { load_m128i_word(&bytes, i) };
|
||||
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m128i(result, data);
|
||||
let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m128i(result, constant_reference);
|
||||
let data = load_m128i_word(&more_bytes, i);
|
||||
let data = unsafe { load_m128i_word(&more_bytes, i) };
|
||||
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
|
||||
assert_eq_m128i(result, data);
|
||||
let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
|
||||
assert_eq_m128i(result, constant_reference);
|
||||
|
||||
let matrix = load_m128i_word(&matrices, i);
|
||||
let vector = load_m128i_word(&vectors, i);
|
||||
let reference = load_m128i_word(&references, i);
|
||||
let matrix = unsafe { load_m128i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m128i_word(&vectors, i) };
|
||||
let reference = unsafe { load_m128i_word(&references, i) };
|
||||
|
||||
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
|
||||
assert_eq_m128i(result, reference);
|
||||
|
|
@ -1236,13 +1236,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm_maskz_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let matrix = load_m128i_word(&matrices, i);
|
||||
let vector = load_m128i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m128i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m128i_word(&vectors, i) };
|
||||
let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m128i(result_zero, _mm_setzero_si128());
|
||||
let mask_bytes: __mmask16 = 0x0F_F0;
|
||||
|
|
@ -1257,13 +1257,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
|
||||
fn test_mm_mask_gf2p8affine_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let left = load_m128i_word(&vectors, i);
|
||||
let right = load_m128i_word(&matrices, i);
|
||||
let left = unsafe { load_m128i_word(&vectors, i) };
|
||||
let right = unsafe { load_m128i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m128i(result_left, left);
|
||||
|
|
@ -1278,7 +1278,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512f")]
|
||||
unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm512_gf2p8affineinv_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
|
|
@ -1288,8 +1288,8 @@ mod tests {
|
|||
let (inputs, results) = generate_inv_tests_data();
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_512 {
|
||||
let input = load_m512i_word(&inputs, i);
|
||||
let reference = load_m512i_word(&results, i);
|
||||
let input = unsafe { load_m512i_word(&inputs, i) };
|
||||
let reference = unsafe { load_m512i_word(&results, i) };
|
||||
let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
|
||||
let remultiplied = _mm512_gf2p8mul_epi8(result, input);
|
||||
assert_eq_m512i(remultiplied, reference);
|
||||
|
|
@ -1300,8 +1300,8 @@ mod tests {
|
|||
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let vector = load_m512i_word(&vectors, i);
|
||||
let matrix = load_m512i_word(&matrices, i);
|
||||
let vector = unsafe { load_m512i_word(&vectors, i) };
|
||||
let matrix = unsafe { load_m512i_word(&matrices, i) };
|
||||
|
||||
let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
|
||||
let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
|
||||
|
|
@ -1314,21 +1314,21 @@ mod tests {
|
|||
let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_512 {
|
||||
let reference = load_m512i_word(&AES_S_BOX, i);
|
||||
let input = load_m512i_word(&inputs, i);
|
||||
let reference = unsafe { load_m512i_word(&AES_S_BOX, i) };
|
||||
let input = unsafe { load_m512i_word(&inputs, i) };
|
||||
let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
|
||||
assert_eq_m512i(result, reference);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let matrix = load_m512i_word(&matrices, i);
|
||||
let vector = load_m512i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m512i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m512i_word(&vectors, i) };
|
||||
let result_zero =
|
||||
_mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m512i(result_zero, _mm512_setzero_si512());
|
||||
|
|
@ -1344,13 +1344,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw")]
|
||||
unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_512 {
|
||||
let left = load_m512i_word(&vectors, i);
|
||||
let right = load_m512i_word(&matrices, i);
|
||||
let left = unsafe { load_m512i_word(&vectors, i) };
|
||||
let right = unsafe { load_m512i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m512i(result_left, left);
|
||||
|
|
@ -1366,7 +1366,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx")]
|
||||
unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm256_gf2p8affineinv_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
|
|
@ -1376,8 +1376,8 @@ mod tests {
|
|||
let (inputs, results) = generate_inv_tests_data();
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_256 {
|
||||
let input = load_m256i_word(&inputs, i);
|
||||
let reference = load_m256i_word(&results, i);
|
||||
let input = unsafe { load_m256i_word(&inputs, i) };
|
||||
let reference = unsafe { load_m256i_word(&results, i) };
|
||||
let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
|
||||
let remultiplied = _mm256_gf2p8mul_epi8(result, input);
|
||||
assert_eq_m256i(remultiplied, reference);
|
||||
|
|
@ -1388,8 +1388,8 @@ mod tests {
|
|||
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let vector = load_m256i_word(&vectors, i);
|
||||
let matrix = load_m256i_word(&matrices, i);
|
||||
let vector = unsafe { load_m256i_word(&vectors, i) };
|
||||
let matrix = unsafe { load_m256i_word(&matrices, i) };
|
||||
|
||||
let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
|
||||
let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
|
||||
|
|
@ -1402,21 +1402,21 @@ mod tests {
|
|||
let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_256 {
|
||||
let reference = load_m256i_word(&AES_S_BOX, i);
|
||||
let input = load_m256i_word(&inputs, i);
|
||||
let reference = unsafe { load_m256i_word(&AES_S_BOX, i) };
|
||||
let input = unsafe { load_m256i_word(&inputs, i) };
|
||||
let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
|
||||
assert_eq_m256i(result, reference);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let matrix = load_m256i_word(&matrices, i);
|
||||
let vector = load_m256i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m256i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m256i_word(&vectors, i) };
|
||||
let result_zero =
|
||||
_mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m256i(result_zero, _mm256_setzero_si256());
|
||||
|
|
@ -1432,13 +1432,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_256 {
|
||||
let left = load_m256i_word(&vectors, i);
|
||||
let right = load_m256i_word(&matrices, i);
|
||||
let left = unsafe { load_m256i_word(&vectors, i) };
|
||||
let right = unsafe { load_m256i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m256i(result_left, left);
|
||||
|
|
@ -1454,7 +1454,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni")]
|
||||
unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm_gf2p8affineinv_epi64_epi8() {
|
||||
let identity: i64 = 0x01_02_04_08_10_20_40_80;
|
||||
const IDENTITY_BYTE: i32 = 0;
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
|
|
@ -1464,8 +1464,8 @@ mod tests {
|
|||
let (inputs, results) = generate_inv_tests_data();
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_128 {
|
||||
let input = load_m128i_word(&inputs, i);
|
||||
let reference = load_m128i_word(&results, i);
|
||||
let input = unsafe { load_m128i_word(&inputs, i) };
|
||||
let reference = unsafe { load_m128i_word(&results, i) };
|
||||
let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
|
||||
let remultiplied = _mm_gf2p8mul_epi8(result, input);
|
||||
assert_eq_m128i(remultiplied, reference);
|
||||
|
|
@ -1476,8 +1476,8 @@ mod tests {
|
|||
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let vector = load_m128i_word(&vectors, i);
|
||||
let matrix = load_m128i_word(&matrices, i);
|
||||
let vector = unsafe { load_m128i_word(&vectors, i) };
|
||||
let matrix = unsafe { load_m128i_word(&matrices, i) };
|
||||
|
||||
let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
|
||||
let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
|
||||
|
|
@ -1490,21 +1490,21 @@ mod tests {
|
|||
let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);
|
||||
|
||||
for i in 0..NUM_BYTES_WORDS_128 {
|
||||
let reference = load_m128i_word(&AES_S_BOX, i);
|
||||
let input = load_m128i_word(&inputs, i);
|
||||
let reference = unsafe { load_m128i_word(&AES_S_BOX, i) };
|
||||
let input = unsafe { load_m128i_word(&inputs, i) };
|
||||
let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
|
||||
assert_eq_m128i(result, reference);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let matrix = load_m128i_word(&matrices, i);
|
||||
let vector = load_m128i_word(&vectors, i);
|
||||
let matrix = unsafe { load_m128i_word(&matrices, i) };
|
||||
let vector = unsafe { load_m128i_word(&vectors, i) };
|
||||
let result_zero =
|
||||
_mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
|
||||
assert_eq_m128i(result_zero, _mm_setzero_si128());
|
||||
|
|
@ -1520,13 +1520,13 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
|
||||
fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
|
||||
const CONSTANT_BYTE: i32 = 0x63;
|
||||
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
|
||||
|
||||
for i in 0..NUM_TEST_WORDS_128 {
|
||||
let left = load_m128i_word(&vectors, i);
|
||||
let right = load_m128i_word(&matrices, i);
|
||||
let left = unsafe { load_m128i_word(&vectors, i) };
|
||||
let right = unsafe { load_m128i_word(&matrices, i) };
|
||||
let result_left =
|
||||
_mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
|
||||
assert_eq_m128i(result_left, left);
|
||||
|
|
|
|||
|
|
@ -352,45 +352,47 @@ mod tests {
|
|||
use stdarch_test::simd_test;
|
||||
|
||||
#[target_feature(enable = "kl")]
|
||||
unsafe fn encodekey128() -> [u8; 48] {
|
||||
fn encodekey128() -> [u8; 48] {
|
||||
let mut handle = [0; 48];
|
||||
let _ = _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr());
|
||||
let _ = unsafe { _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr()) };
|
||||
handle
|
||||
}
|
||||
|
||||
#[target_feature(enable = "kl")]
|
||||
unsafe fn encodekey256() -> [u8; 64] {
|
||||
fn encodekey256() -> [u8; 64] {
|
||||
let mut handle = [0; 64];
|
||||
let _ = _mm_encodekey256_u32(
|
||||
0,
|
||||
_mm_setzero_si128(),
|
||||
_mm_setzero_si128(),
|
||||
handle.as_mut_ptr(),
|
||||
);
|
||||
let _ = unsafe {
|
||||
_mm_encodekey256_u32(
|
||||
0,
|
||||
_mm_setzero_si128(),
|
||||
_mm_setzero_si128(),
|
||||
handle.as_mut_ptr(),
|
||||
)
|
||||
};
|
||||
handle
|
||||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_encodekey128_u32() {
|
||||
fn test_mm_encodekey128_u32() {
|
||||
encodekey128();
|
||||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_encodekey256_u32() {
|
||||
fn test_mm_encodekey256_u32() {
|
||||
encodekey256();
|
||||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_aesenc128kl_u8() {
|
||||
fn test_mm_aesenc128kl_u8() {
|
||||
let mut buffer = _mm_setzero_si128();
|
||||
let key = encodekey128();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -398,16 +400,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_aesdec128kl_u8() {
|
||||
fn test_mm_aesdec128kl_u8() {
|
||||
let mut buffer = _mm_setzero_si128();
|
||||
let key = encodekey128();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -415,16 +417,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_aesenc256kl_u8() {
|
||||
fn test_mm_aesenc256kl_u8() {
|
||||
let mut buffer = _mm_setzero_si128();
|
||||
let key = encodekey256();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -432,16 +434,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "kl")]
|
||||
unsafe fn test_mm_aesdec256kl_u8() {
|
||||
fn test_mm_aesdec256kl_u8() {
|
||||
let mut buffer = _mm_setzero_si128();
|
||||
let key = encodekey256();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
|
||||
let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -449,16 +451,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "widekl")]
|
||||
unsafe fn test_mm_aesencwide128kl_u8() {
|
||||
fn test_mm_aesencwide128kl_u8() {
|
||||
let mut buffer = [_mm_setzero_si128(); 8];
|
||||
let key = encodekey128();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -468,16 +474,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "widekl")]
|
||||
unsafe fn test_mm_aesdecwide128kl_u8() {
|
||||
fn test_mm_aesdecwide128kl_u8() {
|
||||
let mut buffer = [_mm_setzero_si128(); 8];
|
||||
let key = encodekey128();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -487,16 +497,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "widekl")]
|
||||
unsafe fn test_mm_aesencwide256kl_u8() {
|
||||
fn test_mm_aesencwide256kl_u8() {
|
||||
let mut buffer = [_mm_setzero_si128(); 8];
|
||||
let key = encodekey256();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
@ -506,16 +520,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "widekl")]
|
||||
unsafe fn test_mm_aesdecwide256kl_u8() {
|
||||
fn test_mm_aesdecwide256kl_u8() {
|
||||
let mut buffer = [_mm_setzero_si128(); 8];
|
||||
let key = encodekey256();
|
||||
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
for _ in 0..100 {
|
||||
let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
|
||||
let status = unsafe {
|
||||
_mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
|
||||
};
|
||||
assert_eq!(status, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -120,13 +120,15 @@ mod tests {
|
|||
use crate::core_arch::x86::*;
|
||||
|
||||
#[simd_test(enable = "rtm")]
|
||||
unsafe fn test_xbegin() {
|
||||
fn test_xbegin() {
|
||||
let mut x = 0;
|
||||
for _ in 0..10 {
|
||||
let code = _xbegin();
|
||||
let code = unsafe { _xbegin() };
|
||||
if code == _XBEGIN_STARTED {
|
||||
x += 1;
|
||||
_xend();
|
||||
unsafe {
|
||||
_xend();
|
||||
}
|
||||
assert_eq!(x, 1);
|
||||
break;
|
||||
}
|
||||
|
|
@ -135,19 +137,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "rtm")]
|
||||
unsafe fn test_xabort() {
|
||||
fn test_xabort() {
|
||||
const ABORT_CODE: u32 = 42;
|
||||
// aborting outside a transactional region does nothing
|
||||
_xabort::<ABORT_CODE>();
|
||||
unsafe {
|
||||
_xabort::<ABORT_CODE>();
|
||||
}
|
||||
|
||||
for _ in 0..10 {
|
||||
let mut x = 0;
|
||||
let code = rtm::_xbegin();
|
||||
let code = unsafe { _xbegin() };
|
||||
if code == _XBEGIN_STARTED {
|
||||
x += 1;
|
||||
rtm::_xabort::<ABORT_CODE>();
|
||||
unsafe {
|
||||
_xabort::<ABORT_CODE>();
|
||||
}
|
||||
} else if code & _XABORT_EXPLICIT != 0 {
|
||||
let test_abort_code = rtm::_xabort_code(code);
|
||||
let test_abort_code = _xabort_code(code);
|
||||
assert_eq!(test_abort_code, ABORT_CODE);
|
||||
}
|
||||
assert_eq!(x, 0);
|
||||
|
|
@ -155,14 +161,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "rtm")]
|
||||
unsafe fn test_xtest() {
|
||||
assert_eq!(_xtest(), 0);
|
||||
fn test_xtest() {
|
||||
assert_eq!(unsafe { _xtest() }, 0);
|
||||
|
||||
for _ in 0..10 {
|
||||
let code = rtm::_xbegin();
|
||||
let code = unsafe { _xbegin() };
|
||||
if code == _XBEGIN_STARTED {
|
||||
let in_tx = _xtest();
|
||||
rtm::_xend();
|
||||
let in_tx = unsafe { _xtest() };
|
||||
unsafe {
|
||||
_xend();
|
||||
}
|
||||
|
||||
// putting the assert inside the transaction would abort the transaction on fail
|
||||
// without any output/panic/etc
|
||||
|
|
|
|||
|
|
@ -3147,21 +3147,21 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_load_ss() {
|
||||
const fn test_mm_load_ss() {
|
||||
let a = 42.0f32;
|
||||
let r = _mm_load_ss(ptr::addr_of!(a));
|
||||
let r = unsafe { _mm_load_ss(ptr::addr_of!(a)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_load1_ps() {
|
||||
const fn test_mm_load1_ps() {
|
||||
let a = 42.0f32;
|
||||
let r = _mm_load1_ps(ptr::addr_of!(a));
|
||||
let r = unsafe { _mm_load1_ps(ptr::addr_of!(a)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_load_ps() {
|
||||
const fn test_mm_load_ps() {
|
||||
let vals = Memory {
|
||||
data: [1.0f32, 2.0, 3.0, 4.0],
|
||||
};
|
||||
|
|
@ -3169,21 +3169,21 @@ mod tests {
|
|||
// guaranteed to be aligned to 16 bytes
|
||||
let p = vals.data.as_ptr();
|
||||
|
||||
let r = _mm_load_ps(p);
|
||||
let r = unsafe { _mm_load_ps(p) };
|
||||
let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_loadu_ps() {
|
||||
const fn test_mm_loadu_ps() {
|
||||
let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
|
||||
let p = vals.as_ptr().add(3);
|
||||
let r = _mm_loadu_ps(black_box(p));
|
||||
let p = unsafe { vals.as_ptr().add(3) };
|
||||
let r = unsafe { _mm_loadu_ps(black_box(p)) };
|
||||
assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_loadr_ps() {
|
||||
const fn test_mm_loadr_ps() {
|
||||
let vals = Memory {
|
||||
data: [1.0f32, 2.0, 3.0, 4.0],
|
||||
};
|
||||
|
|
@ -3191,16 +3191,18 @@ mod tests {
|
|||
// guaranteed to be aligned to 16 bytes
|
||||
let p = vals.data.as_ptr();
|
||||
|
||||
let r = _mm_loadr_ps(p);
|
||||
let r = unsafe { _mm_loadr_ps(p) };
|
||||
let e = _mm_setr_ps(4.0, 3.0, 2.0, 1.0);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_store_ss() {
|
||||
const fn test_mm_store_ss() {
|
||||
let mut vals = [0.0f32; 8];
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
_mm_store_ss(vals.as_mut_ptr().add(1), a);
|
||||
unsafe {
|
||||
_mm_store_ss(vals.as_mut_ptr().add(1), a);
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0.0);
|
||||
assert_eq!(vals[1], 1.0);
|
||||
|
|
@ -3208,46 +3210,52 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_store1_ps() {
|
||||
const fn test_mm_store1_ps() {
|
||||
let mut vals = Memory { data: [0.0f32; 4] };
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
// guaranteed to be aligned to 16 bytes
|
||||
let p = vals.data.as_mut_ptr();
|
||||
|
||||
_mm_store1_ps(p, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_store1_ps(p, *black_box(&a));
|
||||
}
|
||||
|
||||
assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_store_ps() {
|
||||
const fn test_mm_store_ps() {
|
||||
let mut vals = Memory { data: [0.0f32; 4] };
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
// guaranteed to be aligned to 16 bytes
|
||||
let p = vals.data.as_mut_ptr();
|
||||
|
||||
_mm_store_ps(p, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_store_ps(p, *black_box(&a));
|
||||
}
|
||||
|
||||
assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_storer_ps() {
|
||||
const fn test_mm_storer_ps() {
|
||||
let mut vals = Memory { data: [0.0f32; 4] };
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
// guaranteed to be aligned to 16 bytes
|
||||
let p = vals.data.as_mut_ptr();
|
||||
|
||||
_mm_storer_ps(p, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_storer_ps(p, *black_box(&a));
|
||||
}
|
||||
|
||||
assert_eq!(vals.data, [4.0, 3.0, 2.0, 1.0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_storeu_ps() {
|
||||
const fn test_mm_storeu_ps() {
|
||||
#[repr(align(16))]
|
||||
struct Memory8 {
|
||||
data: [f32; 8],
|
||||
|
|
@ -3258,9 +3266,11 @@ mod tests {
|
|||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
// guaranteed to be *not* aligned to 16 bytes
|
||||
let p = vals.data.as_mut_ptr().offset(1);
|
||||
let p = unsafe { vals.data.as_mut_ptr().offset(1) };
|
||||
|
||||
_mm_storeu_ps(p, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_storeu_ps(p, *black_box(&a));
|
||||
}
|
||||
|
||||
assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]);
|
||||
}
|
||||
|
|
@ -3315,11 +3325,13 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_ps() {
|
||||
fn test_mm_stream_ps() {
|
||||
let a = _mm_set1_ps(7.0);
|
||||
let mut mem = Memory { data: [-1.0; 4] };
|
||||
|
||||
_mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
unsafe {
|
||||
_mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
for i in 0..4 {
|
||||
assert_eq!(mem.data[i], get_m128(a, i));
|
||||
|
|
|
|||
|
|
@ -3291,9 +3291,11 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
unsafe fn test_mm_clflush() {
|
||||
fn test_mm_clflush() {
|
||||
let x = 0_u8;
|
||||
_mm_clflush(ptr::addr_of!(x));
|
||||
unsafe {
|
||||
_mm_clflush(ptr::addr_of!(x));
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
|
|
@ -3725,7 +3727,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
unsafe fn test_mm_sll_epi16() {
|
||||
fn test_mm_sll_epi16() {
|
||||
let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
|
||||
let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
|
||||
assert_eq_m128i(
|
||||
|
|
@ -4071,7 +4073,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
unsafe fn test_mm_cvtps_epi32() {
|
||||
fn test_mm_cvtps_epi32() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_cvtps_epi32(a);
|
||||
assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
|
||||
|
|
@ -4178,23 +4180,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadl_epi64() {
|
||||
const fn test_mm_loadl_epi64() {
|
||||
let a = _mm_setr_epi64x(6, 5);
|
||||
let r = _mm_loadl_epi64(ptr::addr_of!(a));
|
||||
let r = unsafe { _mm_loadl_epi64(ptr::addr_of!(a)) };
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_load_si128() {
|
||||
const fn test_mm_load_si128() {
|
||||
let a = _mm_set_epi64x(5, 6);
|
||||
let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_load_si128(ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(a, r);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadu_si128() {
|
||||
const fn test_mm_loadu_si128() {
|
||||
let a = _mm_set_epi64x(5, 6);
|
||||
let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_loadu_si128(ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(a, r);
|
||||
}
|
||||
|
||||
|
|
@ -4202,7 +4204,7 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_maskmoveu_si128() {
|
||||
fn test_mm_maskmoveu_si128() {
|
||||
let a = _mm_set1_epi8(9);
|
||||
#[rustfmt::skip]
|
||||
let mask = _mm_set_epi8(
|
||||
|
|
@ -4210,33 +4212,41 @@ mod tests {
|
|||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
|
||||
unsafe {
|
||||
_mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
|
||||
}
|
||||
_mm_sfence();
|
||||
let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_store_si128() {
|
||||
const fn test_mm_store_si128() {
|
||||
let a = _mm_set1_epi8(9);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_store_si128(&mut r, a);
|
||||
unsafe {
|
||||
_mm_store_si128(&mut r, a);
|
||||
}
|
||||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeu_si128() {
|
||||
const fn test_mm_storeu_si128() {
|
||||
let a = _mm_set1_epi8(9);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_storeu_si128(&mut r, a);
|
||||
unsafe {
|
||||
_mm_storeu_si128(&mut r, a);
|
||||
}
|
||||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storel_epi64() {
|
||||
const fn test_mm_storel_epi64() {
|
||||
let a = _mm_setr_epi64x(2, 9);
|
||||
let mut r = _mm_set1_epi8(0);
|
||||
_mm_storel_epi64(&mut r, a);
|
||||
unsafe {
|
||||
_mm_storel_epi64(&mut r, a);
|
||||
}
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
|
||||
}
|
||||
|
||||
|
|
@ -4244,10 +4254,12 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_si128() {
|
||||
fn test_mm_stream_si128() {
|
||||
let a = _mm_setr_epi32(1, 2, 3, 4);
|
||||
let mut r = _mm_undefined_si128();
|
||||
_mm_stream_si128(ptr::addr_of_mut!(r), a);
|
||||
unsafe {
|
||||
_mm_stream_si128(ptr::addr_of_mut!(r), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
|
@ -4256,10 +4268,12 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_si32() {
|
||||
fn test_mm_stream_si32() {
|
||||
let a: i32 = 7;
|
||||
let mut mem = boxed::Box::<i32>::new(-1);
|
||||
_mm_stream_si32(ptr::addr_of_mut!(*mem), a);
|
||||
unsafe {
|
||||
_mm_stream_si32(ptr::addr_of_mut!(*mem), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
assert_eq!(a, *mem);
|
||||
}
|
||||
|
|
@ -4909,40 +4923,40 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_load_pd() {
|
||||
const fn test_mm_load_pd() {
|
||||
let mem = Memory {
|
||||
data: [1.0f64, 2.0, 3.0, 4.0],
|
||||
};
|
||||
let vals = &mem.data;
|
||||
let d = vals.as_ptr();
|
||||
|
||||
let r = _mm_load_pd(d);
|
||||
let r = unsafe { _mm_load_pd(d) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_load_sd() {
|
||||
const fn test_mm_load_sd() {
|
||||
let a = 1.;
|
||||
let expected = _mm_setr_pd(a, 0.);
|
||||
let r = _mm_load_sd(&a);
|
||||
let r = unsafe { _mm_load_sd(&a) };
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadh_pd() {
|
||||
const fn test_mm_loadh_pd() {
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
let b = 3.;
|
||||
let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
|
||||
let r = _mm_loadh_pd(a, &b);
|
||||
let r = unsafe { _mm_loadh_pd(a, &b) };
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadl_pd() {
|
||||
const fn test_mm_loadl_pd() {
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
let b = 3.;
|
||||
let expected = _mm_setr_pd(3., get_m128d(a, 1));
|
||||
let r = _mm_loadl_pd(a, &b);
|
||||
let r = unsafe { _mm_loadl_pd(a, &b) };
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -4950,7 +4964,7 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_pd() {
|
||||
fn test_mm_stream_pd() {
|
||||
#[repr(align(128))]
|
||||
struct Memory {
|
||||
pub data: [f64; 2],
|
||||
|
|
@ -4958,7 +4972,9 @@ mod tests {
|
|||
let a = _mm_set1_pd(7.0);
|
||||
let mut mem = Memory { data: [-1.0; 2] };
|
||||
|
||||
_mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
unsafe {
|
||||
_mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
for i in 0..2 {
|
||||
assert_eq!(mem.data[i], get_m128d(a, i));
|
||||
|
|
@ -4966,132 +4982,154 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_store_sd() {
|
||||
const fn test_mm_store_sd() {
|
||||
let mut dest = 0.;
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
_mm_store_sd(&mut dest, a);
|
||||
unsafe {
|
||||
_mm_store_sd(&mut dest, a);
|
||||
}
|
||||
assert_eq!(dest, _mm_cvtsd_f64(a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_store_pd() {
|
||||
const fn test_mm_store_pd() {
|
||||
let mut mem = Memory { data: [0.0f64; 4] };
|
||||
let vals = &mut mem.data;
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let d = vals.as_mut_ptr();
|
||||
|
||||
_mm_store_pd(d, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_store_pd(d, *black_box(&a));
|
||||
}
|
||||
assert_eq!(vals[0], 1.0);
|
||||
assert_eq!(vals[1], 2.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeu_pd() {
|
||||
const fn test_mm_storeu_pd() {
|
||||
// guaranteed to be aligned to 16 bytes
|
||||
let mut mem = Memory { data: [0.0f64; 4] };
|
||||
let vals = &mut mem.data;
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
|
||||
// so p is *not* aligned to 16 bytes
|
||||
let p = vals.as_mut_ptr().offset(1);
|
||||
_mm_storeu_pd(p, *black_box(&a));
|
||||
unsafe {
|
||||
let p = vals.as_mut_ptr().offset(1);
|
||||
_mm_storeu_pd(p, *black_box(&a));
|
||||
}
|
||||
|
||||
assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeu_si16() {
|
||||
const fn test_mm_storeu_si16() {
|
||||
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
|
||||
_mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
|
||||
unsafe {
|
||||
_mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
|
||||
}
|
||||
let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeu_si32() {
|
||||
const fn test_mm_storeu_si32() {
|
||||
let a = _mm_setr_epi32(1, 2, 3, 4);
|
||||
let mut r = _mm_setr_epi32(5, 6, 7, 8);
|
||||
_mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
|
||||
unsafe {
|
||||
_mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
|
||||
}
|
||||
let e = _mm_setr_epi32(1, 6, 7, 8);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeu_si64() {
|
||||
const fn test_mm_storeu_si64() {
|
||||
let a = _mm_setr_epi64x(1, 2);
|
||||
let mut r = _mm_setr_epi64x(3, 4);
|
||||
_mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
|
||||
unsafe {
|
||||
_mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
|
||||
}
|
||||
let e = _mm_setr_epi64x(1, 4);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_store1_pd() {
|
||||
const fn test_mm_store1_pd() {
|
||||
let mut mem = Memory { data: [0.0f64; 4] };
|
||||
let vals = &mut mem.data;
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let d = vals.as_mut_ptr();
|
||||
|
||||
_mm_store1_pd(d, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_store1_pd(d, *black_box(&a));
|
||||
}
|
||||
assert_eq!(vals[0], 1.0);
|
||||
assert_eq!(vals[1], 1.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_store_pd1() {
|
||||
const fn test_mm_store_pd1() {
|
||||
let mut mem = Memory { data: [0.0f64; 4] };
|
||||
let vals = &mut mem.data;
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let d = vals.as_mut_ptr();
|
||||
|
||||
_mm_store_pd1(d, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_store_pd1(d, *black_box(&a));
|
||||
}
|
||||
assert_eq!(vals[0], 1.0);
|
||||
assert_eq!(vals[1], 1.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storer_pd() {
|
||||
const fn test_mm_storer_pd() {
|
||||
let mut mem = Memory { data: [0.0f64; 4] };
|
||||
let vals = &mut mem.data;
|
||||
let a = _mm_setr_pd(1.0, 2.0);
|
||||
let d = vals.as_mut_ptr();
|
||||
|
||||
_mm_storer_pd(d, *black_box(&a));
|
||||
unsafe {
|
||||
_mm_storer_pd(d, *black_box(&a));
|
||||
}
|
||||
assert_eq!(vals[0], 2.0);
|
||||
assert_eq!(vals[1], 1.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storeh_pd() {
|
||||
const fn test_mm_storeh_pd() {
|
||||
let mut dest = 0.;
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
_mm_storeh_pd(&mut dest, a);
|
||||
unsafe {
|
||||
_mm_storeh_pd(&mut dest, a);
|
||||
}
|
||||
assert_eq!(dest, get_m128d(a, 1));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_storel_pd() {
|
||||
const fn test_mm_storel_pd() {
|
||||
let mut dest = 0.;
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
_mm_storel_pd(&mut dest, a);
|
||||
unsafe {
|
||||
_mm_storel_pd(&mut dest, a);
|
||||
}
|
||||
assert_eq!(dest, _mm_cvtsd_f64(a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadr_pd() {
|
||||
const fn test_mm_loadr_pd() {
|
||||
let mut mem = Memory {
|
||||
data: [1.0f64, 2.0, 3.0, 4.0],
|
||||
};
|
||||
let vals = &mut mem.data;
|
||||
let d = vals.as_ptr();
|
||||
|
||||
let r = _mm_loadr_pd(d);
|
||||
let r = unsafe { _mm_loadr_pd(d) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadu_pd() {
|
||||
const fn test_mm_loadu_pd() {
|
||||
// guaranteed to be aligned to 16 bytes
|
||||
let mut mem = Memory {
|
||||
data: [1.0f64, 2.0, 3.0, 4.0],
|
||||
|
|
@ -5099,31 +5137,31 @@ mod tests {
|
|||
let vals = &mut mem.data;
|
||||
|
||||
// so this will *not* be aligned to 16 bytes
|
||||
let d = vals.as_ptr().offset(1);
|
||||
let d = unsafe { vals.as_ptr().offset(1) };
|
||||
|
||||
let r = _mm_loadu_pd(d);
|
||||
let r = unsafe { _mm_loadu_pd(d) };
|
||||
let e = _mm_setr_pd(2.0, 3.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadu_si16() {
|
||||
const fn test_mm_loadu_si16() {
|
||||
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_loadu_si16(ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadu_si32() {
|
||||
const fn test_mm_loadu_si32() {
|
||||
let a = _mm_setr_epi32(1, 2, 3, 4);
|
||||
let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_loadu_si32(ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_loadu_si64() {
|
||||
const fn test_mm_loadu_si64() {
|
||||
let a = _mm_setr_epi64x(5, 6);
|
||||
let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_loadu_si64(ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
|
||||
}
|
||||
|
||||
|
|
@ -5302,16 +5340,16 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_load1_pd() {
|
||||
const fn test_mm_load1_pd() {
|
||||
let d = -5.0;
|
||||
let r = _mm_load1_pd(&d);
|
||||
let r = unsafe { _mm_load1_pd(&d) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(d, d));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_load_pd1() {
|
||||
const fn test_mm_load_pd1() {
|
||||
let d = -5.0;
|
||||
let r = _mm_load_pd1(&d);
|
||||
let r = unsafe { _mm_load_pd1(&d) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(d, d));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse3")]
|
||||
unsafe fn test_mm_lddqu_si128() {
|
||||
fn test_mm_lddqu_si128() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm_setr_epi8(
|
||||
1, 2, 3, 4,
|
||||
|
|
@ -247,7 +247,7 @@ mod tests {
|
|||
9, 10, 11, 12,
|
||||
13, 14, 15, 16,
|
||||
);
|
||||
let r = _mm_lddqu_si128(&a);
|
||||
let r = unsafe { _mm_lddqu_si128(&a) };
|
||||
assert_eq_m128i(a, r);
|
||||
}
|
||||
|
||||
|
|
@ -273,9 +273,9 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse3")]
|
||||
const unsafe fn test_mm_loaddup_pd() {
|
||||
const fn test_mm_loaddup_pd() {
|
||||
let d = -5.0;
|
||||
let r = _mm_loaddup_pd(&d);
|
||||
let r = unsafe { _mm_loaddup_pd(&d) };
|
||||
assert_eq_m128d(r, _mm_setr_pd(d, d));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1219,20 +1219,20 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse4.1")]
|
||||
const unsafe fn test_mm_blendv_pd() {
|
||||
const fn test_mm_blendv_pd() {
|
||||
let a = _mm_set1_pd(0.0);
|
||||
let b = _mm_set1_pd(1.0);
|
||||
let mask = transmute(_mm_setr_epi64x(0, -1));
|
||||
let mask = _mm_castsi128_pd(_mm_setr_epi64x(0, -1));
|
||||
let r = _mm_blendv_pd(a, b, mask);
|
||||
let e = _mm_setr_pd(0.0, 1.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse4.1")]
|
||||
const unsafe fn test_mm_blendv_ps() {
|
||||
const fn test_mm_blendv_ps() {
|
||||
let a = _mm_set1_ps(0.0);
|
||||
let b = _mm_set1_ps(1.0);
|
||||
let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
|
||||
let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
|
||||
let r = _mm_blendv_ps(a, b, mask);
|
||||
let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
|
||||
assert_eq_m128(r, e);
|
||||
|
|
@ -1949,9 +1949,9 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse4.1")]
|
||||
unsafe fn test_mm_stream_load_si128() {
|
||||
fn test_mm_stream_load_si128() {
|
||||
let a = _mm_set_epi64x(5, 6);
|
||||
let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
|
||||
let r = unsafe { _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _) };
|
||||
assert_eq_m128i(a, r);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -613,6 +613,7 @@ mod tests {
|
|||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use crate::core_arch::x86::*;
|
||||
use std::ptr;
|
||||
|
||||
|
|
@ -625,7 +626,7 @@ mod tests {
|
|||
assert!(s.len() <= 16);
|
||||
let mut array = [0u8; 16];
|
||||
array[..s.len()].copy_from_slice(s);
|
||||
unsafe { transmute(array) }
|
||||
u8x16::from_array(array).as_m128i()
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse4.2")]
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_sd() {
|
||||
fn test_mm_stream_sd() {
|
||||
let mut mem = MemoryF64 {
|
||||
data: [1.0_f64, 2.0],
|
||||
};
|
||||
|
|
@ -216,7 +216,9 @@ mod tests {
|
|||
|
||||
let x = _mm_setr_pd(3.0, 4.0);
|
||||
|
||||
_mm_stream_sd(d, x);
|
||||
unsafe {
|
||||
_mm_stream_sd(d, x);
|
||||
}
|
||||
_mm_sfence();
|
||||
}
|
||||
assert_eq!(mem.data[0], 3.0);
|
||||
|
|
@ -232,7 +234,7 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_ss() {
|
||||
fn test_mm_stream_ss() {
|
||||
let mut mem = MemoryF32 {
|
||||
data: [1.0_f32, 2.0, 3.0, 4.0],
|
||||
};
|
||||
|
|
@ -242,7 +244,9 @@ mod tests {
|
|||
|
||||
let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
|
||||
_mm_stream_ss(d, x);
|
||||
unsafe {
|
||||
_mm_stream_ss(d, x);
|
||||
}
|
||||
_mm_sfence();
|
||||
}
|
||||
assert_eq!(mem.data[0], 5.0);
|
||||
|
|
|
|||
|
|
@ -197,47 +197,53 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "xsave")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsave() {
|
||||
fn test_xsave() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsave(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsave(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsave(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsave(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "xsave")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xgetbv() {
|
||||
fn test_xgetbv() {
|
||||
let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK;
|
||||
|
||||
let xcr: u64 = _xgetbv(xcr_n);
|
||||
let xcr_cpy: u64 = _xgetbv(xcr_n);
|
||||
let xcr: u64 = unsafe { _xgetbv(xcr_n) };
|
||||
let xcr_cpy: u64 = unsafe { _xgetbv(xcr_n) };
|
||||
assert_eq!(xcr, xcr_cpy);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsaveopt() {
|
||||
fn test_xsaveopt() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsaveopt(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsaveopt(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsaveopt(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsaveopt(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsavec() {
|
||||
fn test_xsavec() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsavec(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsavec(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsavec(a.ptr(), m);
|
||||
_xrstor(a.ptr(), m);
|
||||
_xsavec(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -581,267 +581,297 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_loadconfig() {
|
||||
let config = __tilecfg::default();
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_release();
|
||||
fn test_tile_loadconfig() {
|
||||
unsafe {
|
||||
let config = __tilecfg::default();
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_release();
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_storeconfig() {
|
||||
let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
let mut _config = __tilecfg::default();
|
||||
_tile_storeconfig(_config.as_mut_ptr());
|
||||
_tile_release();
|
||||
assert_eq!(config, _config);
|
||||
fn test_tile_storeconfig() {
|
||||
unsafe {
|
||||
let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
let mut _config = __tilecfg::default();
|
||||
_tile_storeconfig(_config.as_mut_ptr());
|
||||
_tile_release();
|
||||
assert_eq!(config, _config);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_zero() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mut out = [[1_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[0; 64]; 16]);
|
||||
fn test_tile_zero() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mut out = [[1_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[0; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_stored() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mut out = [[1_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[0; 64]; 16]);
|
||||
fn test_tile_stored() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mut out = [[1_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[0; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_loadd() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
fn test_tile_loadd() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_stream_loadd() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
fn test_tile_stream_loadd() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tile")]
|
||||
unsafe fn test_tile_release() {
|
||||
_tile_release();
|
||||
fn test_tile_release() {
|
||||
unsafe {
|
||||
_tile_release();
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-bf16,avx512f")]
|
||||
unsafe fn test_tile_dpbf16ps() {
|
||||
_init_amx();
|
||||
let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
|
||||
let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
|
||||
let ones: [u8; 1024] = transmute([bf16_1; 512]);
|
||||
let twos: [u8; 1024] = transmute([bf16_2; 512]);
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbf16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
fn test_tile_dpbf16ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
|
||||
let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
|
||||
let ones: [u8; 1024] = transmute([bf16_1; 512]);
|
||||
let twos: [u8; 1024] = transmute([bf16_2; 512]);
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbf16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-int8")]
|
||||
unsafe fn test_tile_dpbssd() {
|
||||
_init_amx();
|
||||
let ones = [-1_i8; 1024];
|
||||
let twos = [-2_i8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
|
||||
_tile_dpbssd::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128_i32; 16]; 16]);
|
||||
fn test_tile_dpbssd() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [-1_i8; 1024];
|
||||
let twos = [-2_i8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
|
||||
_tile_dpbssd::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128_i32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-int8")]
|
||||
unsafe fn test_tile_dpbsud() {
|
||||
_init_amx();
|
||||
let ones = [-1_i8; 1024];
|
||||
let twos = [2_u8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbsud::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[-128_i32; 16]; 16]);
|
||||
fn test_tile_dpbsud() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [-1_i8; 1024];
|
||||
let twos = [2_u8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbsud::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[-128_i32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-int8")]
|
||||
unsafe fn test_tile_dpbusd() {
|
||||
_init_amx();
|
||||
let ones = [1_u8; 1024];
|
||||
let twos = [-2_i8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
|
||||
_tile_dpbusd::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[-128_i32; 16]; 16]);
|
||||
fn test_tile_dpbusd() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [1_u8; 1024];
|
||||
let twos = [-2_i8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
|
||||
_tile_dpbusd::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[-128_i32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-int8")]
|
||||
unsafe fn test_tile_dpbuud() {
|
||||
_init_amx();
|
||||
let ones = [1_u8; 1024];
|
||||
let twos = [2_u8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbuud::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128_i32; 16]; 16]);
|
||||
fn test_tile_dpbuud() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [1_u8; 1024];
|
||||
let twos = [2_u8; 1024];
|
||||
let mut res = [[0_i32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbuud::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128_i32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp16")]
|
||||
unsafe fn test_tile_dpfp16ps() {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_dpfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
fn test_tile_dpfp16ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_dpfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-complex")]
|
||||
unsafe fn test_tile_cmmimfp16ps() {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_cmmimfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
fn test_tile_cmmimfp16ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_cmmimfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[64f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-complex")]
|
||||
unsafe fn test_tile_cmmrlfp16ps() {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_cmmrlfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[0f32; 16]; 16]);
|
||||
fn test_tile_cmmrlfp16ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [1f16; 512];
|
||||
let twos = [2f16; 512];
|
||||
let mut res = [[0f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
|
||||
_tile_cmmrlfp16ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[0f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
const BF8_ONE: u8 = 0x3c;
|
||||
|
|
@ -850,223 +880,245 @@ mod tests {
|
|||
const HF8_TWO: u8 = 0x40;
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dpbf8ps() {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
fn test_tile_dpbf8ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dpbhf8ps() {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbhf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
fn test_tile_dpbhf8ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbhf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dphbf8ps() {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
fn test_tile_dphbf8ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dphf8ps() {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
fn test_tile_dphf8ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-movrs")]
|
||||
unsafe fn test_tile_loaddrs() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
fn test_tile_loaddrs() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-movrs")]
|
||||
unsafe fn test_tile_stream_loaddrs() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_movrow() {
|
||||
_init_amx();
|
||||
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_movrow::<0>(i);
|
||||
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
|
||||
fn test_tile_stream_loaddrs() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowd2ps() {
|
||||
_init_amx();
|
||||
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
fn test_tile_movrow() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowd2ps::<0>(i);
|
||||
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_movrow::<0>(i);
|
||||
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowps2phh() {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
fn test_tile_cvtrowd2ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phh::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
|
||||
);
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowd2ps::<0>(i);
|
||||
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowps2phl() {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
fn test_tile_cvtrowps2phh() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phl::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
|
||||
);
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phh::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
fn test_tile_cvtrowps2phl() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phl::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tf32")]
|
||||
unsafe fn test_tile_mmultf32ps() {
|
||||
_init_amx();
|
||||
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
|
||||
let mut res = [[0.0; 16]; 16];
|
||||
fn test_tile_mmultf32ps() {
|
||||
unsafe {
|
||||
_init_amx();
|
||||
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
|
||||
let mut res = [[0.0; 16]; 16];
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(a.as_ptr().cast(), 64);
|
||||
_tile_loadd::<2>(b.as_ptr().cast(), 64);
|
||||
_tile_mmultf32ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(a.as_ptr().cast(), 64);
|
||||
_tile_loadd::<2>(b.as_ptr().cast(), 64);
|
||||
_tile_mmultf32ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
|
||||
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
|
||||
assert_eq!(res, expected);
|
||||
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
|
||||
assert_eq!(res, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -77,12 +77,14 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "fxsr")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_fxsave64() {
|
||||
fn test_fxsave64() {
|
||||
let mut a = FxsaveArea::new();
|
||||
let mut b = FxsaveArea::new();
|
||||
|
||||
fxsr::_fxsave64(a.ptr());
|
||||
fxsr::_fxrstor64(a.ptr());
|
||||
fxsr::_fxsave64(b.ptr());
|
||||
unsafe {
|
||||
fxsr::_fxsave64(a.ptr());
|
||||
fxsr::_fxrstor64(a.ptr());
|
||||
fxsr::_fxsave64(b.ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,10 +204,12 @@ mod tests {
|
|||
// Miri cannot support this until it is clear how it fits in the Rust memory model
|
||||
// (non-temporal store)
|
||||
#[cfg_attr(miri, ignore)]
|
||||
unsafe fn test_mm_stream_si64() {
|
||||
fn test_mm_stream_si64() {
|
||||
let a: i64 = 7;
|
||||
let mut mem = boxed::Box::<i64>::new(-1);
|
||||
_mm_stream_si64(ptr::addr_of_mut!(*mem), a);
|
||||
unsafe {
|
||||
_mm_stream_si64(ptr::addr_of_mut!(*mem), a);
|
||||
}
|
||||
_mm_sfence();
|
||||
assert_eq!(a, *mem);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -132,37 +132,43 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "xsave")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsave64() {
|
||||
fn test_xsave64() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsave64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsave64(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsave64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsave64(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsaveopt64() {
|
||||
fn test_xsaveopt64() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsaveopt64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsaveopt64(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsaveopt64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsaveopt64(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
|
||||
unsafe fn test_xsavec64() {
|
||||
fn test_xsavec64() {
|
||||
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
|
||||
let mut a = XsaveArea::new();
|
||||
let mut b = XsaveArea::new();
|
||||
|
||||
_xsavec64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsavec64(b.ptr(), m);
|
||||
unsafe {
|
||||
_xsavec64(a.ptr(), m);
|
||||
_xrstor64(a.ptr(), m);
|
||||
_xsavec64(b.ptr(), m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,43 @@
|
|||
# Not supported by qemu (will throw illegal instruction)
|
||||
vamax_f16
|
||||
vamaxq_f16
|
||||
vamin_f16
|
||||
vaminq_f16
|
||||
vscale_f16
|
||||
vscale_f32
|
||||
vscaleq_f16
|
||||
vscaleq_f32
|
||||
vscaleq_f64
|
||||
vluti2_lane_p16
|
||||
vluti2_lane_p8
|
||||
vluti2_lane_s16
|
||||
vluti2_lane_s8
|
||||
vluti2_lane_u16
|
||||
vluti2_lane_u8
|
||||
vluti2q_lane_p16
|
||||
vluti2q_lane_p8
|
||||
vluti2q_lane_s16
|
||||
vluti2q_lane_s8
|
||||
vluti2q_lane_u16
|
||||
vluti2_laneq_f16
|
||||
vluti2_lane_f16
|
||||
vluti2_laneq_f16
|
||||
vluti2_laneq_p16
|
||||
vluti2_laneq_p8
|
||||
vluti2_laneq_s16
|
||||
vluti2_laneq_s8
|
||||
vluti2_laneq_u16
|
||||
vluti2_laneq_u8
|
||||
vluti2q_lane_f16
|
||||
vluti2q_laneq_f16
|
||||
vluti2q_laneq_p16
|
||||
vluti2q_laneq_p8
|
||||
vluti2q_laneq_s16
|
||||
vluti2q_laneq_s8
|
||||
vluti2q_laneq_u16
|
||||
vluti2q_laneq_u8
|
||||
vluti2q_lane_u8
|
||||
|
||||
# Not implemented in stdarch yet
|
||||
vbfdot_f32
|
||||
vbfdot_lane_f32
|
||||
|
|
@ -30,18 +70,6 @@ vrnd32x_f64
|
|||
vrnd32z_f64
|
||||
vrnd64x_f64
|
||||
vrnd64z_f64
|
||||
vluti2_lane_p16
|
||||
vluti2_lane_p8
|
||||
vluti2_lane_s16
|
||||
vluti2_lane_s8
|
||||
vluti2_lane_u16
|
||||
vluti2_lane_u8
|
||||
vluti2q_lane_p16
|
||||
vluti2q_lane_p8
|
||||
vluti2q_lane_s16
|
||||
vluti2q_lane_s8
|
||||
vluti2q_lane_u16
|
||||
vluti2q_lane_u8
|
||||
vluti4q_lane_f16_x2
|
||||
vluti4q_lane_p16_x2
|
||||
vluti4q_lane_p8
|
||||
|
|
|
|||
|
|
@ -38,6 +38,45 @@ vusdotq_lane_s32
|
|||
vusdotq_laneq_s32
|
||||
|
||||
# Below are in common to missing_aarch64.txt
|
||||
# Not supported by qemu (will throw illegal instruction)
|
||||
vamax_f16
|
||||
vamaxq_f16
|
||||
vamin_f16
|
||||
vaminq_f16
|
||||
vscale_f16
|
||||
vscale_f32
|
||||
vscaleq_f16
|
||||
vscaleq_f32
|
||||
vscaleq_f64
|
||||
vluti2_lane_p16
|
||||
vluti2_lane_p8
|
||||
vluti2_lane_s16
|
||||
vluti2_lane_s8
|
||||
vluti2_lane_u16
|
||||
vluti2_lane_u8
|
||||
vluti2q_lane_p16
|
||||
vluti2q_lane_p8
|
||||
vluti2q_lane_s16
|
||||
vluti2q_lane_s8
|
||||
vluti2q_lane_u16
|
||||
vluti2_laneq_f16
|
||||
vluti2_lane_f16
|
||||
vluti2_laneq_f16
|
||||
vluti2_laneq_p16
|
||||
vluti2_laneq_p8
|
||||
vluti2_laneq_s16
|
||||
vluti2_laneq_s8
|
||||
vluti2_laneq_u16
|
||||
vluti2_laneq_u8
|
||||
vluti2q_lane_f16
|
||||
vluti2q_laneq_f16
|
||||
vluti2q_laneq_p16
|
||||
vluti2q_laneq_p8
|
||||
vluti2q_laneq_s16
|
||||
vluti2q_laneq_s8
|
||||
vluti2q_laneq_u16
|
||||
vluti2q_laneq_u8
|
||||
vluti2q_lane_u8
|
||||
|
||||
# Not implemented in stdarch yet
|
||||
vbfdot_f32
|
||||
|
|
@ -71,18 +110,6 @@ vrnd32x_f64
|
|||
vrnd32z_f64
|
||||
vrnd64x_f64
|
||||
vrnd64z_f64
|
||||
vluti2_lane_p16
|
||||
vluti2_lane_p8
|
||||
vluti2_lane_s16
|
||||
vluti2_lane_s8
|
||||
vluti2_lane_u16
|
||||
vluti2_lane_u8
|
||||
vluti2q_lane_p16
|
||||
vluti2q_lane_p8
|
||||
vluti2q_lane_s16
|
||||
vluti2q_lane_s8
|
||||
vluti2q_lane_u16
|
||||
vluti2q_lane_u8
|
||||
vluti4q_lane_f16_x2
|
||||
vluti4q_lane_p16_x2
|
||||
vluti4q_lane_p8
|
||||
|
|
|
|||
300
library/stdarch/crates/stdarch-gen-arm/README.md
Normal file
300
library/stdarch/crates/stdarch-gen-arm/README.md
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
# stdarch-gen-arm generator guide
|
||||
## Running the generator
|
||||
- Run: `cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec`
|
||||
```
|
||||
$ cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
|
||||
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.18s
|
||||
Running `target/debug/stdarch-gen-arm crates/stdarch-gen-arm/spec`
|
||||
```
|
||||
## Input/Output
|
||||
### Input files (intrinsic YAML definitions)
|
||||
- `crates/stdarch-gen-arm/spec/<feature>/*.spec.yml`
|
||||
### Output files
|
||||
- Generated intrinsics:
|
||||
- `crates/core_arch/src/<arch>/<feature>/generated.rs`
|
||||
- Generated load/store tests:
|
||||
- `crates/core_arch/src/<arch>/<feature>/ld_st_tests_<arch>.rs`
|
||||
- Only generated when `test: { load: <idx> }` or `test: { store: <idx> }` is set for SVE/SVE2 intrinsics.
|
||||
## `.spec.yml` file anatomy
|
||||
```
|
||||
---
|
||||
Configs
|
||||
---
|
||||
Variable definitions
|
||||
---
|
||||
|
||||
Intrinsic definitions
|
||||
|
||||
---
|
||||
```
|
||||
- If you're new to YAML syntax, consider [reviewing](https://quickref.me/yaml.html) some of the less obvious syntax and features.
|
||||
- For example, mapping an attribute to a sequence can be done in two different ways:
|
||||
```yaml
|
||||
attribute: [item_a, item_b, item_c]
|
||||
```
|
||||
or
|
||||
```yaml
|
||||
attribute:
|
||||
- item_a
|
||||
- item_b
|
||||
- item_c
|
||||
```
|
||||
## Configs
|
||||
- Mappings defining top-level settings applied to all intrinsics:
|
||||
- `arch_cfgs`
|
||||
- Sequence of mappings specifying `arch_name`, `target_feature` (sequence), and `llvm_prefix`.
|
||||
- `uses_neon_types`(_Optional_)
|
||||
- A boolean specifying whether to emit NEON type imports in generated code.
|
||||
- `auto_big_endian`(_Optional_)
|
||||
- A boolean specifying whether to auto-generate big-endian shuffles when possible.
|
||||
- `auto_llvm_sign_conversion`(_Optional_)
|
||||
- A boolean specifying whether to auto-convert LLVM wrapper args to signed types.
|
||||
## Variable definitions
|
||||
- Defines YAML anchors/variables to avoid repetition.
|
||||
- Commonly used for stability attributes, cfgs and target features.
|
||||
## Intrinsic definitions
|
||||
### Example
|
||||
```yaml
|
||||
- name: "vtst{neon_type[0].no}"
|
||||
doc: "Signed compare bitwise Test bits nonzero"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
|
||||
- [int64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
|
||||
- [poly64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
|
||||
- [poly64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
|
||||
compose:
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}]
|
||||
- Let: [d, "{type[2]}", "{type[3]}"]
|
||||
- FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]]
|
||||
```
|
||||
|
||||
### Explanation of fields
|
||||
- `name`
|
||||
- The name of the intrinsic
|
||||
- Often built from a base name followed by a type suffix
|
||||
- `doc` (_Optional_)
|
||||
- A string explaining the purpose of the intrinsic
|
||||
- `static_defs` (_Optional_)
|
||||
- A sequence of const generics of the format `"const <NAME>: <type>"`
|
||||
- `arguments`
|
||||
- A sequence of strings in the format `"<argname>: <argtype>"`
|
||||
- `return_type` (_Optional_)
|
||||
- A string specifying the return type. If omitted, the intrinsic returns `()`.
|
||||
- `attr` (_Optional_)
|
||||
- A sequence of items defining the attributes to be applied to the intrinsic. Often stability attributes, target features, or `assert_instr` tests. At least one of `attr` or `assert_instr` must be set.
|
||||
- `target_features` (_Optional_)
|
||||
- A sequence of target features to enable for this intrinsic (merged with any global `arch_cfgs` settings).
|
||||
- `assert_instr` (_Optional_)
|
||||
- A sequence of strings expected to be found in the assembly. Required if `attr` is not set.
|
||||
- `safety` (_Optional_)
|
||||
- Use `safe`, or map `unsafe:` to a sequence of unsafety comments:
|
||||
- `custom: "<string>"`
|
||||
- `uninitialized`
|
||||
- `pointer_offset`, `pointer_offset_vnum`, or `dereference` (optionally qualified with `predicated`, `predicated_non_faulting`, or `predicated_first_faulting`)
|
||||
- `unpredictable_on_fault`
|
||||
- `non_temporal`
|
||||
- `neon`
|
||||
- `no_provenance: "<string>"`
|
||||
- `substitutions` (_Optional_)
|
||||
- Mappings of custom wildcard names to either `MatchSize` or `MatchKind` expressions
|
||||
- `types`
|
||||
- A sequence or sequence of sequences specifying the types to use when producing each intrinsic variant. These sequences can then be indexed by wildcards.
|
||||
- `constraints` (_Optional_)
|
||||
- A sequence of mappings. Each specifies a variable and a constraint. The available mappings are:
|
||||
- Assert a variable's value exists in a sequence of i32's
|
||||
- Usage: `{ variable: <name>, any_values: [<i32>,...] }`
|
||||
- Assert a variable's value exists in a range (inclusive)
|
||||
- Usage: `{ variable: <name>, range: [<i32>, <i32>] }`
|
||||
- Assert a variable's value exists in a range via a match (inclusive)
|
||||
- Usage: `{ variable: <name>, range: <MatchSize returning [i32,i32]> }`
|
||||
- Assert a variable's value does not exceed the number of elements in a SVE type `<type>`.
|
||||
- Usage: `{ variable: <name>, sve_max_elems_type: <type> }`
|
||||
- Assert a variable's value does not exceed the number of elements in a vector type `<type>`.
|
||||
- Usage: `{ variable: <name>, vec_max_elems_type: <type> }`
|
||||
- `predication_methods` (_Optional_)
|
||||
- Configuration for predicate-form variants. Only used when the intrinsic name includes an `_m*_` wildcard (e.g., `{_mx}`, `{_mxz}`).
|
||||
- `zeroing_method`: Required when requesting `_z`; either `{ drop: <arg> }` to remove an argument and replace it with a zero initialiser, or `{ select: <predicate_var> }` to select zeros into a predicate.
|
||||
- `dont_care_method`: How `_x` should be implemented (`inferred`, `as_zeroing`, or `as_merging`).
|
||||
- `compose`
|
||||
- A sequence of expressions that make up the body of the intrinsic
|
||||
- `big_endian_inverse` (_Optional_)
|
||||
- A boolean, default false. If true, generates two implementations of each intrinsic variant, one for each endianness, and attempts to automatically generate the required bit swizzles
|
||||
- `visibility` (_Optional_)
|
||||
- Function visibility. One of `public` (default) or `private`.
|
||||
- `n_variant_op` (_Optional_)
|
||||
- Enables generation of an `_n` variant when the intrinsic name includes the `{_n}` wildcard. Set to the operand name that should be splattered for the `_n` form.
|
||||
- `test` (_Optional_)
|
||||
- When set, load/store tests are automatically generated.
|
||||
- A mapping of either `load` or `store` to a number that indexes `types` to specify the type that the test should be addressing in memory.
|
||||
### Expressions
|
||||
#### Common
|
||||
- `Let`
|
||||
- Defines a variable
|
||||
- Usage: `Let: [<variable>, <type(optional)>, <expression>]`
|
||||
- `Const`
|
||||
- Defines a const
|
||||
- Usage: `Const: [<variable>, <type>, <expression>]`
|
||||
- `Assign`
|
||||
- Performs variable assignment
|
||||
- Usage: `Assign: [<variable>, <expression>]`
|
||||
- `FnCall`
|
||||
- Performs a function call
|
||||
- Usage: `FnCall: [<function pointer: expression>, [<argument: expression>, ... ], [<turbofish argument: expression>, ...](optional), <unsafe wrapper(optional): bool>]`
|
||||
- `MacroCall`
|
||||
- Performs a macro call
|
||||
- Usage: `MacroCall: [<macro name>, <token stream>]`
|
||||
- `MethodCall`
|
||||
- Performs a method call
|
||||
- Usage: `MethodCall: [<object: expression>, <method name>, [<argument: expression>, ... ]]`
|
||||
- `LLVMLink`
|
||||
- Creates an LLVM link and stores the function's name in the wildcard `{llvm_link}` for later use in subsequent expressions.
|
||||
- If left unset, the arguments and return type inherit from the intrinsic's signature by default. The links will also be set automatically if unset.
|
||||
- Usage:
|
||||
```yaml
|
||||
LLVMLink:
|
||||
name: <name>
|
||||
arguments: [<expression>, ... ](optional)
|
||||
return_type: <return type>(optional)
|
||||
links: (optional)
|
||||
- link: <link>
|
||||
arch: <arch>
|
||||
- ...
|
||||
```
|
||||
- `Identifier`
|
||||
- Emits a symbol. Prepend with a `$` to treat it as a scope variable, which engages variable tracking and enables inference. For example, `my_function_name` for a generic symbol or `$my_variable` for a variable.
|
||||
- Usage `Identifier: [<symbol name>, <Variable|Symbol>]`
|
||||
- `CastAs`
|
||||
- Casts an expression to an unchecked type
|
||||
- Usage: `CastAs: [<expression>, <type>]`
|
||||
- `MatchSize`
|
||||
- Allows for conditional generation depending on the size of a specified type
|
||||
- Usage:
|
||||
```yaml
|
||||
MatchSize:
|
||||
- <type>
|
||||
- default: <expression>
|
||||
byte(optional): <expression>
|
||||
halfword(optional): <expression>
|
||||
doubleword(optional): <expression>
|
||||
```
|
||||
- `MatchKind`
|
||||
- Allows for conditional generation depending on the kind of a specified type
|
||||
```yaml
|
||||
MatchKind:
|
||||
- <type>
|
||||
- default: <expression>
|
||||
float(optional): <expression>
|
||||
unsigned(optional): <expression>
|
||||
```
|
||||
#### Rarely Used
|
||||
- `IntConstant`
|
||||
- Constant signed integer expression
|
||||
- Usage: `IntConstant: <i32>`
|
||||
- `FloatConstant`
|
||||
- Constant floating-point expression
|
||||
- Usage: `FloatConstant: <f32>`
|
||||
- `BoolConstant`
|
||||
- Constant boolean expression
|
||||
- Usage: `BoolConstant: <bool>`
|
||||
- `Array`
|
||||
- An array of expressions
|
||||
- Usage: `Array: [<expression>, ...]`
|
||||
- `SvUndef`
|
||||
- Returns the LLVM `undef` symbol
|
||||
- Usage: `SvUndef`
|
||||
- `Multiply`
|
||||
- Simply `*`
|
||||
- Usage: `Multiply: [<expression>, <expression>]`
|
||||
- `Xor`
|
||||
- Simply `^`
|
||||
- Usage: `Xor: [<expression>, <expression>]`
|
||||
- `ConvertConst`
|
||||
- Converts the specified constant to the specified type's kind
|
||||
- Usage: `ConvertConst: [<type>, <i32>]`
|
||||
- `Type`
|
||||
- Yields the given type in the Rust representation
|
||||
- Usage: `Type: [<type>]`
|
||||
|
||||
### Wildstrings
|
||||
- Wildstrings let you take advantage of wildcards.
|
||||
- For example, they are often used in intrinsic names `name: "vtst{neon_type[0].no}"`
|
||||
- As shown above, wildcards are identified by the surrounding curly brackets.
|
||||
- Double curly brackets can be used to escape wildcard functionality if you need literal curly brackets in the generated intrinsic.
|
||||
### Wildcards
|
||||
Wildcards are heavily used in the spec. They let you write generalised definitions for a group of intrinsics that generate multiple variants. The wildcard itself is replaced with the relevant string in each variant.
|
||||
Ignoring endianness, for each row in the `types` field of an intrinsic in the spec, a variant of the intrinsic will be generated. That row's contents can be indexed by the wildcards. Below is the behaviour of each wildcard.
|
||||
- `type[<index: usize>]`
|
||||
- Replaced in each variant with the value in the indexed position in the relevant row of the `types` field.
|
||||
- For unnested sequences of `types` (i.e., `types` is a sequence where each element is a single item, not another sequence), the square brackets can be omitted. Simply: `type`
|
||||
- `neon_type[<index: usize>]`
|
||||
- Extends the behaviour of `type` with some NEON-specific features and inference.
|
||||
- Tuples: This wildcard can also be written as `neon_type_x<n>` where `n` is in the set `{2,3,4}`. This generates the `n`-tuple variant of the (inferred) NEON type.
|
||||
- Suffixes: These modify the behaviour of the wildcard from simple substitution.
|
||||
- `no` - normal behaviour. Tries to do as much work as it can for you, inferring when to emit:
|
||||
- Regular type-size suffixes: `_s8`, `_u16`, `_f32`, ...
|
||||
- `q` variants for double-width (128b) vector types: `q_s8`, `q_u16`, `q_f32`, ...
|
||||
- `_x<n>` variants for tuple vector types: `_s8_x2`, `_u32_x3`, `_f64_x4`, ...
|
||||
- As well as any combination of the above: `q_s16_x16` ...
|
||||
- Most of the other suffixes modify the normal behaviour by disabling features or adding new ones. (See table below)
|
||||
- `sve_type[<index: usize>]`
|
||||
- Similar to `neon_type`, but without the suffixes.
|
||||
- `size[<index: usize>]`
|
||||
- The size (in bits) of the indexed type.
|
||||
- `size_minus_one[<index: usize>]`
|
||||
- Emits the size (in bits) of the indexed type minus one.
|
||||
- `size_literal[<index: usize>]`
|
||||
- The literal representation of the indexed type.
|
||||
- `b`: byte, `h`: halfword, `w`: word, or `d`: double.
|
||||
- `type_kind[<index: usize>]`
|
||||
- The literal representation of the indexed type's kind.
|
||||
- `f`: float, `s`: signed, `u`: unsigned, `p`: polynomial, `b`: boolean.
|
||||
- `size_in_bytes_log2[<index: usize>]`
|
||||
- Log2 of the size of the indexed type in *bytes*.
|
||||
- `predicate[<index: usize>]`
|
||||
- SVE predicate vector type inferred from the indexed type.
|
||||
- `max_predicate`
|
||||
- The same as `predicate`, but uses the largest type in the relevant `types` sequence/row.
|
||||
- `_n`
|
||||
- Emits the current N-variant suffix when `n_variant_op` is configured.
|
||||
- `<wildcard> as <type>`
|
||||
- If `<wildcard>` evaluates to a vector, it produces a vector of the same shape, but with `<type>` as the base type.
|
||||
- `llvm_link`
|
||||
- If the `LLVMLink` mapping has been set for an intrinsic, this will give the name of the link.
|
||||
- `_m*`
|
||||
- Predicate form masks. Use wildcards such as `{_mx}` or `{_mxz}` to expand merging/don't-care/zeroing variants according to the mask.
|
||||
- `<custom>`
|
||||
- You may simply call upon wildcards defined under `substitutions`.
|
||||
### neon_type suffixes
|
||||
|
||||
| suffix | implication |
|
||||
| ----------------- | --------------------------------------------- |
|
||||
| `.no` | Normal |
|
||||
| `.noq` | Never include `q`s |
|
||||
| `.nox` | Never include `_x<n>`s |
|
||||
| `.N` | Include `_n_`, e.g., `_n_s8` |
|
||||
| `.noq_N` | Include `_n_`, but never `q`s |
|
||||
| `.dup` | Include `_dup_`, e.g., `_dup_s8` |
|
||||
| `.dup_nox` | Include `_dup_` but never `_x<n>`s |
|
||||
| `.lane` | Include `_lane_`, e.g., `_lane_s8` |
|
||||
| `.lane_nox` | Include `_lane_`, but never `_x<n>`s |
|
||||
| `.rot90` | Include `_rot90_`, e.g., `_rot90_s8` |
|
||||
| `.rot180` | Include `_rot180_`, e.g., `_rot180_s8` |
|
||||
| `.rot270` | Include `_rot270_`, e.g., `_rot270_s8` |
|
||||
| `.rot90_lane` | Include `_rot90_lane_` |
|
||||
| `.rot180_lane` | Include `_rot180_lane_` |
|
||||
| `.rot270_lane` | Include `_rot270_lane_` |
|
||||
| `.rot90_laneq` | Include `_rot90_laneq_` |
|
||||
| `.rot180_laneq` | Include `_rot180_laneq_` |
|
||||
| `.rot270_laneq` | Include `_rot270_laneq_` |
|
||||
| `.base` | Produce only the size, e.g., `8`, `16` |
|
||||
| `.u` | Produce the type's unsigned equivalent |
|
||||
| `.laneq_nox` | Include `_laneq_`, but never `_x<n>`s |
|
||||
| `.tuple` | Produce only the size of the tuple, e.g., `3` |
|
||||
| `.base_byte_size` | Produce only the size in bytes. |
|
||||
|
||||
|
|
@ -66,6 +66,14 @@ neon-unstable-feat-lut: &neon-unstable-feat-lut
|
|||
aarch64-stable-jscvt: &aarch64-stable-jscvt
|
||||
FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "CURRENT_RUSTC_VERSION"']]
|
||||
|
||||
# #[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
|
||||
neon-unstable-feat-lrcpc3: &neon-unstable-feat-lrcpc3
|
||||
FnCall: [unstable, ['feature = "stdarch_neon_feat_lrcpc3"', 'issue = "none"']]
|
||||
|
||||
# #[unstable(feature = "stdarch_neon_fp8", issue = "none")]
|
||||
neon-unstable-fp8: &neon-unstable-fp8
|
||||
FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']]
|
||||
|
||||
# #[cfg(target_endian = "little")]
|
||||
little-endian: &little-endian
|
||||
FnCall: [cfg, ['target_endian = "little"']]
|
||||
|
|
@ -4398,6 +4406,116 @@ intrinsics:
|
|||
- - FnCall: [transmute, [a]]
|
||||
- FnCall: [transmute, [b]]
|
||||
|
||||
- name: "vldap1{neon_type[1].lane_nox}"
|
||||
doc: "Load-acquire RCpc one single-element structure to one lane of one register"
|
||||
arguments: ["ptr: {type[0]}", "src: {type[1]}"]
|
||||
static_defs: ["const LANE: i32"]
|
||||
return_type: "{type[1]}"
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ["2"]]
|
||||
- *neon-unstable-feat-lrcpc3
|
||||
types:
|
||||
- ['*const i64', int64x1_t, 'static_assert!', 'LANE == 0']
|
||||
- ['*const i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
|
||||
compose:
|
||||
- FnCall: ['{type[2]}', ['{type[3]}']]
|
||||
- Let:
|
||||
- "atomic_src"
|
||||
- FnCall: ["crate::sync::atomic::AtomicI64::from_ptr", ['ptr as *mut i64']]
|
||||
- Identifier: [';', Symbol]
|
||||
- FnCall:
|
||||
- simd_insert!
|
||||
- - src
|
||||
- "LANE as u32"
|
||||
- MethodCall:
|
||||
- "atomic_src"
|
||||
- load
|
||||
- ["crate::sync::atomic::Ordering::Acquire"]
|
||||
|
||||
- name: "vldap1{neon_type[1].lane_nox}"
|
||||
doc: "Load-acquire RCpc one single-element structure to one lane of one register"
|
||||
arguments: ["ptr: {type[0]}","src: {type[1]}"]
|
||||
static_defs: ["const LANE: i32"]
|
||||
return_type: "{type[1]}"
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
attr:
|
||||
- FnCall: [rustc_legacy_const_generics, ["2"]]
|
||||
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
|
||||
- *neon-unstable-feat-lrcpc3
|
||||
types:
|
||||
- ['*const u64', uint64x1_t,'static_assert!', 'LANE == 0','']
|
||||
#- ['*const f64', float64x1_t,'static_assert!', 'LANE == 0',''] # Fails due to bad IR gen from rust
|
||||
- ['*const p64', poly64x1_t,'static_assert!', 'LANE == 0','']
|
||||
- ['*const u64', uint64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
- ['*const f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
- ['*const p64', poly64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
compose:
|
||||
- FnCall: ['{type[2]}', ['{type[3]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vldap1{type[4]}_lane_s64::<LANE>'
|
||||
- - "ptr as *mut i64"
|
||||
- FnCall: [transmute,[src]]
|
||||
|
||||
- name: "vstl1{neon_type[1].lane_nox}"
|
||||
doc: "Store-Release a single-element structure from one lane of one register."
|
||||
arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ["2"]]
|
||||
- *neon-unstable-feat-lrcpc3
|
||||
types:
|
||||
- ['*mut i64', int64x1_t,'static_assert!', 'LANE == 0']
|
||||
- ['*mut i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
|
||||
compose:
|
||||
- FnCall: ['{type[2]}', ['{type[3]}']]
|
||||
- Let:
|
||||
- "atomic_dst"
|
||||
- "ptr as *mut crate::sync::atomic::AtomicI64"
|
||||
- Identifier: [';', Symbol]
|
||||
- Let:
|
||||
- "lane"
|
||||
- i64
|
||||
- FnCall: [simd_extract!, [val, 'LANE as u32']]
|
||||
- MethodCall:
|
||||
- "(*atomic_dst)"
|
||||
- store
|
||||
- [FnCall: [transmute, [lane]],"crate::sync::atomic::Ordering::Release"]
|
||||
|
||||
- name: "vstl1{neon_type[1].lane_nox}"
|
||||
doc: "Store-Release a single-element structure from one lane of one register."
|
||||
arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ["2"]]
|
||||
- *neon-unstable-feat-lrcpc3
|
||||
types:
|
||||
- ['*mut u64', uint64x1_t, 'static_assert!', 'LANE == 0','']
|
||||
- ['*mut f64', float64x1_t,'static_assert!', 'LANE == 0','']
|
||||
- ['*mut p64', poly64x1_t, 'static_assert!', 'LANE == 0','']
|
||||
- ['*mut u64', uint64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
- ['*mut f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
- ['*mut p64', poly64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
|
||||
compose:
|
||||
- FnCall: ['{type[2]}', ['{type[3]}']]
|
||||
- FnCall:
|
||||
- "vstl1{type[4]}_lane_s64::<LANE>"
|
||||
- - "ptr as *mut i64"
|
||||
- FnCall: [transmute, [val]]
|
||||
|
||||
- name: "vst1{neon_type[1].lane_nox}"
|
||||
doc: "Store multiple single-element structures from one, two, three, or four registers"
|
||||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
|
|
@ -5081,56 +5199,6 @@ intrinsics:
|
|||
arch: aarch64,arm64ec
|
||||
- FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']]
|
||||
|
||||
- name: "vusdot{neon_type[0].laneq_nox}"
|
||||
doc: "Dot product index form with unsigned and signed integers"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-i8mm
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
|
||||
- [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let: [c, int32x4_t, {FnCall: ['vreinterpretq_s32_s8', [c]]}]
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
|
||||
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
|
||||
|
||||
- name: "vsudot{neon_type[0].laneq_nox}"
|
||||
doc: "Dot product index form with signed and unsigned integers"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-i8mm
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
|
||||
- [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- uint32x4_t
|
||||
- FnCall: [transmute, [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[4]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vusdot{neon_type[0].no}"
|
||||
- - a
|
||||
- FnCall: [transmute, [c]]
|
||||
- b
|
||||
|
||||
- name: "vmul{neon_type.no}"
|
||||
doc: Multiply
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
|
|
@ -6552,7 +6620,6 @@ intrinsics:
|
|||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
|
||||
- name: "vcmla{neon_type[0].rot270_lane}"
|
||||
doc: Floating-point complex multiply accumulate
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
|
|
@ -6574,66 +6641,6 @@ intrinsics:
|
|||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
|
||||
- [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
|
||||
- [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
doc: Maximum (vector)
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
|
|
@ -13966,10 +13973,12 @@ intrinsics:
|
|||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,faminmax"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famax]]}]]
|
||||
- FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
|
||||
safety: safe
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
- float64x2_t
|
||||
|
|
@ -13986,10 +13995,12 @@ intrinsics:
|
|||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,faminmax"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famin]]}]]
|
||||
- FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
|
||||
safety: safe
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
- float64x2_t
|
||||
|
|
@ -14030,36 +14041,101 @@ intrinsics:
|
|||
arch: aarch64,arm64ec
|
||||
- FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]]
|
||||
|
||||
- name: "vluti2{neon_type[0].laneq_nox}"
|
||||
doc: "Lookup table read with 2-bit indices"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = {type[4]}']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
|
||||
- *neon-unstable-feat-lut
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
static_defs: ["const INDEX: i32"]
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
- [int8x8_t, uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
|
||||
- [int8x16_t, uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
|
||||
- [int16x4_t, uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
|
||||
- [int16x8_t, uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
|
||||
compose:
|
||||
- FnCall: ['static_assert!', ['{type[3]}']]
|
||||
- LLVMLink:
|
||||
name: "vluti2{neon_type[0].laneq_nox}"
|
||||
arguments:
|
||||
- 'a: {neon_type[0]}'
|
||||
- 'b: {neon_type[1]}'
|
||||
- 'n: i32'
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.vluti2.laneq.{neon_type[2]}.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall: ['_vluti2{neon_type[0].laneq_nox}', [a, b, INDEX]]
|
||||
|
||||
- name: "vluti2{neon_type[0].lane_nox}"
|
||||
doc: "Lookup table read with 2-bit indices"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,lut"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
|
||||
- *neon-unstable-feat-lut
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
static_defs: ["const INDEX: i32"]
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
- [uint8x8_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
|
||||
- [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
|
||||
- [poly8x8_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
|
||||
- [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
|
||||
- [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
|
||||
- [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
|
||||
- [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
|
||||
- [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
|
||||
- [uint8x8_t, uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
|
||||
- [uint8x16_t, uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
|
||||
- [poly8x8_t, uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
|
||||
- [poly8x16_t, uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
|
||||
- [uint16x4_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
|
||||
- [uint16x8_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
|
||||
- [poly16x4_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
|
||||
- [poly16x8_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
|
||||
- [float16x4_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
|
||||
- [float16x8_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
|
||||
compose:
|
||||
- FnCall: ['static_assert!', ['{type[3]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vluti2{neon_type[4].lane_nox}::<LANE>'
|
||||
- 'vluti2{neon_type[4].lane_nox}::<INDEX>'
|
||||
- - FnCall: [transmute, [a]]
|
||||
- b
|
||||
|
||||
- name: "vluti2{neon_type[0].laneq_nox}"
|
||||
doc: "Lookup table read with 2-bit indices"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,lut"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
|
||||
- *neon-unstable-feat-lut
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
static_defs: ["const INDEX: i32"]
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
- [uint8x8_t, uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
|
||||
- [uint8x16_t, uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
|
||||
- [poly8x8_t, uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
|
||||
- [poly8x16_t, uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
|
||||
- [uint16x4_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
|
||||
- [uint16x8_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
|
||||
- [poly16x4_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
|
||||
- [poly16x8_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
|
||||
- [float16x4_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
|
||||
- [float16x8_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
|
||||
compose:
|
||||
- FnCall: ['static_assert!', ['{type[3]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vluti2{neon_type[4].laneq_nox}::<INDEX>'
|
||||
- - FnCall: [transmute, [a]]
|
||||
- b
|
||||
|
||||
|
||||
- name: "vluti4{neon_type[0].lane_nox}"
|
||||
doc: "Lookup table read with 4-bit indices"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
|
|
@ -14268,6 +14344,28 @@ intrinsics:
|
|||
- - FnCall: [transmute, [a]]
|
||||
- b
|
||||
|
||||
- name: "vscale{neon_type[0].no}"
|
||||
doc: "Multi-vector floating-point adjust exponent"
|
||||
arguments: ["vn: {type[0]}", "vm: {type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- *neon-unstable-fp8
|
||||
- FnCall: [target_feature, ['enable = "neon,fp8"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [fscale]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, int16x4_t]
|
||||
- [float16x8_t, int16x8_t]
|
||||
- [float32x2_t, int32x2_t]
|
||||
- [float32x4_t, int32x4_t]
|
||||
- [float64x2_t, int64x2_t]
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vscale{neon_type[0].no}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.fp8.fscale.{neon_type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "__jcvt"
|
||||
doc: "Floating-point JavaScript convert to signed fixed-point, rounding toward zero"
|
||||
arguments: ["a: {type}"]
|
||||
|
|
|
|||
|
|
@ -7096,6 +7096,132 @@ intrinsics:
|
|||
- FnCall: [simd_cast, [b]]
|
||||
- FnCall: [simd_sub, [c, d]]
|
||||
|
||||
- name: "vusdot{neon_type[0].laneq_nox}"
|
||||
doc: "Dot product index form with unsigned and signed integers"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- *neon-i8mm
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 3']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
|
||||
- [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
|
||||
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]]
|
||||
#- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
|
||||
|
||||
- name: "vsudot{neon_type[0].laneq_nox}"
|
||||
doc: "Dot product index form with signed and unsigned integers"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-v8
|
||||
- *neon-i8mm
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
|
||||
- [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- uint32x4_t
|
||||
- FnCall: [transmute, [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[4]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vusdot{neon_type[0].no}"
|
||||
- - a
|
||||
- FnCall: [transmute, [c]]
|
||||
- b
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
|
||||
- [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
|
||||
- [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].no}"
|
||||
doc: Dot product arithmetic (vector)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
|
|
@ -8785,7 +8911,6 @@ intrinsics:
|
|||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-not-arm-stable-fp16
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
|
|
@ -8849,7 +8974,6 @@ intrinsics:
|
|||
- *neon-v8
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-not-arm-stable-fp16
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
|
|
|
|||
|
|
@ -840,7 +840,7 @@ impl fmt::Display for UnsafetyComment {
|
|||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Custom(s) => s.fmt(f),
|
||||
Self::Neon => write!(f, "Neon instrinsic unsafe"),
|
||||
Self::Neon => write!(f, "Neon intrinsic unsafe"),
|
||||
Self::Uninitialized => write!(
|
||||
f,
|
||||
"This creates an uninitialized value, and may be unsound (like \
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -25,7 +25,6 @@ backtrace = ["std/backtrace"]
|
|||
backtrace-trace-only = ["std/backtrace-trace-only"]
|
||||
compiler-builtins-c = ["std/compiler-builtins-c"]
|
||||
compiler-builtins-mem = ["std/compiler-builtins-mem"]
|
||||
compiler-builtins-no-f16-f128 = ["std/compiler-builtins-no-f16-f128"]
|
||||
debug_refcell = ["std/debug_refcell"]
|
||||
llvm-libunwind = ["std/llvm-libunwind"]
|
||||
system-llvm-libunwind = ["std/system-llvm-libunwind"]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
//@ has 'glob_shadowing/index.html'
|
||||
//@ count - '//dt' 6
|
||||
//@ !has - '//dd' 'sub1::describe'
|
||||
//@ count - '//dt' 7
|
||||
//@ !has - '//dd' 'sub1::describe1'
|
||||
//@ has - '//dd' 'sub2::describe'
|
||||
|
||||
//@ !has - '//dd' 'sub1::describe2'
|
||||
//@ has - '//dd' 'sub1::describe2'
|
||||
|
||||
//@ !has - '//dd' 'sub1::prelude'
|
||||
//@ has - '//dd' 'mod::prelude'
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
mod sub1 {
|
||||
// this should be shadowed by sub2::describe
|
||||
/// sub1::describe
|
||||
/// sub1::describe1
|
||||
pub fn describe() -> &'static str {
|
||||
"sub1::describe"
|
||||
}
|
||||
|
|
@ -33,7 +33,9 @@ mod sub1 {
|
|||
pub struct Foo;
|
||||
|
||||
// this should be shadowed,
|
||||
// because both sub1::describe2 and sub3::describe2 are from glob reexport
|
||||
// because both sub1::describe2 and sub3::describe2 are from glob reexport,
|
||||
// but it is still usable from other crates under the `ambiguous_glob_imports` lint,
|
||||
// so it is reachable and documented
|
||||
/// sub1::describe2
|
||||
pub fn describe2() -> &'static str {
|
||||
"sub1::describe2"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
// Regression test for https://github.com/rust-lang/rust/issues/100973
|
||||
// Update: the rules has changed after #147984, one of the colliding items is now available
|
||||
// from other crates under a deprecation lint.
|
||||
|
||||
//@ set m1 = "$.index[?(@.name == 'm1' && @.inner.module)].id"
|
||||
//@ is "$.index[?(@.name == 'm1')].inner.module.items" []
|
||||
//@ is "$.index[?(@.name == 'm1')].inner.module.items" [0]
|
||||
//@ is "$.index[?(@.name == 'm1')].inner.module.is_stripped" true
|
||||
mod m1 {
|
||||
pub fn f() {}
|
||||
|
|
|
|||
|
|
@ -14,3 +14,20 @@ pub mod bar {
|
|||
//~| ERROR: unresolved link
|
||||
pub fn sql_function_proc() {}
|
||||
}
|
||||
|
||||
// From here, this is a regression test for <https://github.com/rust-lang/rust/issues/151411>.
|
||||
pub use fuzz_test_helpers::*;
|
||||
|
||||
/// A type referenced in the deprecation note.
|
||||
pub struct Env;
|
||||
|
||||
impl Env {
|
||||
pub fn try_invoke(&self) {}
|
||||
}
|
||||
|
||||
mod fuzz_test_helpers {
|
||||
#[deprecated(note = "use [Env::try_invoke] instead")]
|
||||
//~^ ERROR: unresolved link
|
||||
//~| ERROR: unresolved link
|
||||
pub fn fuzz_catch_panic() {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,18 @@ note: the lint level is defined here
|
|||
LL | #![deny(rustdoc::broken_intra_doc_links)]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
error: unresolved link to `Env::try_invoke`
|
||||
--> $DIR/deprecated-note-from-reexported.rs:29:25
|
||||
|
|
||||
LL | #[deprecated(note = "use [Env::try_invoke] instead")]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
||||
= note: the link appears in this line:
|
||||
|
||||
use [Env::try_invoke] instead
|
||||
^^^^^^^^^^^^^^^
|
||||
= note: no item named `Env` in scope
|
||||
|
||||
error: unresolved link to `define_sql_function`
|
||||
--> $DIR/deprecated-note-from-reexported.rs:12:25
|
||||
|
|
||||
|
|
@ -30,5 +42,18 @@ LL | #[deprecated(note = "Use [`define_sql_function`] instead")]
|
|||
= help: to escape `[` and `]` characters, add '\' before them like `\[` or `\]`
|
||||
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
error: unresolved link to `Env::try_invoke`
|
||||
--> $DIR/deprecated-note-from-reexported.rs:29:25
|
||||
|
|
||||
LL | #[deprecated(note = "use [Env::try_invoke] instead")]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
||||
= note: the link appears in this line:
|
||||
|
||||
use [Env::try_invoke] instead
|
||||
^^^^^^^^^^^^^^^
|
||||
= note: no item named `Env` in scope
|
||||
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
|
||||
|
||||
error: aborting due to 4 previous errors
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
fn a() {
|
||||
if let x = 1 && i = 2 {}
|
||||
//~^ ERROR cannot find value `i` in this scope
|
||||
//~| ERROR mismatched types
|
||||
//~| ERROR expected expression, found `let` statement
|
||||
//~^ ERROR expected expression, found `let` statement
|
||||
}
|
||||
|
||||
fn b() {
|
||||
|
|
|
|||
|
|
@ -15,13 +15,7 @@ LL | if let x = 1 && i == 2 {}
|
|||
| +
|
||||
|
||||
error[E0425]: cannot find value `i` in this scope
|
||||
--> $DIR/bad-if-let-suggestion.rs:2:21
|
||||
|
|
||||
LL | if let x = 1 && i = 2 {}
|
||||
| ^ not found in this scope
|
||||
|
||||
error[E0425]: cannot find value `i` in this scope
|
||||
--> $DIR/bad-if-let-suggestion.rs:9:9
|
||||
--> $DIR/bad-if-let-suggestion.rs:7:9
|
||||
|
|
||||
LL | fn a() {
|
||||
| ------ similarly named function `a` defined here
|
||||
|
|
@ -36,7 +30,7 @@ LL + if (a + j) = i {}
|
|||
|
|
||||
|
||||
error[E0425]: cannot find value `j` in this scope
|
||||
--> $DIR/bad-if-let-suggestion.rs:9:13
|
||||
--> $DIR/bad-if-let-suggestion.rs:7:13
|
||||
|
|
||||
LL | fn a() {
|
||||
| ------ similarly named function `a` defined here
|
||||
|
|
@ -51,7 +45,7 @@ LL + if (i + a) = i {}
|
|||
|
|
||||
|
||||
error[E0425]: cannot find value `i` in this scope
|
||||
--> $DIR/bad-if-let-suggestion.rs:9:18
|
||||
--> $DIR/bad-if-let-suggestion.rs:7:18
|
||||
|
|
||||
LL | fn a() {
|
||||
| ------ similarly named function `a` defined here
|
||||
|
|
@ -66,7 +60,7 @@ LL + if (i + j) = a {}
|
|||
|
|
||||
|
||||
error[E0425]: cannot find value `x` in this scope
|
||||
--> $DIR/bad-if-let-suggestion.rs:16:8
|
||||
--> $DIR/bad-if-let-suggestion.rs:14:8
|
||||
|
|
||||
LL | fn a() {
|
||||
| ------ similarly named function `a` defined here
|
||||
|
|
@ -80,18 +74,6 @@ LL - if x[0] = 1 {}
|
|||
LL + if a[0] = 1 {}
|
||||
|
|
||||
|
||||
error[E0308]: mismatched types
|
||||
--> $DIR/bad-if-let-suggestion.rs:2:8
|
||||
|
|
||||
LL | if let x = 1 && i = 2 {}
|
||||
| ^^^^^^^^^^^^^^^^^^ expected `bool`, found `()`
|
||||
|
|
||||
help: you might have meant to compare for equality
|
||||
|
|
||||
LL | if let x = 1 && i == 2 {}
|
||||
| +
|
||||
error: aborting due to 5 previous errors
|
||||
|
||||
error: aborting due to 7 previous errors
|
||||
|
||||
Some errors have detailed explanations: E0308, E0425.
|
||||
For more information about an error, try `rustc --explain E0308`.
|
||||
For more information about this error, try `rustc --explain E0425`.
|
||||
|
|
|
|||
8
tests/ui/imports/ambiguous-reachable.rs
Normal file
8
tests/ui/imports/ambiguous-reachable.rs
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
//@ build-pass
|
||||
//@ aux-crate: ambiguous_reachable_extern=ambiguous-reachable-extern.rs
|
||||
|
||||
#![allow(ambiguous_glob_imports)]
|
||||
|
||||
fn main() {
|
||||
ambiguous_reachable_extern::generic::<u8>();
|
||||
}
|
||||
23
tests/ui/imports/ambiguous-reachable.stderr
Normal file
23
tests/ui/imports/ambiguous-reachable.stderr
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
Future incompatibility report: Future breakage diagnostic:
|
||||
warning: `generic` is ambiguous
|
||||
--> $DIR/ambiguous-reachable.rs:7:33
|
||||
|
|
||||
LL | ambiguous_reachable_extern::generic::<u8>();
|
||||
| ^^^^^^^ ambiguous name
|
||||
|
|
||||
= warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release!
|
||||
= note: for more information, see issue #114095 <https://github.com/rust-lang/rust/issues/114095>
|
||||
= note: ambiguous because of multiple glob imports of a name in the same module
|
||||
note: `generic` could refer to the function defined here
|
||||
--> $DIR/auxiliary/ambiguous-reachable-extern.rs:13:9
|
||||
|
|
||||
LL | pub use m1::*;
|
||||
| ^^
|
||||
= help: consider updating this dependency to resolve this error
|
||||
= help: if updating the dependency does not resolve the problem report the problem to the author of the relevant crate
|
||||
note: `generic` could also refer to the function defined here
|
||||
--> $DIR/auxiliary/ambiguous-reachable-extern.rs:14:9
|
||||
|
|
||||
LL | pub use m2::*;
|
||||
| ^^
|
||||
|
||||
14
tests/ui/imports/auxiliary/ambiguous-reachable-extern.rs
Normal file
14
tests/ui/imports/auxiliary/ambiguous-reachable-extern.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
mod m1 {
|
||||
pub fn generic<T>() {
|
||||
let x = 10;
|
||||
let y = 11;
|
||||
println!("hello {x} world {:?}", y);
|
||||
}
|
||||
}
|
||||
|
||||
mod m2 {
|
||||
pub fn generic() {}
|
||||
}
|
||||
|
||||
pub use m1::*;
|
||||
pub use m2::*;
|
||||
6
tests/ui/missing/missing-let.rs
Normal file
6
tests/ui/missing/missing-let.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
fn main() {
|
||||
let x = Some(42);
|
||||
if let Some(_) = x
|
||||
&& Some(x) = x //~^ ERROR expected expression, found `let` statement
|
||||
{}
|
||||
}
|
||||
18
tests/ui/missing/missing-let.stderr
Normal file
18
tests/ui/missing/missing-let.stderr
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
error: expected expression, found `let` statement
|
||||
--> $DIR/missing-let.rs:3:8
|
||||
|
|
||||
LL | if let Some(_) = x
|
||||
| ^^^^^^^^^^^^^^^
|
||||
|
|
||||
= note: only supported directly in conditions of `if` and `while` expressions
|
||||
help: you might have meant to continue the let-chain
|
||||
|
|
||||
LL | && let Some(x) = x
|
||||
| +++
|
||||
help: you might have meant to compare for equality
|
||||
|
|
||||
LL | && Some(x) == x
|
||||
| +
|
||||
|
||||
error: aborting due to 1 previous error
|
||||
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
// issue: https://github.com/rust-lang/rust/issues/146515
|
||||
|
||||
use std::rc::Rc;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ContainsRc<T> {
|
||||
value: Rc<T>,
|
||||
}
|
||||
|
||||
fn clone_me<T>(x: &ContainsRc<T>) -> ContainsRc<T> {
|
||||
//~^ NOTE expected `ContainsRc<T>` because of return type
|
||||
x.clone()
|
||||
//~^ ERROR mismatched types
|
||||
//~| NOTE expected `ContainsRc<T>`, found `&ContainsRc<T>`
|
||||
//~| NOTE expected struct `ContainsRc<_>`
|
||||
//~| NOTE `ContainsRc<T>` does not implement `Clone`, so `&ContainsRc<T>` was cloned instead
|
||||
//~| NOTE the trait `Clone` must be implemented
|
||||
}
|
||||
|
||||
fn main() {}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
error[E0308]: mismatched types
|
||||
--> $DIR/derive-clone-already-present-issue-146515.rs:12:5
|
||||
|
|
||||
LL | fn clone_me<T>(x: &ContainsRc<T>) -> ContainsRc<T> {
|
||||
| ------------- expected `ContainsRc<T>` because of return type
|
||||
LL |
|
||||
LL | x.clone()
|
||||
| ^^^^^^^^^ expected `ContainsRc<T>`, found `&ContainsRc<T>`
|
||||
|
|
||||
= note: expected struct `ContainsRc<_>`
|
||||
found reference `&ContainsRc<_>`
|
||||
note: `ContainsRc<T>` does not implement `Clone`, so `&ContainsRc<T>` was cloned instead
|
||||
--> $DIR/derive-clone-already-present-issue-146515.rs:12:5
|
||||
|
|
||||
LL | x.clone()
|
||||
| ^
|
||||
= help: `Clone` is not implemented because the trait bound `T: Clone` is not satisfied
|
||||
note: the trait `Clone` must be implemented
|
||||
--> $SRC_DIR/core/src/clone.rs:LL:COL
|
||||
|
||||
error: aborting due to 1 previous error
|
||||
|
||||
For more information about this error, try `rustc --explain E0308`.
|
||||
Loading…
Add table
Add a link
Reference in a new issue