Auto merge of #151794 - JonathanBrouwer:rollup-rGbYGX2, r=JonathanBrouwer

Rollup of 12 pull requests

Successful merges:

 - rust-lang/rust#150491 (resolve: Mark items under exported ambiguous imports as exported)
 - rust-lang/rust#150720 (Do not suggest `derive` if there is already an impl)
 - rust-lang/rust#150968 (compiler-builtins: Remove the no-f16-f128 feature)
 - rust-lang/rust#151493 ([RFC] rustc_parse: improve the error diagnostic for "missing let in let chain")
 - rust-lang/rust#151660 (Bump `std`'s `backtrace`'s `rustc-demangle`)
 - rust-lang/rust#151696 (Borrowck: Simplify SCC annotation computation, placeholder rewriting)
 - rust-lang/rust#151704 (Implement `set_output_kind` for Emscripten linker)
 - rust-lang/rust#151706 (Remove Fuchsia from target OS list in unix.rs for sleep)
 - rust-lang/rust#151769 (fix undefined behavior in VecDeque::splice)
 - rust-lang/rust#151779 (stdarch subtree update)
 - rust-lang/rust#151449 ([rustdoc] Add regression test for rust-lang/rust#151411)
 - rust-lang/rust#151773 (clean up checks for constant promotion of integer division/remainder operations)
This commit is contained in:
bors 2026-01-28 21:40:46 +00:00
commit ba284f468c
72 changed files with 7314 additions and 3873 deletions

View file

@ -62,57 +62,23 @@ impl scc::Annotations<RegionVid> for SccAnnotations<'_, '_, RegionTracker> {
}
#[derive(Copy, Debug, Clone, PartialEq, Eq)]
enum PlaceholderReachability {
/// This SCC reaches no placeholders.
NoPlaceholders,
/// This SCC reaches at least one placeholder.
Placeholders {
/// The largest-universed placeholder we can reach
max_universe: (UniverseIndex, RegionVid),
struct PlaceholderReachability {
/// The largest-universed placeholder we can reach
max_universe: (UniverseIndex, RegionVid),
/// The placeholder with the smallest ID
min_placeholder: RegionVid,
/// The placeholder with the smallest ID
min_placeholder: RegionVid,
/// The placeholder with the largest ID
max_placeholder: RegionVid,
},
/// The placeholder with the largest ID
max_placeholder: RegionVid,
}
impl PlaceholderReachability {
/// Merge the reachable placeholders of two graph components.
fn merge(self, other: PlaceholderReachability) -> PlaceholderReachability {
use PlaceholderReachability::*;
match (self, other) {
(NoPlaceholders, NoPlaceholders) => NoPlaceholders,
(NoPlaceholders, p @ Placeholders { .. })
| (p @ Placeholders { .. }, NoPlaceholders) => p,
(
Placeholders {
min_placeholder: min_pl,
max_placeholder: max_pl,
max_universe: max_u,
},
Placeholders { min_placeholder, max_placeholder, max_universe },
) => Placeholders {
min_placeholder: min_pl.min(min_placeholder),
max_placeholder: max_pl.max(max_placeholder),
max_universe: max_u.max(max_universe),
},
}
}
fn max_universe(&self) -> Option<(UniverseIndex, RegionVid)> {
match self {
Self::NoPlaceholders => None,
Self::Placeholders { max_universe, .. } => Some(*max_universe),
}
}
/// If we have reached placeholders, determine if they can
/// be named from this universe.
fn can_be_named_by(&self, from: UniverseIndex) -> bool {
self.max_universe()
.is_none_or(|(max_placeholder_universe, _)| from.can_name(max_placeholder_universe))
fn merge(&mut self, other: &Self) {
self.max_universe = self.max_universe.max(other.max_universe);
self.min_placeholder = self.min_placeholder.min(other.min_placeholder);
self.max_placeholder = self.max_placeholder.max(other.max_placeholder);
}
}
@ -120,7 +86,7 @@ impl PlaceholderReachability {
/// the values of its elements. This annotates a single SCC.
#[derive(Copy, Debug, Clone)]
pub(crate) struct RegionTracker {
reachable_placeholders: PlaceholderReachability,
reachable_placeholders: Option<PlaceholderReachability>,
/// The largest universe nameable from this SCC.
/// It is the smallest nameable universe of all
@ -135,13 +101,13 @@ impl RegionTracker {
pub(crate) fn new(rvid: RegionVid, definition: &RegionDefinition<'_>) -> Self {
let reachable_placeholders =
if matches!(definition.origin, NllRegionVariableOrigin::Placeholder(_)) {
PlaceholderReachability::Placeholders {
Some(PlaceholderReachability {
max_universe: (definition.universe, rvid),
min_placeholder: rvid,
max_placeholder: rvid,
}
})
} else {
PlaceholderReachability::NoPlaceholders
None
};
Self {
@ -159,43 +125,46 @@ impl RegionTracker {
}
pub(crate) fn max_placeholder_universe_reached(self) -> UniverseIndex {
if let Some((universe, _)) = self.reachable_placeholders.max_universe() {
universe
} else {
UniverseIndex::ROOT
}
self.reachable_placeholders.map(|pls| pls.max_universe.0).unwrap_or(UniverseIndex::ROOT)
}
/// Can all reachable placeholders be named from `from`?
/// True vacuously in case no placeholders were reached.
fn placeholders_can_be_named_by(&self, from: UniverseIndex) -> bool {
self.reachable_placeholders.is_none_or(|pls| from.can_name(pls.max_universe.0))
}
/// Determine if we can name all the placeholders in `other`.
pub(crate) fn can_name_all_placeholders(&self, other: Self) -> bool {
other.reachable_placeholders.can_be_named_by(self.max_nameable_universe.0)
// HACK: We first check whether we can name the highest existential universe
// of `other`. This only exists to avoid errors in case that scc already
// depends on a placeholder it cannot name itself.
self.max_nameable_universe().can_name(other.max_nameable_universe())
|| other.placeholders_can_be_named_by(self.max_nameable_universe.0)
}
/// If this SCC reaches a placeholder it can't name, return it.
fn unnameable_placeholder(&self) -> Option<(UniverseIndex, RegionVid)> {
self.reachable_placeholders.max_universe().filter(|&(placeholder_universe, _)| {
!self.max_nameable_universe().can_name(placeholder_universe)
})
self.reachable_placeholders
.filter(|pls| !self.max_nameable_universe().can_name(pls.max_universe.0))
.map(|pls| pls.max_universe)
}
}
impl scc::Annotation for RegionTracker {
fn merge_scc(self, other: Self) -> Self {
fn update_scc(&mut self, other: &Self) {
trace!("{:?} << {:?}", self.representative, other.representative);
Self {
representative: self.representative.min(other.representative),
max_nameable_universe: self.max_nameable_universe.min(other.max_nameable_universe),
reachable_placeholders: self.reachable_placeholders.merge(other.reachable_placeholders),
}
self.representative = self.representative.min(other.representative);
self.update_reachable(other);
}
fn merge_reached(self, other: Self) -> Self {
Self {
max_nameable_universe: self.max_nameable_universe.min(other.max_nameable_universe),
reachable_placeholders: self.reachable_placeholders.merge(other.reachable_placeholders),
representative: self.representative,
}
fn update_reachable(&mut self, other: &Self) {
self.max_nameable_universe = self.max_nameable_universe.min(other.max_nameable_universe);
match (self.reachable_placeholders.as_mut(), other.reachable_placeholders.as_ref()) {
(None, None) | (Some(_), None) => (),
(None, Some(theirs)) => self.reachable_placeholders = Some(*theirs),
(Some(ours), Some(theirs)) => ours.merge(theirs),
};
}
}

View file

@ -1208,10 +1208,23 @@ impl<'a> Linker for EmLinker<'a> {
fn set_output_kind(
&mut self,
_output_kind: LinkOutputKind,
output_kind: LinkOutputKind,
_crate_type: CrateType,
_out_filename: &Path,
) {
match output_kind {
LinkOutputKind::DynamicNoPicExe | LinkOutputKind::DynamicPicExe => {
self.cmd.arg("-sMAIN_MODULE=2");
}
LinkOutputKind::DynamicDylib | LinkOutputKind::StaticDylib => {
self.cmd.arg("-sSIDE_MODULE=2");
}
// -fno-pie is the default on Emscripten.
LinkOutputKind::StaticNoPicExe | LinkOutputKind::StaticPicExe => {}
LinkOutputKind::WasiReactorExe => {
unreachable!();
}
}
}
fn link_dylib_by_name(&mut self, name: &str, _verbatim: bool, _as_needed: bool) {

View file

@ -27,26 +27,18 @@ mod tests;
/// the max/min element of the SCC, or all of the above.
///
/// Concretely, both merge operations must commute, e.g. where `merge`
/// is `merge_scc` and `merge_reached`: `a.merge(b) == b.merge(a)`
/// is `update_scc` and `update_reachable`: `a.merge(b) == b.merge(a)`
///
/// In general, what you want is probably always min/max according
/// to some ordering, potentially with side constraints (min x such
/// that P holds).
pub trait Annotation: Debug + Copy {
/// Merge two existing annotations into one during
/// path compression.o
fn merge_scc(self, other: Self) -> Self;
/// path compression.
fn update_scc(&mut self, other: &Self);
/// Merge a successor into this annotation.
fn merge_reached(self, other: Self) -> Self;
fn update_scc(&mut self, other: Self) {
*self = self.merge_scc(other)
}
fn update_reachable(&mut self, other: Self) {
*self = self.merge_reached(other)
}
fn update_reachable(&mut self, other: &Self);
}
/// An accumulator for annotations.
@ -70,12 +62,8 @@ impl<N: Idx, S: Idx + Ord> Annotations<N> for NoAnnotations<S> {
/// The empty annotation, which does nothing.
impl Annotation for () {
fn merge_reached(self, _other: Self) -> Self {
()
}
fn merge_scc(self, _other: Self) -> Self {
()
}
fn update_reachable(&mut self, _other: &Self) {}
fn update_scc(&mut self, _other: &Self) {}
}
/// Strongly connected components (SCC) of a graph. The type `N` is
@ -614,7 +602,7 @@ where
*min_depth = successor_min_depth;
*min_cycle_root = successor_node;
}
current_component_annotation.update_scc(successor_annotation);
current_component_annotation.update_scc(&successor_annotation);
}
// The starting node `node` is succeeded by a fully identified SCC
// which is now added to the set under `scc_index`.
@ -629,7 +617,7 @@ where
// the `successors_stack` for later.
trace!(?node, ?successor_scc_index);
successors_stack.push(successor_scc_index);
current_component_annotation.update_reachable(successor_annotation);
current_component_annotation.update_reachable(&successor_annotation);
}
// `node` has no more (direct) successors; search recursively.
None => {

View file

@ -32,12 +32,12 @@ impl Maxes {
}
impl Annotation for MaxReached {
fn merge_scc(self, other: Self) -> Self {
Self(std::cmp::max(other.0, self.0))
fn update_scc(&mut self, other: &Self) {
self.0 = self.0.max(other.0);
}
fn merge_reached(self, other: Self) -> Self {
Self(std::cmp::max(other.0, self.0))
fn update_reachable(&mut self, other: &Self) {
self.0 = self.0.max(other.0);
}
}
@ -75,13 +75,12 @@ impl Annotations<usize> for MinMaxes {
}
impl Annotation for MinMaxIn {
fn merge_scc(self, other: Self) -> Self {
Self { min: std::cmp::min(self.min, other.min), max: std::cmp::max(self.max, other.max) }
fn update_scc(&mut self, other: &Self) {
self.min = self.min.min(other.min);
self.max = self.max.max(other.max);
}
fn merge_reached(self, _other: Self) -> Self {
self
}
fn update_reachable(&mut self, _other: &Self) {}
}
#[test]

View file

@ -3281,6 +3281,63 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
}
}
/// Checks if we can suggest a derive macro for the unmet trait bound.
/// Returns Some(list_of_derives) if possible, or None if not.
fn consider_suggesting_derives_for_ty(
&self,
trait_pred: ty::TraitPredicate<'tcx>,
adt: ty::AdtDef<'tcx>,
) -> Option<Vec<(String, Span, Symbol)>> {
let diagnostic_name = self.tcx.get_diagnostic_name(trait_pred.def_id())?;
let can_derive = match diagnostic_name {
sym::Default
| sym::Eq
| sym::PartialEq
| sym::Ord
| sym::PartialOrd
| sym::Clone
| sym::Copy
| sym::Hash
| sym::Debug => true,
_ => false,
};
if !can_derive {
return None;
}
let trait_def_id = trait_pred.def_id();
let self_ty = trait_pred.self_ty();
// We need to check if there is already a manual implementation of the trait
// for this specific ADT to avoid suggesting `#[derive(..)]` that would conflict.
if self.tcx.non_blanket_impls_for_ty(trait_def_id, self_ty).any(|impl_def_id| {
self.tcx
.type_of(impl_def_id)
.instantiate_identity()
.ty_adt_def()
.is_some_and(|def| def.did() == adt.did())
}) {
return None;
}
let mut derives = Vec::new();
let self_name = self_ty.to_string();
let self_span = self.tcx.def_span(adt.did());
for super_trait in supertraits(self.tcx, ty::Binder::dummy(trait_pred.trait_ref)) {
if let Some(parent_diagnostic_name) = self.tcx.get_diagnostic_name(super_trait.def_id())
{
derives.push((self_name.clone(), self_span, parent_diagnostic_name));
}
}
derives.push((self_name, self_span, diagnostic_name));
Some(derives)
}
fn note_predicate_source_and_get_derives(
&self,
err: &mut Diag<'_>,
@ -3298,35 +3355,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
Some(adt) if adt.did().is_local() => adt,
_ => continue,
};
if let Some(diagnostic_name) = self.tcx.get_diagnostic_name(trait_pred.def_id()) {
let can_derive = match diagnostic_name {
sym::Default
| sym::Eq
| sym::PartialEq
| sym::Ord
| sym::PartialOrd
| sym::Clone
| sym::Copy
| sym::Hash
| sym::Debug => true,
_ => false,
};
if can_derive {
let self_name = trait_pred.self_ty().to_string();
let self_span = self.tcx.def_span(adt.did());
for super_trait in
supertraits(self.tcx, ty::Binder::dummy(trait_pred.trait_ref))
{
if let Some(parent_diagnostic_name) =
self.tcx.get_diagnostic_name(super_trait.def_id())
{
derives.push((self_name.clone(), self_span, parent_diagnostic_name));
}
}
derives.push((self_name, self_span, diagnostic_name));
} else {
traits.push(trait_pred.def_id());
}
if let Some(new_derives) = self.consider_suggesting_derives_for_ty(trait_pred, adt) {
derives.extend(new_derives);
} else {
traits.push(trait_pred.def_id());
}

View file

@ -485,47 +485,33 @@ impl<'tcx> Validator<'_, 'tcx> {
if lhs_ty.is_integral() {
let sz = lhs_ty.primitive_size(self.tcx);
// Integer division: the RHS must be a non-zero const.
let rhs_val = match rhs {
Operand::Constant(c)
if self.should_evaluate_for_promotion_checks(c.const_) =>
{
c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
}
_ => None,
};
match rhs_val.map(|x| x.to_uint(sz)) {
let rhs_val = if let Operand::Constant(rhs_c) = rhs
&& self.should_evaluate_for_promotion_checks(rhs_c.const_)
&& let Some(rhs_val) =
rhs_c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
// for the zero test, int vs uint does not matter
Some(x) if x != 0 => {} // okay
_ => return Err(Unpromotable), // value not known or 0 -- not okay
}
&& rhs_val.to_uint(sz) != 0
{
rhs_val
} else {
// value not known or 0 -- not okay
return Err(Unpromotable);
};
// Furthermore, for signed division, we also have to exclude `int::MIN /
// -1`.
if lhs_ty.is_signed() {
match rhs_val.map(|x| x.to_int(sz)) {
Some(-1) | None => {
// The RHS is -1 or unknown, so we have to be careful.
// But is the LHS int::MIN?
let lhs_val = match lhs {
Operand::Constant(c)
if self.should_evaluate_for_promotion_checks(
c.const_,
) =>
{
c.const_
.try_eval_scalar_int(self.tcx, self.typing_env)
}
_ => None,
};
let lhs_min = sz.signed_int_min();
match lhs_val.map(|x| x.to_int(sz)) {
// okay
Some(x) if x != lhs_min => {}
// value not known or int::MIN -- not okay
_ => return Err(Unpromotable),
}
}
_ => {}
if lhs_ty.is_signed() && rhs_val.to_int(sz) == -1 {
// The RHS is -1, so we have to be careful. But is the LHS int::MIN?
if let Operand::Constant(lhs_c) = lhs
&& self.should_evaluate_for_promotion_checks(lhs_c.const_)
&& let Some(lhs_val) =
lhs_c.const_.try_eval_scalar_int(self.tcx, self.typing_env)
&& let lhs_min = sz.signed_int_min()
&& lhs_val.to_int(sz) != lhs_min
{
// okay
} else {
// value not known or int::MIN -- not okay
return Err(Unpromotable);
}
}
}

View file

@ -2760,9 +2760,13 @@ impl<'a> Parser<'a> {
let (mut cond, _) =
self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL | Restrictions::ALLOW_LET, attrs)?;
CondChecker::new(self, let_chains_policy).visit_expr(&mut cond);
Ok(cond)
let mut checker = CondChecker::new(self, let_chains_policy);
checker.visit_expr(&mut cond);
Ok(if let Some(guar) = checker.found_incorrect_let_chain {
self.mk_expr_err(cond.span, guar)
} else {
cond
})
}
/// Parses a `let $pat = $expr` pseudo-expression.
@ -3484,13 +3488,19 @@ impl<'a> Parser<'a> {
let if_span = self.prev_token.span;
let mut cond = self.parse_match_guard_condition()?;
CondChecker::new(self, LetChainsPolicy::AlwaysAllowed).visit_expr(&mut cond);
let mut checker = CondChecker::new(self, LetChainsPolicy::AlwaysAllowed);
checker.visit_expr(&mut cond);
if has_let_expr(&cond) {
let span = if_span.to(cond.span);
self.psess.gated_spans.gate(sym::if_let_guard, span);
}
Ok(Some(cond))
Ok(Some(if let Some(guar) = checker.found_incorrect_let_chain {
self.mk_expr_err(cond.span, guar)
} else {
cond
}))
}
fn parse_match_arm_pat_and_guard(&mut self) -> PResult<'a, (Pat, Option<Box<Expr>>)> {
@ -3511,13 +3521,23 @@ impl<'a> Parser<'a> {
let ast::PatKind::Paren(subpat) = pat.kind else { unreachable!() };
let ast::PatKind::Guard(_, mut cond) = subpat.kind else { unreachable!() };
self.psess.gated_spans.ungate_last(sym::guard_patterns, cond.span);
CondChecker::new(self, LetChainsPolicy::AlwaysAllowed).visit_expr(&mut cond);
let mut checker = CondChecker::new(self, LetChainsPolicy::AlwaysAllowed);
checker.visit_expr(&mut cond);
let right = self.prev_token.span;
self.dcx().emit_err(errors::ParenthesesInMatchPat {
span: vec![left, right],
sugg: errors::ParenthesesInMatchPatSugg { left, right },
});
Ok((self.mk_pat(span, ast::PatKind::Wild), Some(cond)))
Ok((
self.mk_pat(span, ast::PatKind::Wild),
(if let Some(guar) = checker.found_incorrect_let_chain {
Some(self.mk_expr_err(cond.span, guar))
} else {
Some(cond)
}),
))
} else {
Ok((pat, self.parse_match_arm_guard()?))
}
@ -4208,6 +4228,7 @@ struct CondChecker<'a> {
forbid_let_reason: Option<ForbiddenLetReason>,
missing_let: Option<errors::MaybeMissingLet>,
comparison: Option<errors::MaybeComparison>,
found_incorrect_let_chain: Option<ErrorGuaranteed>,
}
impl<'a> CondChecker<'a> {
@ -4218,6 +4239,7 @@ impl<'a> CondChecker<'a> {
missing_let: None,
comparison: None,
let_chains_policy,
found_incorrect_let_chain: None,
depth: 0,
}
}
@ -4236,12 +4258,19 @@ impl MutVisitor for CondChecker<'_> {
NotSupportedOr(or_span) => {
self.parser.dcx().emit_err(errors::OrInLetChain { span: or_span })
}
_ => self.parser.dcx().emit_err(errors::ExpectedExpressionFoundLet {
span,
reason,
missing_let: self.missing_let,
comparison: self.comparison,
}),
_ => {
let guar =
self.parser.dcx().emit_err(errors::ExpectedExpressionFoundLet {
span,
reason,
missing_let: self.missing_let,
comparison: self.comparison,
});
if let Some(_) = self.missing_let {
self.found_incorrect_let_chain = Some(guar);
}
guar
}
};
*recovered = Recovered::Yes(error);
} else if self.depth > 1 {

View file

@ -96,13 +96,10 @@ impl<'a, 'ra, 'tcx> EffectiveVisibilitiesVisitor<'a, 'ra, 'tcx> {
// is the maximum value among visibilities of declarations corresponding to that def id.
for (decl, eff_vis) in visitor.import_effective_visibilities.iter() {
let DeclKind::Import { import, .. } = decl.kind else { unreachable!() };
if !decl.is_ambiguity_recursive() {
if let Some(node_id) = import.id() {
r.effective_visibilities.update_eff_vis(r.local_def_id(node_id), eff_vis, r.tcx)
}
} else if decl.ambiguity.get().is_some()
&& eff_vis.is_public_at_level(Level::Reexported)
{
if let Some(node_id) = import.id() {
r.effective_visibilities.update_eff_vis(r.local_def_id(node_id), eff_vis, r.tcx)
}
if decl.ambiguity.get().is_some() && eff_vis.is_public_at_level(Level::Reexported) {
exported_ambiguities.insert(*decl);
}
}
@ -123,31 +120,13 @@ impl<'a, 'ra, 'tcx> EffectiveVisibilitiesVisitor<'a, 'ra, 'tcx> {
// Set the given effective visibility level to `Level::Direct` and
// sets the rest of the `use` chain to `Level::Reexported` until
// we hit the actual exported item.
//
// If the binding is ambiguous, put the root ambiguity binding and all reexports
// leading to it into the table. They are used by the `ambiguous_glob_reexports`
// lint. For all bindings added to the table this way `is_ambiguity` returns true.
let is_ambiguity =
|decl: Decl<'ra>, warn: bool| decl.ambiguity.get().is_some() && !warn;
let mut parent_id = ParentId::Def(module_id);
let mut warn_ambiguity = decl.warn_ambiguity.get();
while let DeclKind::Import { source_decl, .. } = decl.kind {
self.update_import(decl, parent_id);
if is_ambiguity(decl, warn_ambiguity) {
// Stop at the root ambiguity, further bindings in the chain should not
// be reexported because the root ambiguity blocks any access to them.
// (Those further bindings are most likely not ambiguities themselves.)
break;
}
parent_id = ParentId::Import(decl);
decl = source_decl;
warn_ambiguity |= source_decl.warn_ambiguity.get();
}
if !is_ambiguity(decl, warn_ambiguity)
&& let Some(def_id) = decl.res().opt_def_id().and_then(|id| id.as_local())
{
if let Some(def_id) = decl.res().opt_def_id().and_then(|id| id.as_local()) {
self.update_def(def_id, decl.vis().expect_local(), parent_id);
}
}

View file

@ -19,6 +19,8 @@ pub(crate) fn target() -> Target {
pre_link_args,
post_link_args,
relocation_model: RelocModel::Pic,
crt_static_respected: true,
crt_static_default: true,
panic_strategy: PanicStrategy::Unwind,
no_default_libraries: false,
families: cvs!["unix", "wasm"],

View file

@ -274,9 +274,9 @@ dependencies = [
[[package]]
name = "rustc-demangle"
version = "0.1.26"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
dependencies = [
"rustc-std-workspace-core",
]

View file

@ -21,7 +21,6 @@ compiler_builtins = { path = "../compiler-builtins/compiler-builtins", features
[features]
compiler-builtins-mem = ['compiler_builtins/mem']
compiler-builtins-c = ["compiler_builtins/c"]
compiler-builtins-no-f16-f128 = ["compiler_builtins/no-f16-f128"]
# Choose algorithms that are optimized for binary size instead of runtime performance
optimize_for_size = ["core/optimize_for_size"]

View file

@ -143,7 +143,11 @@ impl<T, A: Allocator> Drain<'_, T, A> {
let new_tail_start = tail_start + additional;
unsafe {
deque.wrap_copy(tail_start, new_tail_start, self.tail_len);
deque.wrap_copy(
deque.to_physical_idx(tail_start),
deque.to_physical_idx(new_tail_start),
self.tail_len,
);
}
self.drain_len += additional;
}

View file

@ -2336,3 +2336,14 @@ fn test_splice_forget() {
std::mem::forget(v.splice(2..4, a));
assert_eq!(v, &[1, 2]);
}
#[test]
fn test_splice_wrapping() {
let mut vec = VecDeque::with_capacity(10);
vec.push_front(7u8);
vec.push_back(9);
vec.splice(1..1, [8]);
assert_eq!(Vec::from(vec), [7, 8, 9]);
}

View file

@ -47,10 +47,6 @@ c = ["dep:cc"]
# the generic versions on all platforms.
no-asm = []
# Workaround for codegen backends which haven't yet implemented `f16` and
# `f128` support. Disabled any intrinsics which use those types.
no-f16-f128 = []
# Flag this library as the unstable compiler-builtins lib
compiler-builtins = []

View file

@ -33,7 +33,6 @@ utest-macros = { git = "https://github.com/japaric/utest" }
default = ["mangled-names"]
c = ["compiler_builtins/c"]
no-asm = ["compiler_builtins/no-asm"]
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
mem = ["compiler_builtins/mem"]
mangled-names = ["compiler_builtins/mangled-names"]
# Skip tests that rely on f128 symbols being available on the system

View file

@ -36,8 +36,6 @@ else
"${test_builtins[@]}" --features c --release
"${test_builtins[@]}" --features no-asm
"${test_builtins[@]}" --features no-asm --release
"${test_builtins[@]}" --features no-f16-f128
"${test_builtins[@]}" --features no-f16-f128 --release
"${test_builtins[@]}" --benches
"${test_builtins[@]}" --benches --release
@ -63,8 +61,6 @@ symcheck+=(-- build-and-check)
"${symcheck[@]}" "$target" -- -p compiler_builtins --features c --release
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm --release
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128
"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 --release
run_intrinsics_test() {
build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml)

View file

@ -45,10 +45,6 @@ c = ["dep:cc"]
# the generic versions on all platforms.
no-asm = []
# Workaround for codegen backends which haven't yet implemented `f16` and
# `f128` support. Disabled any intrinsics which use those types.
no-f16-f128 = []
# Flag this library as the unstable compiler-builtins lib
compiler-builtins = []

View file

@ -95,16 +95,13 @@ pub fn configure_aliases(target: &Target) {
* * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
*/
// If the feature is set, disable both of these types.
let no_f16_f128 = target.cargo_features.iter().any(|s| s == "no-f16-f128");
println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
if target.reliable_f16 && !no_f16_f128 {
if target.reliable_f16 {
println!("cargo::rustc-cfg=f16_enabled");
}
println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
if target.reliable_f128 && !no_f16_f128 {
if target.reliable_f128 {
println!("cargo::rustc-cfg=f128_enabled");
}
}

View file

@ -143,16 +143,13 @@ fn emit_f16_f128_cfg(cfg: &Config) {
/* See the compiler-builtins configure file for info about the meaning of these options */
// If the feature is set, disable both of these types.
let no_f16_f128 = cfg.cargo_features.iter().any(|s| s == "no-f16-f128");
println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
if cfg.reliable_f16 && !no_f16_f128 {
if cfg.reliable_f16 {
println!("cargo:rustc-cfg=f16_enabled");
}
println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
if cfg.reliable_f128 && !no_f16_f128 {
if cfg.reliable_f128 {
println!("cargo:rustc-cfg=f128_enabled");
}
}

View file

@ -26,7 +26,7 @@ hashbrown = { version = "0.16.1", default-features = false, features = [
std_detect = { path = "../std_detect", public = true }
# Dependencies of the `backtrace` crate
rustc-demangle = { version = "0.1.24", features = ['rustc-dep-of-std'] }
rustc-demangle = { version = "0.1.27", features = ['rustc-dep-of-std'] }
[target.'cfg(not(all(windows, target_env = "msvc", not(target_vendor = "uwp"))))'.dependencies]
miniz_oxide = { version = "0.8.0", optional = true, default-features = false }
@ -115,7 +115,6 @@ backtrace-trace-only = []
panic-unwind = ["dep:panic_unwind"]
compiler-builtins-c = ["alloc/compiler-builtins-c"]
compiler-builtins-mem = ["alloc/compiler-builtins-mem"]
compiler-builtins-no-f16-f128 = ["alloc/compiler-builtins-no-f16-f128"]
llvm-libunwind = ["unwind/llvm-libunwind"]
system-llvm-libunwind = ["unwind/system-llvm-libunwind"]

View file

@ -70,7 +70,6 @@ cfg_select! {
target_os = "illumos",
target_os = "dragonfly",
target_os = "hurd",
target_os = "fuchsia",
target_os = "vxworks",
target_os = "wasi",
target_vendor = "apple",
@ -131,7 +130,6 @@ cfg_select! {
target_os = "illumos",
target_os = "dragonfly",
target_os = "hurd",
target_os = "fuchsia",
target_os = "vxworks",
target_os = "wasi",
target_vendor = "apple",

View file

@ -542,7 +542,6 @@ pub fn sleep(dur: Duration) {
target_os = "illumos",
target_os = "dragonfly",
target_os = "hurd",
target_os = "fuchsia",
target_os = "vxworks",
target_os = "wasi",
) => {
@ -640,7 +639,6 @@ pub fn sleep(dur: Duration) {
target_os = "illumos",
target_os = "dragonfly",
target_os = "hurd",
target_os = "fuchsia",
target_os = "vxworks",
target_os = "wasi",
))]

View file

@ -316,7 +316,6 @@ pub fn sleep(dur: Duration) {
/// | Illumos | [clock_nanosleep] (Monotonic Clock)] |
/// | Dragonfly | [clock_nanosleep] (Monotonic Clock)] |
/// | Hurd | [clock_nanosleep] (Monotonic Clock)] |
/// | Fuchsia | [clock_nanosleep] (Monotonic Clock)] |
/// | Vxworks | [clock_nanosleep] (Monotonic Clock)] |
/// | Apple | `mach_wait_until` |
/// | Other | `sleep_until` uses [`sleep`] and does not issue a syscall itself |

File diff suppressed because it is too large Load diff

View file

@ -3363,6 +3363,7 @@ unsafe extern "C" {
#[cfg(test)]
mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use crate::core_arch::simd::*;
use crate::hint::black_box;
use crate::ptr;
use stdarch_test::simd_test;
@ -3458,7 +3459,7 @@ mod tests {
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_max_pd() {
fn test_mm256_max_pd() {
let a = _mm256_setr_pd(1., 4., 5., 8.);
let b = _mm256_setr_pd(2., 3., 6., 7.);
let r = _mm256_max_pd(a, b);
@ -3468,23 +3469,22 @@ mod tests {
// > value in the second operand (source operand) is returned.
let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
let wu: [u64; 4] = transmute(w);
let xu: [u64; 4] = transmute(x);
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
assert_eq!(xu, [0u64; 4]);
let wu = _mm256_castpd_si256(w).as_u64x4();
let xu = _mm256_castpd_si256(x).as_u64x4();
assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
assert_eq!(xu, u64x4::splat(0u64));
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
// > second operand (source operand), either a NaN or a valid
// > floating-point value, is written to the result.
let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
let yf: [f64; 4] = transmute(y);
let zf: [f64; 4] = transmute(z);
assert_eq!(yf, [0.0; 4]);
assert_eq_m256d(y, _mm256_set1_pd(0.0));
let zf = *z.as_f64x4().as_array();
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_max_ps() {
fn test_mm256_max_ps() {
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_max_ps(a, b);
@ -3494,23 +3494,22 @@ mod tests {
// > value in the second operand (source operand) is returned.
let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
let wu: [u32; 8] = transmute(w);
let xu: [u32; 8] = transmute(x);
assert_eq!(wu, [0x8000_0000u32; 8]);
assert_eq!(xu, [0u32; 8]);
let wu = _mm256_castps_si256(w).as_u32x8();
let xu = _mm256_castps_si256(x).as_u32x8();
assert_eq!(wu, u32x8::splat(0x8000_0000u32));
assert_eq!(xu, u32x8::splat(0u32));
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
// > second operand (source operand), either a NaN or a valid
// > floating-point value, is written to the result.
let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
let yf: [f32; 8] = transmute(y);
let zf: [f32; 8] = transmute(z);
assert_eq!(yf, [0.0; 8]);
assert_eq_m256(y, _mm256_set1_ps(0.0));
let zf = *z.as_f32x8().as_array();
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_min_pd() {
fn test_mm256_min_pd() {
let a = _mm256_setr_pd(1., 4., 5., 8.);
let b = _mm256_setr_pd(2., 3., 6., 7.);
let r = _mm256_min_pd(a, b);
@ -3520,23 +3519,22 @@ mod tests {
// > value in the second operand (source operand) is returned.
let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
let wu: [u64; 4] = transmute(w);
let xu: [u64; 4] = transmute(x);
assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
assert_eq!(xu, [0u64; 4]);
let wu = _mm256_castpd_si256(w).as_u64x4();
let xu = _mm256_castpd_si256(x).as_u64x4();
assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
assert_eq!(xu, u64x4::splat(0u64));
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
// > second operand (source operand), either a NaN or a valid
// > floating-point value, is written to the result.
let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
let yf: [f64; 4] = transmute(y);
let zf: [f64; 4] = transmute(z);
assert_eq!(yf, [0.0; 4]);
assert_eq_m256d(y, _mm256_set1_pd(0.0));
let zf = *z.as_f64x4().as_array();
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_min_ps() {
fn test_mm256_min_ps() {
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_min_ps(a, b);
@ -3546,18 +3544,17 @@ mod tests {
// > value in the second operand (source operand) is returned.
let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
let wu: [u32; 8] = transmute(w);
let xu: [u32; 8] = transmute(x);
assert_eq!(wu, [0x8000_0000u32; 8]);
assert_eq!(xu, [0u32; 8]);
let wu = _mm256_castps_si256(w).as_u32x8();
let xu = _mm256_castps_si256(x).as_u32x8();
assert_eq!(wu, u32x8::splat(0x8000_0000u32));
assert_eq!(xu, u32x8::splat(0u32));
// > If only one value is a NaN (SNaN or QNaN) for this instruction, the
// > second operand (source operand), either a NaN or a valid
// > floating-point value, is written to the result.
let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
let yf: [f32; 8] = transmute(y);
let zf: [f32; 8] = transmute(z);
assert_eq!(yf, [0.0; 8]);
assert_eq_m256(y, _mm256_set1_ps(0.0));
let zf = *z.as_f32x8().as_array();
assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
}
@ -4241,183 +4238,203 @@ mod tests {
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_load_pd() {
const fn test_mm256_load_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let p = ptr::addr_of!(a) as *const f64;
let r = _mm256_load_pd(p);
let r = unsafe { _mm256_load_pd(p) };
let e = _mm256_setr_pd(1., 2., 3., 4.);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_store_pd() {
const fn test_mm256_store_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let mut r = _mm256_undefined_pd();
_mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
unsafe {
_mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
}
assert_eq_m256d(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_load_ps() {
const fn test_mm256_load_ps() {
let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
let p = ptr::addr_of!(a) as *const f32;
let r = _mm256_load_ps(p);
let r = unsafe { _mm256_load_ps(p) };
let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_store_ps() {
const fn test_mm256_store_ps() {
let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
let mut r = _mm256_undefined_ps();
_mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
unsafe {
_mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
}
assert_eq_m256(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu_pd() {
const fn test_mm256_loadu_pd() {
let a = &[1.0f64, 2., 3., 4.];
let p = a.as_ptr();
let r = _mm256_loadu_pd(black_box(p));
let r = unsafe { _mm256_loadu_pd(black_box(p)) };
let e = _mm256_setr_pd(1., 2., 3., 4.);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu_pd() {
const fn test_mm256_storeu_pd() {
let a = _mm256_set1_pd(9.);
let mut r = _mm256_undefined_pd();
_mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
unsafe {
_mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
}
assert_eq_m256d(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu_ps() {
const fn test_mm256_loadu_ps() {
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
let p = a.as_ptr();
let r = _mm256_loadu_ps(black_box(p));
let r = unsafe { _mm256_loadu_ps(black_box(p)) };
let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu_ps() {
const fn test_mm256_storeu_ps() {
let a = _mm256_set1_ps(9.);
let mut r = _mm256_undefined_ps();
_mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
unsafe {
_mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
}
assert_eq_m256(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_load_si256() {
const fn test_mm256_load_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let p = ptr::addr_of!(a);
let r = _mm256_load_si256(p);
let r = unsafe { _mm256_load_si256(p) };
let e = _mm256_setr_epi64x(1, 2, 3, 4);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_store_si256() {
const fn test_mm256_store_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let mut r = _mm256_undefined_si256();
_mm256_store_si256(ptr::addr_of_mut!(r), a);
unsafe {
_mm256_store_si256(ptr::addr_of_mut!(r), a);
}
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu_si256() {
const fn test_mm256_loadu_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let p = ptr::addr_of!(a);
let r = _mm256_loadu_si256(black_box(p));
let r = unsafe { _mm256_loadu_si256(black_box(p)) };
let e = _mm256_setr_epi64x(1, 2, 3, 4);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu_si256() {
const fn test_mm256_storeu_si256() {
let a = _mm256_set1_epi8(9);
let mut r = _mm256_undefined_si256();
_mm256_storeu_si256(ptr::addr_of_mut!(r), a);
unsafe {
_mm256_storeu_si256(ptr::addr_of_mut!(r), a);
}
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_maskload_pd() {
const fn test_mm256_maskload_pd() {
let a = &[1.0f64, 2., 3., 4.];
let p = a.as_ptr();
let mask = _mm256_setr_epi64x(0, !0, 0, !0);
let r = _mm256_maskload_pd(black_box(p), mask);
let r = unsafe { _mm256_maskload_pd(black_box(p), mask) };
let e = _mm256_setr_pd(0., 2., 0., 4.);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_maskstore_pd() {
const fn test_mm256_maskstore_pd() {
let mut r = _mm256_set1_pd(0.);
let mask = _mm256_setr_epi64x(0, !0, 0, !0);
let a = _mm256_setr_pd(1., 2., 3., 4.);
_mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
unsafe {
_mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
}
let e = _mm256_setr_pd(0., 2., 0., 4.);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm_maskload_pd() {
const fn test_mm_maskload_pd() {
let a = &[1.0f64, 2.];
let p = a.as_ptr();
let mask = _mm_setr_epi64x(0, !0);
let r = _mm_maskload_pd(black_box(p), mask);
let r = unsafe { _mm_maskload_pd(black_box(p), mask) };
let e = _mm_setr_pd(0., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm_maskstore_pd() {
const fn test_mm_maskstore_pd() {
let mut r = _mm_set1_pd(0.);
let mask = _mm_setr_epi64x(0, !0);
let a = _mm_setr_pd(1., 2.);
_mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
unsafe {
_mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
}
let e = _mm_setr_pd(0., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_maskload_ps() {
const fn test_mm256_maskload_ps() {
let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
let p = a.as_ptr();
let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
let r = _mm256_maskload_ps(black_box(p), mask);
let r = unsafe { _mm256_maskload_ps(black_box(p), mask) };
let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_maskstore_ps() {
const fn test_mm256_maskstore_ps() {
let mut r = _mm256_set1_ps(0.);
let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
_mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
unsafe {
_mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
}
let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm_maskload_ps() {
const fn test_mm_maskload_ps() {
let a = &[1.0f32, 2., 3., 4.];
let p = a.as_ptr();
let mask = _mm_setr_epi32(0, !0, 0, !0);
let r = _mm_maskload_ps(black_box(p), mask);
let r = unsafe { _mm_maskload_ps(black_box(p), mask) };
let e = _mm_setr_ps(0., 2., 0., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm_maskstore_ps() {
const fn test_mm_maskstore_ps() {
let mut r = _mm_set1_ps(0.);
let mask = _mm_setr_epi32(0, !0, 0, !0);
let a = _mm_setr_ps(1., 2., 3., 4.);
_mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
unsafe {
_mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
}
let e = _mm_setr_ps(0., 2., 0., 4.);
assert_eq_m128(r, e);
}
@ -4447,7 +4464,7 @@ mod tests {
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_lddqu_si256() {
fn test_mm256_lddqu_si256() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@ -4456,7 +4473,7 @@ mod tests {
25, 26, 27, 28, 29, 30, 31, 32,
);
let p = ptr::addr_of!(a);
let r = _mm256_lddqu_si256(black_box(p));
let r = unsafe { _mm256_lddqu_si256(black_box(p)) };
#[rustfmt::skip]
let e = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@ -4469,17 +4486,19 @@ mod tests {
#[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
unsafe fn test_mm256_stream_si256() {
fn test_mm256_stream_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let mut r = _mm256_undefined_si256();
_mm256_stream_si256(ptr::addr_of_mut!(r), a);
unsafe {
_mm256_stream_si256(ptr::addr_of_mut!(r), a);
}
_mm_sfence();
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
unsafe fn test_mm256_stream_pd() {
fn test_mm256_stream_pd() {
#[repr(align(32))]
struct Memory {
pub data: [f64; 4],
@ -4487,7 +4506,9 @@ mod tests {
let a = _mm256_set1_pd(7.0);
let mut mem = Memory { data: [-1.0; 4] };
_mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
unsafe {
_mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
}
_mm_sfence();
for i in 0..4 {
assert_eq!(mem.data[i], get_m256d(a, i));
@ -4496,7 +4517,7 @@ mod tests {
#[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
unsafe fn test_mm256_stream_ps() {
fn test_mm256_stream_ps() {
#[repr(align(32))]
struct Memory {
pub data: [f32; 8],
@ -4504,7 +4525,9 @@ mod tests {
let a = _mm256_set1_ps(7.0);
let mut mem = Memory { data: [-1.0; 8] };
_mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
unsafe {
_mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
}
_mm_sfence();
for i in 0..8 {
assert_eq!(mem.data[i], get_m256(a, i));
@ -5141,29 +5164,29 @@ mod tests {
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu2_m128() {
const fn test_mm256_loadu2_m128() {
let hi = &[5., 6., 7., 8.];
let hiaddr = hi.as_ptr();
let lo = &[1., 2., 3., 4.];
let loaddr = lo.as_ptr();
let r = _mm256_loadu2_m128(hiaddr, loaddr);
let r = unsafe { _mm256_loadu2_m128(hiaddr, loaddr) };
let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu2_m128d() {
const fn test_mm256_loadu2_m128d() {
let hi = &[3., 4.];
let hiaddr = hi.as_ptr();
let lo = &[1., 2.];
let loaddr = lo.as_ptr();
let r = _mm256_loadu2_m128d(hiaddr, loaddr);
let r = unsafe { _mm256_loadu2_m128d(hiaddr, loaddr) };
let e = _mm256_setr_pd(1., 2., 3., 4.);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_loadu2_m128i() {
const fn test_mm256_loadu2_m128i() {
#[rustfmt::skip]
let hi = _mm_setr_epi8(
17, 18, 19, 20, 21, 22, 23, 24,
@ -5174,7 +5197,9 @@ mod tests {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
);
let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
let r = unsafe {
_mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _)
};
#[rustfmt::skip]
let e = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@ -5186,35 +5211,39 @@ mod tests {
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu2_m128() {
const fn test_mm256_storeu2_m128() {
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let mut hi = _mm_undefined_ps();
let mut lo = _mm_undefined_ps();
_mm256_storeu2_m128(
ptr::addr_of_mut!(hi) as *mut f32,
ptr::addr_of_mut!(lo) as *mut f32,
a,
);
unsafe {
_mm256_storeu2_m128(
ptr::addr_of_mut!(hi) as *mut f32,
ptr::addr_of_mut!(lo) as *mut f32,
a,
);
}
assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu2_m128d() {
const fn test_mm256_storeu2_m128d() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let mut hi = _mm_undefined_pd();
let mut lo = _mm_undefined_pd();
_mm256_storeu2_m128d(
ptr::addr_of_mut!(hi) as *mut f64,
ptr::addr_of_mut!(lo) as *mut f64,
a,
);
unsafe {
_mm256_storeu2_m128d(
ptr::addr_of_mut!(hi) as *mut f64,
ptr::addr_of_mut!(lo) as *mut f64,
a,
);
}
assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
}
#[simd_test(enable = "avx")]
const unsafe fn test_mm256_storeu2_m128i() {
const fn test_mm256_storeu2_m128i() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@ -5224,7 +5253,9 @@ mod tests {
);
let mut hi = _mm_undefined_si128();
let mut lo = _mm_undefined_si128();
_mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
unsafe {
_mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
}
#[rustfmt::skip]
let e_hi = _mm_setr_epi8(
17, 18, 19, 20, 21, 22, 23, 24,

View file

@ -4672,81 +4672,89 @@ mod tests {
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm_maskload_epi32() {
const fn test_mm_maskload_epi32() {
let nums = [1, 2, 3, 4];
let a = &nums as *const i32;
let mask = _mm_setr_epi32(-1, 0, 0, -1);
let r = _mm_maskload_epi32(a, mask);
let r = unsafe { _mm_maskload_epi32(a, mask) };
let e = _mm_setr_epi32(1, 0, 0, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm256_maskload_epi32() {
const fn test_mm256_maskload_epi32() {
let nums = [1, 2, 3, 4, 5, 6, 7, 8];
let a = &nums as *const i32;
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
let r = _mm256_maskload_epi32(a, mask);
let r = unsafe { _mm256_maskload_epi32(a, mask) };
let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm_maskload_epi64() {
const fn test_mm_maskload_epi64() {
let nums = [1_i64, 2_i64];
let a = &nums as *const i64;
let mask = _mm_setr_epi64x(0, -1);
let r = _mm_maskload_epi64(a, mask);
let r = unsafe { _mm_maskload_epi64(a, mask) };
let e = _mm_setr_epi64x(0, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm256_maskload_epi64() {
const fn test_mm256_maskload_epi64() {
let nums = [1_i64, 2_i64, 3_i64, 4_i64];
let a = &nums as *const i64;
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
let r = _mm256_maskload_epi64(a, mask);
let r = unsafe { _mm256_maskload_epi64(a, mask) };
let e = _mm256_setr_epi64x(0, 2, 3, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm_maskstore_epi32() {
const fn test_mm_maskstore_epi32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let mut arr = [-1, -1, -1, -1];
let mask = _mm_setr_epi32(-1, 0, 0, -1);
_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
unsafe {
_mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
}
let e = [1, -1, -1, 4];
assert_eq!(arr, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm256_maskstore_epi32() {
const fn test_mm256_maskstore_epi32() {
let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
unsafe {
_mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
}
let e = [1, -1, -1, 42, -1, 6, 7, -1];
assert_eq!(arr, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm_maskstore_epi64() {
const fn test_mm_maskstore_epi64() {
let a = _mm_setr_epi64x(1_i64, 2_i64);
let mut arr = [-1_i64, -1_i64];
let mask = _mm_setr_epi64x(0, -1);
_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
unsafe {
_mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
}
let e = [-1, 2];
assert_eq!(arr, e);
}
#[simd_test(enable = "avx2")]
const unsafe fn test_mm256_maskstore_epi64() {
const fn test_mm256_maskstore_epi64() {
let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
unsafe {
_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
}
let e = [-1, 2, 3, -1];
assert_eq!(arr, e);
}
@ -5301,9 +5309,9 @@ mod tests {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_stream_load_si256() {
fn test_mm256_stream_load_si256() {
let a = _mm256_set_epi64x(5, 6, 7, 8);
let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
assert_eq_m256i(a, r);
}
@ -5506,88 +5514,98 @@ mod tests {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i32gather_epi32() {
fn test_mm_i32gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i32gather_epi32() {
fn test_mm_mask_i32gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm_mask_i32gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm_setr_epi32(-1, -1, -1, 0),
);
let r = unsafe {
_mm_mask_i32gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm_setr_epi32(-1, -1, -1, 0),
)
};
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i32gather_epi32() {
fn test_mm256_i32gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r =
_mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
let r = unsafe {
_mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
};
assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i32gather_epi32() {
fn test_mm256_mask_i32gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm256_mask_i32gather_epi32::<4>(
_mm256_set1_epi32(256),
arr.as_ptr(),
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
_mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
);
let r = unsafe {
_mm256_mask_i32gather_epi32::<4>(
_mm256_set1_epi32(256),
arr.as_ptr(),
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
_mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
)
};
assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i32gather_ps() {
fn test_mm_i32gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i32gather_ps() {
fn test_mm_mask_i32gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm_mask_i32gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
);
let r = unsafe {
_mm_mask_i32gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
)
};
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i32gather_ps() {
fn test_mm256_i32gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r =
_mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
let r = unsafe {
_mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
};
assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i32gather_ps() {
fn test_mm256_mask_i32gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm256_mask_i32gather_ps::<4>(
_mm256_set1_ps(256.0),
arr.as_ptr(),
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
_mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
);
let r = unsafe {
_mm256_mask_i32gather_ps::<4>(
_mm256_set1_ps(256.0),
arr.as_ptr(),
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
_mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
)
};
assert_eq_m256(
r,
_mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
@ -5595,254 +5613,282 @@ mod tests {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i32gather_epi64() {
fn test_mm_i32gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i32gather_epi64() {
fn test_mm_mask_i32gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm_mask_i32gather_epi64::<8>(
_mm_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi32(16, 16, 16, 16),
_mm_setr_epi64x(-1, 0),
);
let r = unsafe {
_mm_mask_i32gather_epi64::<8>(
_mm_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi32(16, 16, 16, 16),
_mm_setr_epi64x(-1, 0),
)
};
assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i32gather_epi64() {
fn test_mm256_i32gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i32gather_epi64() {
fn test_mm256_mask_i32gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm256_mask_i32gather_epi64::<8>(
_mm256_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm256_setr_epi64x(-1, -1, -1, 0),
);
let r = unsafe {
_mm256_mask_i32gather_epi64::<8>(
_mm256_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm256_setr_epi64x(-1, -1, -1, 0),
)
};
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i32gather_pd() {
fn test_mm_i32gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i32gather_pd() {
fn test_mm_mask_i32gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm_mask_i32gather_pd::<8>(
_mm_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi32(16, 16, 16, 16),
_mm_setr_pd(-1.0, 0.0),
);
let r = unsafe {
_mm_mask_i32gather_pd::<8>(
_mm_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi32(16, 16, 16, 16),
_mm_setr_pd(-1.0, 0.0),
)
};
assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i32gather_pd() {
fn test_mm256_i32gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i32gather_pd() {
fn test_mm256_mask_i32gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm256_mask_i32gather_pd::<8>(
_mm256_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
);
let r = unsafe {
_mm256_mask_i32gather_pd::<8>(
_mm256_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi32(0, 16, 64, 96),
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
)
};
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i64gather_epi32() {
fn test_mm_i64gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i64gather_epi32() {
fn test_mm_mask_i64gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm_mask_i64gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm_setr_epi64x(0, 16),
_mm_setr_epi32(-1, 0, -1, 0),
);
let r = unsafe {
_mm_mask_i64gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm_setr_epi64x(0, 16),
_mm_setr_epi32(-1, 0, -1, 0),
)
};
assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i64gather_epi32() {
fn test_mm256_i64gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
let r =
unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i64gather_epi32() {
fn test_mm256_mask_i64gather_epi32() {
let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
// A multiplier of 4 is word-addressing
let r = _mm256_mask_i64gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm_setr_epi32(-1, -1, -1, 0),
);
let r = unsafe {
_mm256_mask_i64gather_epi32::<4>(
_mm_set1_epi32(256),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm_setr_epi32(-1, -1, -1, 0),
)
};
assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i64gather_ps() {
fn test_mm_i64gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i64gather_ps() {
fn test_mm_mask_i64gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm_mask_i64gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm_setr_epi64x(0, 16),
_mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
);
let r = unsafe {
_mm_mask_i64gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm_setr_epi64x(0, 16),
_mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
)
};
assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i64gather_ps() {
fn test_mm256_i64gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
let r =
unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i64gather_ps() {
fn test_mm256_mask_i64gather_ps() {
let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
// A multiplier of 4 is word-addressing for f32s
let r = _mm256_mask_i64gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
);
let r = unsafe {
_mm256_mask_i64gather_ps::<4>(
_mm_set1_ps(256.0),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
)
};
assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i64gather_epi64() {
fn test_mm_i64gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i64gather_epi64() {
fn test_mm_mask_i64gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm_mask_i64gather_epi64::<8>(
_mm_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi64x(16, 16),
_mm_setr_epi64x(-1, 0),
);
let r = unsafe {
_mm_mask_i64gather_epi64::<8>(
_mm_set1_epi64x(256),
arr.as_ptr(),
_mm_setr_epi64x(16, 16),
_mm_setr_epi64x(-1, 0),
)
};
assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i64gather_epi64() {
fn test_mm256_i64gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
let r =
unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i64gather_epi64() {
fn test_mm256_mask_i64gather_epi64() {
let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
// A multiplier of 8 is word-addressing for i64s
let r = _mm256_mask_i64gather_epi64::<8>(
_mm256_set1_epi64x(256),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm256_setr_epi64x(-1, -1, -1, 0),
);
let r = unsafe {
_mm256_mask_i64gather_epi64::<8>(
_mm256_set1_epi64x(256),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm256_setr_epi64x(-1, -1, -1, 0),
)
};
assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_i64gather_pd() {
fn test_mm_i64gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_mask_i64gather_pd() {
fn test_mm_mask_i64gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm_mask_i64gather_pd::<8>(
_mm_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi64x(16, 16),
_mm_setr_pd(-1.0, 0.0),
);
let r = unsafe {
_mm_mask_i64gather_pd::<8>(
_mm_set1_pd(256.0),
arr.as_ptr(),
_mm_setr_epi64x(16, 16),
_mm_setr_pd(-1.0, 0.0),
)
};
assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_i64gather_pd() {
fn test_mm256_i64gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
let r =
unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mask_i64gather_pd() {
fn test_mm256_mask_i64gather_pd() {
let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
// A multiplier of 8 is word-addressing for f64s
let r = _mm256_mask_i64gather_pd::<8>(
_mm256_set1_pd(256.0),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
);
let r = unsafe {
_mm256_mask_i64gather_pd::<8>(
_mm256_set1_pd(256.0),
arr.as_ptr(),
_mm256_setr_epi64x(0, 16, 64, 96),
_mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
)
};
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
}

View file

@ -593,7 +593,7 @@ pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
#[cfg(test)]
mod tests {
use crate::core_arch::simd::u16x4;
use crate::core_arch::simd::{f32x4, f32x8, f32x16, u16x4, u16x8, u16x16, u16x32};
use crate::{
core_arch::x86::*,
mem::{transmute, transmute_copy},
@ -601,13 +601,13 @@ mod tests {
use stdarch_test::simd_test;
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_cvtne2ps_pbh() {
fn test_mm_cvtne2ps_pbh() {
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
let a: __m128 = transmute(a_array);
let b: __m128 = transmute(b_array);
let a = f32x4::from_array(a_array).as_m128();
let b = f32x4::from_array(b_array).as_m128();
let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b1_10000110_0110010,
@ -623,7 +623,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_mask_cvtne2ps_pbh() {
fn test_mm_mask_cvtne2ps_pbh() {
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
#[rustfmt::skip]
@ -637,12 +637,12 @@ mod tests {
0b0_10000000_1110000,
0b0_10000100_1001001,
];
let src: __m128bh = transmute(src_array);
let a: __m128 = transmute(a_array);
let b: __m128 = transmute(b_array);
let src = u16x8::from_array(src_array).as_m128bh();
let a = f32x4::from_array(a_array).as_m128();
let b = f32x4::from_array(b_array).as_m128();
let k: __mmask8 = 0b1111_1111;
let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b1_10000110_0110010,
@ -657,20 +657,20 @@ mod tests {
assert_eq!(result, expected_result);
let k = 0b0000_0000;
let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
let expected_result = src_array;
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_maskz_cvtne2ps_pbh() {
fn test_mm_maskz_cvtne2ps_pbh() {
let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
let a: __m128 = transmute(a_array);
let b: __m128 = transmute(b_array);
let a = f32x4::from_array(a_array).as_m128();
let b = f32x4::from_array(b_array).as_m128();
let k: __mmask8 = 0b1111_1111;
let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b1_10000110_0110010,
@ -685,7 +685,7 @@ mod tests {
assert_eq!(result, expected_result);
let k = 0b0011_1100;
let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0,
@ -701,7 +701,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_cvtne2ps_pbh() {
fn test_mm256_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -723,10 +723,10 @@ mod tests {
-1000.158_f32,
-575.575_f32,
];
let a: __m256 = transmute(a_array);
let b: __m256 = transmute(b_array);
let a = f32x8::from_array(a_array).as_m256();
let b = f32x8::from_array(b_array).as_m256();
let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b1_10000110_0110010,
@ -750,7 +750,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_mask_cvtne2ps_pbh() {
fn test_mm256_mask_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -790,12 +790,12 @@ mod tests {
0b0_10000000_1110000,
0b0_10000100_1001001,
];
let src: __m256bh = transmute(src_array);
let a: __m256 = transmute(a_array);
let b: __m256 = transmute(b_array);
let src = u16x16::from_array(src_array).as_m256bh();
let a = f32x8::from_array(a_array).as_m256();
let b = f32x8::from_array(b_array).as_m256();
let k: __mmask16 = 0xffff;
let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b1_10000110_0110010,
@ -818,13 +818,13 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0;
let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
let expected_result = src_array;
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_maskz_cvtne2ps_pbh() {
fn test_mm256_maskz_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -846,11 +846,11 @@ mod tests {
-1000.158_f32,
-575.575_f32,
];
let a: __m256 = transmute(a_array);
let b: __m256 = transmute(b_array);
let a = f32x8::from_array(a_array).as_m256();
let b = f32x8::from_array(b_array).as_m256();
let k: __mmask16 = 0xffff;
let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b1_10000110_0110010,
@ -873,7 +873,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0b0110_1100_0011_0110;
let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0,
@ -897,7 +897,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_cvtne2ps_pbh() {
fn test_mm512_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -935,10 +935,10 @@ mod tests {
-1000.158_f32,
-575.575_f32,
];
let a: __m512 = transmute(a_array);
let b: __m512 = transmute(b_array);
let a = f32x16::from_array(a_array).as_m512();
let b = f32x16::from_array(b_array).as_m512();
let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
let result: [u16; 32] = transmute(c.as_u16x32());
let result = *c.as_u16x32().as_array();
#[rustfmt::skip]
let expected_result: [u16; 32] = [
0b1_10000110_0110010,
@ -978,7 +978,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_mask_cvtne2ps_pbh() {
fn test_mm512_mask_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1050,12 +1050,12 @@ mod tests {
0b0_10000000_1110000,
0b0_10000100_1001001,
];
let src: __m512bh = transmute(src_array);
let a: __m512 = transmute(a_array);
let b: __m512 = transmute(b_array);
let src = u16x32::from_array(src_array).as_m512bh();
let a = f32x16::from_array(a_array).as_m512();
let b = f32x16::from_array(b_array).as_m512();
let k: __mmask32 = 0xffffffff;
let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 32] = transmute(c.as_u16x32());
let result = *c.as_u16x32().as_array();
#[rustfmt::skip]
let expected_result: [u16; 32] = [
0b1_10000110_0110010,
@ -1094,13 +1094,13 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask32 = 0;
let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
let result: [u16; 32] = transmute(c.as_u16x32());
let result = *c.as_u16x32().as_array();
let expected_result = src_array;
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_maskz_cvtne2ps_pbh() {
fn test_mm512_maskz_cvtne2ps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1138,11 +1138,11 @@ mod tests {
-1000.158_f32,
-575.575_f32,
];
let a: __m512 = transmute(a_array);
let b: __m512 = transmute(b_array);
let a = f32x16::from_array(a_array).as_m512();
let b = f32x16::from_array(b_array).as_m512();
let k: __mmask32 = 0xffffffff;
let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 32] = transmute(c.as_u16x32());
let result = *c.as_u16x32().as_array();
#[rustfmt::skip]
let expected_result: [u16; 32] = [
0b1_10000110_0110010,
@ -1181,7 +1181,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
let result: [u16; 32] = transmute(c.as_u16x32());
let result = *c.as_u16x32().as_array();
#[rustfmt::skip]
let expected_result: [u16; 32] = [
0,
@ -1221,7 +1221,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_cvtneps_pbh() {
fn test_mm256_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1233,9 +1233,9 @@ mod tests {
1000.158_f32,
575.575_f32,
];
let a: __m256 = transmute(a_array);
let a = f32x8::from_array(a_array).as_m256();
let c: __m128bh = _mm256_cvtneps_pbh(a);
let result: [u16; 8] = transmute(c.as_u16x8());
let result = *c.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b0_10000110_0110010,
@ -1251,7 +1251,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_mask_cvtneps_pbh() {
fn test_mm256_mask_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1273,11 +1273,11 @@ mod tests {
0b1_10001000_1111010,
0b1_10001000_0010000,
];
let src: __m128bh = transmute(src_array);
let a: __m256 = transmute(a_array);
let src = u16x8::from_array(src_array).as_m128bh();
let a = f32x8::from_array(a_array).as_m256();
let k: __mmask8 = 0xff;
let b = _mm256_mask_cvtneps_pbh(src, k, a);
let result: [u16; 8] = transmute(b.as_u16x8());
let result = *b.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b0_10000110_0110010,
@ -1292,13 +1292,13 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0x0;
let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
let result: [u16; 8] = transmute(b.as_u16x8());
let result = *b.as_u16x8().as_array();
let expected_result: [u16; 8] = src_array;
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_maskz_cvtneps_pbh() {
fn test_mm256_maskz_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1310,10 +1310,10 @@ mod tests {
1000.158_f32,
575.575_f32,
];
let a: __m256 = transmute(a_array);
let a = f32x8::from_array(a_array).as_m256();
let k: __mmask8 = 0xff;
let b = _mm256_maskz_cvtneps_pbh(k, a);
let result: [u16; 8] = transmute(b.as_u16x8());
let result = *b.as_u16x8().as_array();
#[rustfmt::skip]
let expected_result: [u16; 8] = [
0b0_10000110_0110010,
@ -1328,14 +1328,14 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0x6;
let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
let result: [u16; 8] = transmute(b.as_u16x8());
let result = *b.as_u16x8().as_array();
let expected_result: [u16; 8] =
[0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_cvtneps_pbh() {
fn test_mm512_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1355,9 +1355,9 @@ mod tests {
1000.158_f32,
575.575_f32,
];
let a: __m512 = transmute(a_array);
let a = f32x16::from_array(a_array).as_m512();
let c: __m256bh = _mm512_cvtneps_pbh(a);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b0_10000110_0110010,
@ -1381,7 +1381,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_mask_cvtneps_pbh() {
fn test_mm512_mask_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1419,11 +1419,11 @@ mod tests {
0b1_10001000_1111010,
0b1_10001000_0010000,
];
let src: __m256bh = transmute(src_array);
let a: __m512 = transmute(a_array);
let src = u16x16::from_array(src_array).as_m256bh();
let a = f32x16::from_array(a_array).as_m512();
let k: __mmask16 = 0xffff;
let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b0_10000110_0110010,
@ -1446,13 +1446,13 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0;
let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
let expected_result = src_array;
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_maskz_cvtneps_pbh() {
fn test_mm512_maskz_cvtneps_pbh() {
#[rustfmt::skip]
let a_array = [
178.125_f32,
@ -1472,10 +1472,10 @@ mod tests {
1000.158_f32,
575.575_f32,
];
let a: __m512 = transmute(a_array);
let a = f32x16::from_array(a_array).as_m512();
let k: __mmask16 = 0xffff;
let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0b0_10000110_0110010,
@ -1498,7 +1498,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0x653a;
let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
let result: [u16; 16] = transmute(c.as_u16x16());
let result = *c.as_u16x16().as_array();
#[rustfmt::skip]
let expected_result: [u16; 16] = [
0,
@ -1522,74 +1522,74 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_dpbf16_ps() {
fn test_mm_dpbf16_ps() {
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
let a1: __m128 = transmute(a_array);
let b1: __m128 = transmute(b_array);
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
let a1 = f32x4::from_array(a_array).as_m128();
let b1 = f32x4::from_array(b_array).as_m128();
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
let c: __m128 = _mm_dpbf16_ps(src, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_mask_dpbf16_ps() {
fn test_mm_mask_dpbf16_ps() {
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
let a1: __m128 = transmute(a_array);
let b1: __m128 = transmute(b_array);
let a1 = f32x4::from_array(a_array).as_m128();
let b1 = f32x4::from_array(b_array).as_m128();
let k: __mmask8 = 0xf3;
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
assert_eq!(result, expected_result);
let k: __mmask8 = 0xff;
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
assert_eq!(result, expected_result);
let k: __mmask8 = 0;
let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_maskz_dpbf16_ps() {
fn test_mm_maskz_dpbf16_ps() {
let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
let a1: __m128 = transmute(a_array);
let b1: __m128 = transmute(b_array);
let a1 = f32x4::from_array(a_array).as_m128();
let b1 = f32x4::from_array(b_array).as_m128();
let k: __mmask8 = 0xf3;
let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
assert_eq!(result, expected_result);
let k: __mmask8 = 0xff;
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
assert_eq!(result, expected_result);
let k: __mmask8 = 0;
let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 4] = transmute(c.as_f32x4());
let result = *c.as_f32x4().as_array();
let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_dpbf16_ps() {
fn test_mm256_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1597,16 +1597,16 @@ mod tests {
let b_array = [
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m256 = transmute(a_array);
let b1: __m256 = transmute(b_array);
let a1 = f32x8::from_array(a_array).as_m256();
let b1 = f32x8::from_array(b_array).as_m256();
#[rustfmt::skip]
let src: __m256 = transmute([
let src = f32x8::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
]).as_m256();
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
let c: __m256 = _mm256_dpbf16_ps(src, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1615,7 +1615,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_mask_dpbf16_ps() {
fn test_mm256_mask_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1623,17 +1623,17 @@ mod tests {
let b_array = [
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m256 = transmute(a_array);
let b1: __m256 = transmute(b_array);
let a1 = f32x8::from_array(a_array).as_m256();
let b1 = f32x8::from_array(b_array).as_m256();
let k: __mmask8 = 0x33;
#[rustfmt::skip]
let src: __m256 = transmute([
let src = f32x8::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
]).as_m256();
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
@ -1641,7 +1641,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0xff;
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1649,7 +1649,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0;
let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
@ -1658,7 +1658,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm256_maskz_dpbf16_ps() {
fn test_mm256_maskz_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1666,17 +1666,17 @@ mod tests {
let b_array = [
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m256 = transmute(a_array);
let b1: __m256 = transmute(b_array);
let a1 = f32x8::from_array(a_array).as_m256();
let b1 = f32x8::from_array(b_array).as_m256();
let k: __mmask8 = 0x33;
#[rustfmt::skip]
let src: __m256 = transmute([
let src = f32x8::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
]).as_m256();
let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
-18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
@ -1684,7 +1684,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0xff;
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
#[rustfmt::skip]
let expected_result: [f32; 8] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1692,13 +1692,13 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask8 = 0;
let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 8] = transmute(c.as_f32x8());
let result = *c.as_f32x8().as_array();
let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
assert_eq!(result, expected_result);
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_dpbf16_ps() {
fn test_mm512_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1708,16 +1708,17 @@ mod tests {
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m512 = transmute(a_array);
let b1: __m512 = transmute(b_array);
let src: __m512 = transmute([
let a1 = f32x16::from_array(a_array).as_m512();
let b1 = f32x16::from_array(b_array).as_m512();
let src = f32x16::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
])
.as_m512();
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
let c: __m512 = _mm512_dpbf16_ps(src, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1727,7 +1728,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_mask_dpbf16_ps() {
fn test_mm512_mask_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1737,18 +1738,18 @@ mod tests {
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m512 = transmute(a_array);
let b1: __m512 = transmute(b_array);
let a1 = f32x16::from_array(a_array).as_m512();
let b1 = f32x16::from_array(b_array).as_m512();
let k: __mmask16 = 0x3333;
#[rustfmt::skip]
let src: __m512 = transmute([
let src = f32x16::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
]).as_m512();
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
@ -1757,7 +1758,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0xffff;
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1766,7 +1767,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0;
let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
@ -1776,7 +1777,7 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512f")]
unsafe fn test_mm512_maskz_dpbf16_ps() {
fn test_mm512_maskz_dpbf16_ps() {
#[rustfmt::skip]
let a_array = [
8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@ -1786,18 +1787,18 @@ mod tests {
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
];
let a1: __m512 = transmute(a_array);
let b1: __m512 = transmute(b_array);
let a1 = f32x16::from_array(a_array).as_m512();
let b1 = f32x16::from_array(b_array).as_m512();
let k: __mmask16 = 0x3333;
#[rustfmt::skip]
let src: __m512 = transmute([
let src = f32x16::from_array([
1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
]);
]).as_m512();
let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
-18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
@ -1806,7 +1807,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0xffff;
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@ -1815,7 +1816,7 @@ mod tests {
assert_eq!(result, expected_result);
let k: __mmask16 = 0;
let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
let result: [f32; 16] = transmute(c.as_f32x16());
let result = *c.as_f32x16().as_array();
#[rustfmt::skip]
let expected_result: [f32; 16] = [
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
@ -1943,28 +1944,28 @@ mod tests {
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_cvtneps_pbh() {
fn test_mm_cvtneps_pbh() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a));
let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_pbh(a)) };
let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
assert_eq!(r, e);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_mask_cvtneps_pbh() {
fn test_mm_mask_cvtneps_pbh() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
let k = 0b1010;
let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a));
let r: u16x4 = unsafe { transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a)) };
let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
assert_eq!(r, e);
}
#[simd_test(enable = "avx512bf16,avx512vl")]
unsafe fn test_mm_maskz_cvtneps_pbh() {
fn test_mm_maskz_cvtneps_pbh() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let k = 0b1010;
let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a));
let r: u16x4 = unsafe { transmute_copy(&_mm_maskz_cvtneps_pbh(k, a)) };
let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
assert_eq!(r, e);
}

View file

@ -17098,37 +17098,37 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_loadu_epi16() {
const fn test_mm512_loadu_epi16() {
#[rustfmt::skip]
let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let r = _mm512_loadu_epi16(&a[0]);
let r = unsafe { _mm512_loadu_epi16(&a[0]) };
#[rustfmt::skip]
let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_loadu_epi16() {
const fn test_mm256_loadu_epi16() {
let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let r = _mm256_loadu_epi16(&a[0]);
let r = unsafe { _mm256_loadu_epi16(&a[0]) };
let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_loadu_epi16() {
const fn test_mm_loadu_epi16() {
let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let r = _mm_loadu_epi16(&a[0]);
let r = unsafe { _mm_loadu_epi16(&a[0]) };
let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_loadu_epi8() {
const fn test_mm512_loadu_epi8() {
#[rustfmt::skip]
let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let r = _mm512_loadu_epi8(&a[0]);
let r = unsafe { _mm512_loadu_epi8(&a[0]) };
#[rustfmt::skip]
let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
@ -17136,73 +17136,85 @@ mod tests {
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_loadu_epi8() {
const fn test_mm256_loadu_epi8() {
#[rustfmt::skip]
let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let r = _mm256_loadu_epi8(&a[0]);
let r = unsafe { _mm256_loadu_epi8(&a[0]) };
#[rustfmt::skip]
let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_loadu_epi8() {
const fn test_mm_loadu_epi8() {
let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let r = _mm_loadu_epi8(&a[0]);
let r = unsafe { _mm_loadu_epi8(&a[0]) };
let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_storeu_epi16() {
const fn test_mm512_storeu_epi16() {
let a = _mm512_set1_epi16(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
unsafe {
_mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
}
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_storeu_epi16() {
const fn test_mm256_storeu_epi16() {
let a = _mm256_set1_epi16(9);
let mut r = _mm256_set1_epi32(0);
_mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
unsafe {
_mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
}
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_storeu_epi16() {
const fn test_mm_storeu_epi16() {
let a = _mm_set1_epi16(9);
let mut r = _mm_set1_epi32(0);
_mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
unsafe {
_mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
}
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_storeu_epi8() {
const fn test_mm512_storeu_epi8() {
let a = _mm512_set1_epi8(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
unsafe {
_mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
}
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_storeu_epi8() {
const fn test_mm256_storeu_epi8() {
let a = _mm256_set1_epi8(9);
let mut r = _mm256_set1_epi32(0);
_mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
unsafe {
_mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
}
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_storeu_epi8() {
const fn test_mm_storeu_epi8() {
let a = _mm_set1_epi8(9);
let mut r = _mm_set1_epi32(0);
_mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
unsafe {
_mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
}
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_mask_loadu_epi16() {
const fn test_mm512_mask_loadu_epi16() {
let src = _mm512_set1_epi16(42);
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -17210,52 +17222,54 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b10101010_11001100_11101000_11001010;
let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
let e = &[
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
];
let e = _mm512_loadu_epi16(e.as_ptr());
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_maskz_loadu_epi16() {
const fn test_mm512_maskz_loadu_epi16() {
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let p = a.as_ptr();
let m = 0b10101010_11001100_11101000_11001010;
let r = _mm512_maskz_loadu_epi16(m, black_box(p));
let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
let e = &[
0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
26, 0, 28, 0, 30, 0, 32,
];
let e = _mm512_loadu_epi16(e.as_ptr());
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_mask_storeu_epi16() {
const fn test_mm512_mask_storeu_epi16() {
let mut r = [42_i16; 32];
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let a = _mm512_loadu_epi16(a.as_ptr());
let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
let m = 0b10101010_11001100_11101000_11001010;
_mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
unsafe {
_mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
}
let e = &[
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
];
let e = _mm512_loadu_epi16(e.as_ptr());
assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_mask_loadu_epi8() {
const fn test_mm512_mask_loadu_epi8() {
let src = _mm512_set1_epi8(42);
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -17264,18 +17278,18 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
];
let e = _mm512_loadu_epi8(e.as_ptr());
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_maskz_loadu_epi8() {
const fn test_mm512_maskz_loadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
@ -17283,77 +17297,81 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
let r = _mm512_maskz_loadu_epi8(m, black_box(p));
let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
let e = &[
0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
];
let e = _mm512_loadu_epi8(e.as_ptr());
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_mm512_mask_storeu_epi8() {
const fn test_mm512_mask_storeu_epi8() {
let mut r = [42_i8; 64];
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
];
let a = _mm512_loadu_epi8(a.as_ptr());
let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
_mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
unsafe {
_mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
}
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
];
let e = _mm512_loadu_epi8(e.as_ptr());
assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_mask_loadu_epi16() {
const fn test_mm256_mask_loadu_epi16() {
let src = _mm256_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
let e = &[
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
];
let e = _mm256_loadu_epi16(e.as_ptr());
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_maskz_loadu_epi16() {
const fn test_mm256_maskz_loadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm256_maskz_loadu_epi16(m, black_box(p));
let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
let e = _mm256_loadu_epi16(e.as_ptr());
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_mask_storeu_epi16() {
const fn test_mm256_mask_storeu_epi16() {
let mut r = [42_i16; 16];
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let a = _mm256_loadu_epi16(a.as_ptr());
let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
let m = 0b11101000_11001010;
_mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
unsafe {
_mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
}
let e = &[
42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
];
let e = _mm256_loadu_epi16(e.as_ptr());
assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_mask_loadu_epi8() {
const fn test_mm256_mask_loadu_epi8() {
let src = _mm256_set1_epi8(42);
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -17361,122 +17379,124 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b10101010_11001100_11101000_11001010;
let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
];
let e = _mm256_loadu_epi8(e.as_ptr());
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_maskz_loadu_epi8() {
const fn test_mm256_maskz_loadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let p = a.as_ptr();
let m = 0b10101010_11001100_11101000_11001010;
let r = _mm256_maskz_loadu_epi8(m, black_box(p));
let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
let e = &[
0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
26, 0, 28, 0, 30, 0, 32,
];
let e = _mm256_loadu_epi8(e.as_ptr());
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm256_mask_storeu_epi8() {
const fn test_mm256_mask_storeu_epi8() {
let mut r = [42_i8; 32];
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let a = _mm256_loadu_epi8(a.as_ptr());
let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
let m = 0b10101010_11001100_11101000_11001010;
_mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
unsafe {
_mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
}
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
];
let e = _mm256_loadu_epi8(e.as_ptr());
assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_mask_loadu_epi16() {
const fn test_mm_mask_loadu_epi16() {
let src = _mm_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let p = a.as_ptr();
let m = 0b11001010;
let r = _mm_mask_loadu_epi16(src, m, black_box(p));
let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
let e = _mm_loadu_epi16(e.as_ptr());
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_maskz_loadu_epi16() {
const fn test_mm_maskz_loadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let p = a.as_ptr();
let m = 0b11001010;
let r = _mm_maskz_loadu_epi16(m, black_box(p));
let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
let e = _mm_loadu_epi16(e.as_ptr());
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_mask_storeu_epi16() {
const fn test_mm_mask_storeu_epi16() {
let mut r = [42_i16; 8];
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let a = _mm_loadu_epi16(a.as_ptr());
let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
let m = 0b11001010;
_mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
let e = _mm_loadu_epi16(e.as_ptr());
assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_mask_loadu_epi8() {
const fn test_mm_mask_loadu_epi8() {
let src = _mm_set1_epi8(42);
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm_mask_loadu_epi8(src, m, black_box(p));
let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
];
let e = _mm_loadu_epi8(e.as_ptr());
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_maskz_loadu_epi8() {
const fn test_mm_maskz_loadu_epi8() {
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm_maskz_loadu_epi8(m, black_box(p));
let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
let e = _mm_loadu_epi8(e.as_ptr());
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
const unsafe fn test_mm_mask_storeu_epi8() {
const fn test_mm_mask_storeu_epi8() {
let mut r = [42_i8; 16];
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let a = _mm_loadu_epi8(a.as_ptr());
let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
let m = 0b11101000_11001010;
_mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
let e = &[
42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
];
let e = _mm_loadu_epi8(e.as_ptr());
assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
}
#[simd_test(enable = "avx512bw")]
@ -20714,36 +20734,40 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_store_mask64() {
const fn test_store_mask64() {
let a: __mmask64 =
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
let mut r = 0;
_store_mask64(&mut r, a);
unsafe {
_store_mask64(&mut r, a);
}
assert_eq!(r, a);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_store_mask32() {
const fn test_store_mask32() {
let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
let mut r = 0;
_store_mask32(&mut r, a);
unsafe {
_store_mask32(&mut r, a);
}
assert_eq!(r, a);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_load_mask64() {
const fn test_load_mask64() {
let p: __mmask64 =
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
let r = _load_mask64(&p);
let r = unsafe { _load_mask64(&p) };
let e: __mmask64 =
0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_load_mask32() {
const fn test_load_mask32() {
let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
let r = _load_mask32(&p);
let r = unsafe { _load_mask32(&p) };
let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
assert_eq!(r, e);
}
@ -21163,21 +21187,21 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_kortest_mask32_u8() {
const fn test_kortest_mask32_u8() {
let a: __mmask32 = 0b0110100101101001_0110100101101001;
let b: __mmask32 = 0b1011011010110110_1011011010110110;
let mut all_ones: u8 = 0;
let r = _kortest_mask32_u8(a, b, &mut all_ones);
let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
assert_eq!(r, 0);
assert_eq!(all_ones, 1);
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_kortest_mask64_u8() {
const fn test_kortest_mask64_u8() {
let a: __mmask64 = 0b0110100101101001_0110100101101001;
let b: __mmask64 = 0b1011011010110110_1011011010110110;
let mut all_ones: u8 = 0;
let r = _kortest_mask64_u8(a, b, &mut all_ones);
let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
assert_eq!(r, 0);
assert_eq!(all_ones, 0);
}
@ -21299,11 +21323,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_ktest_mask32_u8() {
const fn test_ktest_mask32_u8() {
let a: __mmask32 = 0b0110100100111100_0110100100111100;
let b: __mmask32 = 0b1001011011000011_1001011011000011;
let mut and_not: u8 = 0;
let r = _ktest_mask32_u8(a, b, &mut and_not);
let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
assert_eq!(r, 1);
assert_eq!(and_not, 0);
}
@ -21325,11 +21349,11 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
const unsafe fn test_ktest_mask64_u8() {
const fn test_ktest_mask64_u8() {
let a: __mmask64 = 0b0110100100111100_0110100100111100;
let b: __mmask64 = 0b1001011011000011_1001011011000011;
let mut and_not: u8 = 0;
let r = _ktest_mask64_u8(a, b, &mut and_not);
let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
assert_eq!(r, 1);
assert_eq!(and_not, 0);
}
@ -21951,32 +21975,38 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
fn test_mm512_mask_cvtsepi16_storeu_epi8() {
let a = _mm512_set1_epi16(i16::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtsepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
unsafe {
_mm512_mask_cvtsepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
}
let e = _mm256_set1_epi8(i8::MAX);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
fn test_mm256_mask_cvtsepi16_storeu_epi8() {
let a = _mm256_set1_epi16(i16::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
unsafe {
_mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
}
let e = _mm_set1_epi8(i8::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
fn test_mm_mask_cvtsepi16_storeu_epi8() {
let a = _mm_set1_epi16(i16::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
unsafe {
_mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
}
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0, 0, 0, 0, 0,
@ -21986,63 +22016,75 @@ mod tests {
}
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
fn test_mm512_mask_cvtepi16_storeu_epi8() {
let a = _mm512_set1_epi16(8);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
unsafe {
_mm512_mask_cvtepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
}
let e = _mm256_set1_epi8(8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
fn test_mm256_mask_cvtepi16_storeu_epi8() {
let a = _mm256_set1_epi16(8);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
unsafe {
_mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
}
let e = _mm_set1_epi8(8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
fn test_mm_mask_cvtepi16_storeu_epi8() {
let a = _mm_set1_epi16(8);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
unsafe {
_mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
}
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
fn test_mm512_mask_cvtusepi16_storeu_epi8() {
let a = _mm512_set1_epi16(i16::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtusepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
unsafe {
_mm512_mask_cvtusepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
}
let e = _mm256_set1_epi8(u8::MAX as i8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
fn test_mm256_mask_cvtusepi16_storeu_epi8() {
let a = _mm256_set1_epi16(i16::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
unsafe {
_mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
}
let e = _mm_set1_epi8(u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
fn test_mm_mask_cvtusepi16_storeu_epi8() {
let a = _mm_set1_epi16(i16::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
unsafe {
_mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
}
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,

View file

@ -7401,27 +7401,25 @@ unsafe extern "C" {
mod tests {
use super::*;
use crate::core_arch::assert_eq_const as assert_eq;
use crate::core_arch::x86::*;
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
use crate::mem::transmute;
const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
const AND_64: f64 = f64::from_bits(0x1111111111111111);
const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
const OR_64: f64 = f64::from_bits(0x7777777777777777);
const XOR_64: f64 = f64::from_bits(0x6666666666666666);
const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
const OPRND1_32: f32 = f32::from_bits(0x33333333);
const OPRND2_32: f32 = f32::from_bits(0x55555555);
const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
const AND_32: f32 = f32::from_bits(0x11111111);
const ANDN_32: f32 = f32::from_bits(0x44444444);
const OR_32: f32 = f32::from_bits(0x77777777);
const XOR_32: f32 = f32::from_bits(0x66666666);
#[simd_test(enable = "avx512dq,avx512vl")]
const fn test_mm_mask_and_pd() {
@ -10023,11 +10021,11 @@ mod tests {
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_kortest_mask8_u8() {
const fn test_kortest_mask8_u8() {
let a: __mmask8 = 0b01101001;
let b: __mmask8 = 0b10110110;
let mut all_ones: u8 = 0;
let r = _kortest_mask8_u8(a, b, &mut all_ones);
let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
assert_eq!(r, 0);
assert_eq!(all_ones, 1);
}
@ -10049,7 +10047,7 @@ mod tests {
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_kshiftli_mask8() {
const fn test_kshiftli_mask8() {
let a: __mmask8 = 0b01101001;
let r = _kshiftli_mask8::<3>(a);
let e: __mmask8 = 0b01001000;
@ -10089,11 +10087,11 @@ mod tests {
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_ktest_mask8_u8() {
const fn test_ktest_mask8_u8() {
let a: __mmask8 = 0b01101001;
let b: __mmask8 = 0b10010110;
let mut and_not: u8 = 0;
let r = _ktest_mask8_u8(a, b, &mut and_not);
let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
assert_eq!(r, 1);
assert_eq!(and_not, 0);
}
@ -10115,11 +10113,11 @@ mod tests {
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_ktest_mask16_u8() {
const fn test_ktest_mask16_u8() {
let a: __mmask16 = 0b0110100100111100;
let b: __mmask16 = 0b1001011011000011;
let mut and_not: u8 = 0;
let r = _ktest_mask16_u8(a, b, &mut and_not);
let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
assert_eq!(r, 1);
assert_eq!(and_not, 0);
}
@ -10141,18 +10139,20 @@ mod tests {
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_load_mask8() {
const fn test_load_mask8() {
let a: __mmask8 = 0b01101001;
let r = _load_mask8(&a);
let r = unsafe { _load_mask8(&a) };
let e: __mmask8 = 0b01101001;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512dq")]
const unsafe fn test_store_mask8() {
const fn test_store_mask8() {
let a: __mmask8 = 0b01101001;
let mut r = 0;
_store_mask8(&mut r, a);
unsafe {
_store_mask8(&mut r, a);
}
let e: __mmask8 = 0b01101001;
assert_eq!(r, e);
}

File diff suppressed because it is too large Load diff

View file

@ -16932,7 +16932,6 @@ unsafe extern "C" {
mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use crate::core_arch::x86::*;
use crate::mem::transmute;
use crate::ptr::{addr_of, addr_of_mut};
use stdarch_test::simd_test;
@ -17569,72 +17568,72 @@ mod tests {
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_load_ph() {
const fn test_mm_load_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let b = _mm_load_ph(addr_of!(a).cast());
let b = unsafe { _mm_load_ph(addr_of!(a).cast()) };
assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm256_load_ph() {
const fn test_mm256_load_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let b = _mm256_load_ph(addr_of!(a).cast());
let b = unsafe { _mm256_load_ph(addr_of!(a).cast()) };
assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
const unsafe fn test_mm512_load_ph() {
const fn test_mm512_load_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
let b = _mm512_load_ph(addr_of!(a).cast());
let b = unsafe { _mm512_load_ph(addr_of!(a).cast()) };
assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_load_sh() {
const fn test_mm_load_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_load_sh(addr_of!(a).cast());
let b = unsafe { _mm_load_sh(addr_of!(a).cast()) };
assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_mask_load_sh() {
fn test_mm_mask_load_sh() {
let a = _mm_set_sh(1.0);
let src = _mm_set_sh(2.);
let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
let b = unsafe { _mm_mask_load_sh(src, 1, addr_of!(a).cast()) };
assert_eq_m128h(a, b);
let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
let b = unsafe { _mm_mask_load_sh(src, 0, addr_of!(a).cast()) };
assert_eq_m128h(src, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_maskz_load_sh() {
fn test_mm_maskz_load_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
let b = unsafe { _mm_maskz_load_sh(1, addr_of!(a).cast()) };
assert_eq_m128h(a, b);
let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
let b = unsafe { _mm_maskz_load_sh(0, addr_of!(a).cast()) };
assert_eq_m128h(_mm_setzero_ph(), b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_loadu_ph() {
const fn test_mm_loadu_ph() {
let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let r = _mm_loadu_ph(array.as_ptr());
let r = unsafe { _mm_loadu_ph(array.as_ptr()) };
let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm256_loadu_ph() {
const fn test_mm256_loadu_ph() {
let array = [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
];
let r = _mm256_loadu_ph(array.as_ptr());
let r = unsafe { _mm256_loadu_ph(array.as_ptr()) };
let e = _mm256_setr_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
@ -17642,13 +17641,13 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
const unsafe fn test_mm512_loadu_ph() {
const fn test_mm512_loadu_ph() {
let array = [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
];
let r = _mm512_loadu_ph(array.as_ptr());
let r = unsafe { _mm512_loadu_ph(array.as_ptr()) };
let e = _mm512_setr_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@ -17686,81 +17685,99 @@ mod tests {
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_store_ph() {
const fn test_mm_store_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let mut b = _mm_setzero_ph();
_mm_store_ph(addr_of_mut!(b).cast(), a);
unsafe {
_mm_store_ph(addr_of_mut!(b).cast(), a);
}
assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm256_store_ph() {
const fn test_mm256_store_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let mut b = _mm256_setzero_ph();
_mm256_store_ph(addr_of_mut!(b).cast(), a);
unsafe {
_mm256_store_ph(addr_of_mut!(b).cast(), a);
}
assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
const unsafe fn test_mm512_store_ph() {
const fn test_mm512_store_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
let mut b = _mm512_setzero_ph();
_mm512_store_ph(addr_of_mut!(b).cast(), a);
unsafe {
_mm512_store_ph(addr_of_mut!(b).cast(), a);
}
assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_store_sh() {
const fn test_mm_store_sh() {
let a = _mm_set_sh(1.0);
let mut b = _mm_setzero_ph();
_mm_store_sh(addr_of_mut!(b).cast(), a);
unsafe {
_mm_store_sh(addr_of_mut!(b).cast(), a);
}
assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_mask_store_sh() {
fn test_mm_mask_store_sh() {
let a = _mm_set_sh(1.0);
let mut b = _mm_setzero_ph();
_mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
unsafe {
_mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
}
assert_eq_m128h(_mm_setzero_ph(), b);
_mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
unsafe {
_mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
}
assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm_storeu_ph() {
const fn test_mm_storeu_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let mut array = [0.0; 8];
_mm_storeu_ph(array.as_mut_ptr(), a);
assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
unsafe {
_mm_storeu_ph(array.as_mut_ptr(), a);
}
assert_eq_m128h(a, unsafe { _mm_loadu_ph(array.as_ptr()) });
}
#[simd_test(enable = "avx512fp16,avx512vl")]
const unsafe fn test_mm256_storeu_ph() {
const fn test_mm256_storeu_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let mut array = [0.0; 16];
_mm256_storeu_ph(array.as_mut_ptr(), a);
assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
unsafe {
_mm256_storeu_ph(array.as_mut_ptr(), a);
}
assert_eq_m256h(a, unsafe { _mm256_loadu_ph(array.as_ptr()) });
}
#[simd_test(enable = "avx512fp16")]
const unsafe fn test_mm512_storeu_ph() {
const fn test_mm512_storeu_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
let mut array = [0.0; 32];
_mm512_storeu_ph(array.as_mut_ptr(), a);
assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
unsafe {
_mm512_storeu_ph(array.as_mut_ptr(), a);
}
assert_eq_m512h(a, unsafe { _mm512_loadu_ph(array.as_ptr()) });
}
#[simd_test(enable = "avx512fp16,avx512vl")]
@ -23993,16 +24010,16 @@ mod tests {
#[simd_test(enable = "avx512fp16,avx512vl")]
const fn test_mm256_reduce_mul_ph() {
let a = _mm256_set1_ph(2.0);
let a = _mm256_set1_ph(1.2);
let r = _mm256_reduce_mul_ph(a);
assert_eq!(r, 65536.0);
assert_eq!(r, 18.5);
}
#[simd_test(enable = "avx512fp16")]
const fn test_mm512_reduce_mul_ph() {
let a = _mm512_set1_ph(2.0);
let a = _mm512_set1_ph(1.2);
let r = _mm512_reduce_mul_ph(a);
assert_eq!(r, 16777216.0);
assert_eq!(r, 342.3);
}
#[simd_test(enable = "avx512fp16,avx512vl")]

View file

@ -3932,7 +3932,7 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_mask_expandloadu_epi16() {
fn test_mm512_mask_expandloadu_epi16() {
let src = _mm512_set1_epi16(42);
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -3940,7 +3940,7 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111;
let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
let r = unsafe { _mm512_mask_expandloadu_epi16(src, m, black_box(p)) };
let e = _mm512_set_epi16(
16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
42, 42, 42, 42, 42, 4, 3, 2, 1,
@ -3949,14 +3949,14 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_maskz_expandloadu_epi16() {
fn test_mm512_maskz_expandloadu_epi16() {
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111;
let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
let r = unsafe { _mm512_maskz_expandloadu_epi16(m, black_box(p)) };
let e = _mm512_set_epi16(
16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
0, 4, 3, 2, 1,
@ -3965,49 +3965,49 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_mask_expandloadu_epi16() {
fn test_mm256_mask_expandloadu_epi16() {
let src = _mm256_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
let r = unsafe { _mm256_mask_expandloadu_epi16(src, m, black_box(p)) };
let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_maskz_expandloadu_epi16() {
fn test_mm256_maskz_expandloadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
let r = unsafe { _mm256_maskz_expandloadu_epi16(m, black_box(p)) };
let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_mask_expandloadu_epi16() {
fn test_mm_mask_expandloadu_epi16() {
let src = _mm_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let p = a.as_ptr();
let m = 0b11101000;
let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
let r = unsafe { _mm_mask_expandloadu_epi16(src, m, black_box(p)) };
let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_maskz_expandloadu_epi16() {
fn test_mm_maskz_expandloadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let p = a.as_ptr();
let m = 0b11101000;
let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
let r = unsafe { _mm_maskz_expandloadu_epi16(m, black_box(p)) };
let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_mask_expandloadu_epi8() {
fn test_mm512_mask_expandloadu_epi8() {
let src = _mm512_set1_epi8(42);
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -4016,7 +4016,7 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
let r = unsafe { _mm512_mask_expandloadu_epi8(src, m, black_box(p)) };
let e = _mm512_set_epi8(
32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
@ -4026,7 +4026,7 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_maskz_expandloadu_epi8() {
fn test_mm512_maskz_expandloadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
@ -4034,7 +4034,7 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
let r = unsafe { _mm512_maskz_expandloadu_epi8(m, black_box(p)) };
let e = _mm512_set_epi8(
32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
@ -4044,7 +4044,7 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_mask_expandloadu_epi8() {
fn test_mm256_mask_expandloadu_epi8() {
let src = _mm256_set1_epi8(42);
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -4052,7 +4052,7 @@ mod tests {
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111;
let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
let r = unsafe { _mm256_mask_expandloadu_epi8(src, m, black_box(p)) };
let e = _mm256_set_epi8(
16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
42, 42, 42, 42, 42, 4, 3, 2, 1,
@ -4061,14 +4061,14 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_maskz_expandloadu_epi8() {
fn test_mm256_maskz_expandloadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32,
];
let p = a.as_ptr();
let m = 0b11101000_11001010_11110000_00001111;
let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
let r = unsafe { _mm256_maskz_expandloadu_epi8(m, black_box(p)) };
let e = _mm256_set_epi8(
16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
0, 4, 3, 2, 1,
@ -4077,36 +4077,44 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_mask_expandloadu_epi8() {
fn test_mm_mask_expandloadu_epi8() {
let src = _mm_set1_epi8(42);
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
let r = unsafe { _mm_mask_expandloadu_epi8(src, m, black_box(p)) };
let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_maskz_expandloadu_epi8() {
fn test_mm_maskz_expandloadu_epi8() {
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
let m = 0b11101000_11001010;
let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
let r = unsafe { _mm_maskz_expandloadu_epi8(m, black_box(p)) };
let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_mask_compressstoreu_epi16() {
fn test_mm512_mask_compressstoreu_epi16() {
let a = _mm512_set_epi16(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
);
let mut r = [0_i16; 32];
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
unsafe {
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i16; 32]);
_mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
unsafe {
_mm512_mask_compressstoreu_epi16(
r.as_mut_ptr(),
0b11110000_11001010_11111111_00000000,
a,
);
}
assert_eq!(
&r,
&[
@ -4117,40 +4125,52 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_mask_compressstoreu_epi16() {
fn test_mm256_mask_compressstoreu_epi16() {
let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let mut r = [0_i16; 16];
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
unsafe {
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i16; 16]);
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
unsafe {
_mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
}
assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_mask_compressstoreu_epi16() {
fn test_mm_mask_compressstoreu_epi16() {
let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
let mut r = [0_i16; 8];
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
unsafe {
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i16; 8]);
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
unsafe {
_mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
}
assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
}
#[simd_test(enable = "avx512vbmi2")]
unsafe fn test_mm512_mask_compressstoreu_epi8() {
fn test_mm512_mask_compressstoreu_epi8() {
let a = _mm512_set_epi8(
64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
);
let mut r = [0_i8; 64];
_mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
unsafe {
_mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i8; 64]);
_mm512_mask_compressstoreu_epi8(
r.as_mut_ptr(),
0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
a,
);
unsafe {
_mm512_mask_compressstoreu_epi8(
r.as_mut_ptr(),
0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
a,
);
}
assert_eq!(
&r,
&[
@ -4162,15 +4182,23 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm256_mask_compressstoreu_epi8() {
fn test_mm256_mask_compressstoreu_epi8() {
let a = _mm256_set_epi8(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
);
let mut r = [0_i8; 32];
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
unsafe {
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i8; 32]);
_mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
unsafe {
_mm256_mask_compressstoreu_epi8(
r.as_mut_ptr(),
0b11110000_11001010_11111111_00000000,
a,
);
}
assert_eq!(
&r,
&[
@ -4181,12 +4209,16 @@ mod tests {
}
#[simd_test(enable = "avx512vbmi2,avx512vl")]
unsafe fn test_mm_mask_compressstoreu_epi8() {
fn test_mm_mask_compressstoreu_epi8() {
let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let mut r = [0_i8; 16];
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
unsafe {
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
}
assert_eq!(&r, &[0_i8; 16]);
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
unsafe {
_mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
}
assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
}
}

View file

@ -242,127 +242,127 @@ mod tests {
const BF16_EIGHT: u16 = 0b0_10000010_0000000;
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_bcstnebf16_ps() {
fn test_mm_bcstnebf16_ps() {
let a = bf16::from_bits(BF16_ONE);
let r = _mm_bcstnebf16_ps(addr_of!(a));
let r = unsafe { _mm_bcstnebf16_ps(addr_of!(a)) };
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_bcstnebf16_ps() {
fn test_mm256_bcstnebf16_ps() {
let a = bf16::from_bits(BF16_ONE);
let r = _mm256_bcstnebf16_ps(addr_of!(a));
let r = unsafe { _mm256_bcstnebf16_ps(addr_of!(a)) };
let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_bcstnesh_ps() {
fn test_mm_bcstnesh_ps() {
let a = 1.0_f16;
let r = _mm_bcstnesh_ps(addr_of!(a));
let r = unsafe { _mm_bcstnesh_ps(addr_of!(a)) };
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_bcstnesh_ps() {
fn test_mm256_bcstnesh_ps() {
let a = 1.0_f16;
let r = _mm256_bcstnesh_ps(addr_of!(a));
let r = unsafe { _mm256_bcstnesh_ps(addr_of!(a)) };
let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_cvtneebf16_ps() {
fn test_mm_cvtneebf16_ps() {
let a = __m128bh([
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
]);
let r = _mm_cvtneebf16_ps(addr_of!(a));
let r = unsafe { _mm_cvtneebf16_ps(addr_of!(a)) };
let e = _mm_setr_ps(1., 3., 5., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_cvtneebf16_ps() {
fn test_mm256_cvtneebf16_ps() {
let a = __m256bh([
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
]);
let r = _mm256_cvtneebf16_ps(addr_of!(a));
let r = unsafe { _mm256_cvtneebf16_ps(addr_of!(a)) };
let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_cvtneeph_ps() {
fn test_mm_cvtneeph_ps() {
let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
let r = _mm_cvtneeph_ps(addr_of!(a));
let r = unsafe { _mm_cvtneeph_ps(addr_of!(a)) };
let e = _mm_setr_ps(1., 3., 5., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_cvtneeph_ps() {
fn test_mm256_cvtneeph_ps() {
let a = __m256h([
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
]);
let r = _mm256_cvtneeph_ps(addr_of!(a));
let r = unsafe { _mm256_cvtneeph_ps(addr_of!(a)) };
let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_cvtneobf16_ps() {
fn test_mm_cvtneobf16_ps() {
let a = __m128bh([
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
]);
let r = _mm_cvtneobf16_ps(addr_of!(a));
let r = unsafe { _mm_cvtneobf16_ps(addr_of!(a)) };
let e = _mm_setr_ps(2., 4., 6., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_cvtneobf16_ps() {
fn test_mm256_cvtneobf16_ps() {
let a = __m256bh([
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
]);
let r = _mm256_cvtneobf16_ps(addr_of!(a));
let r = unsafe { _mm256_cvtneobf16_ps(addr_of!(a)) };
let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_cvtneoph_ps() {
fn test_mm_cvtneoph_ps() {
let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
let r = _mm_cvtneoph_ps(addr_of!(a));
let r = unsafe { _mm_cvtneoph_ps(addr_of!(a)) };
let e = _mm_setr_ps(2., 4., 6., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_cvtneoph_ps() {
fn test_mm256_cvtneoph_ps() {
let a = __m256h([
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
]);
let r = _mm256_cvtneoph_ps(addr_of!(a));
let r = unsafe { _mm256_cvtneoph_ps(addr_of!(a)) };
let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm_cvtneps_avx_pbh() {
fn test_mm_cvtneps_avx_pbh() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let r: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(a));
let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_avx_pbh(a)) };
let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
assert_eq!(r, e);
}
#[simd_test(enable = "avxneconvert")]
unsafe fn test_mm256_cvtneps_avx_pbh() {
fn test_mm256_cvtneps_avx_pbh() {
let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r: u16x8 = transmute(_mm256_cvtneps_avx_pbh(a));
let r: u16x8 = _mm256_cvtneps_avx_pbh(a).as_u16x8();
let e = u16x8::new(
BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
);

View file

@ -106,7 +106,7 @@ pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
#[cfg(test)]
mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use crate::{core_arch::x86::*, mem::transmute};
use crate::core_arch::x86::*;
use stdarch_test::simd_test;
const F16_ONE: i16 = 0x3c00;

View file

@ -77,12 +77,14 @@ mod tests {
#[simd_test(enable = "fxsr")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_fxsave() {
fn test_fxsave() {
let mut a = FxsaveArea::new();
let mut b = FxsaveArea::new();
fxsr::_fxsave(a.ptr());
fxsr::_fxrstor(a.ptr());
fxsr::_fxsave(b.ptr());
unsafe {
fxsr::_fxsave(a.ptr());
fxsr::_fxrstor(a.ptr());
fxsr::_fxsave(b.ptr());
}
}
}

View file

@ -898,25 +898,25 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512f")]
unsafe fn test_mm512_gf2p8mul_epi8() {
fn test_mm512_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_512 {
let left = load_m512i_word(&left, i);
let right = load_m512i_word(&right, i);
let expected = load_m512i_word(&expected, i);
let left = unsafe { load_m512i_word(&left, i) };
let right = unsafe { load_m512i_word(&right, i) };
let expected = unsafe { load_m512i_word(&expected, i) };
let result = _mm512_gf2p8mul_epi8(left, right);
assert_eq_m512i(result, expected);
}
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
fn test_mm512_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_512 {
let left = load_m512i_word(&left, i);
let right = load_m512i_word(&right, i);
let left = unsafe { load_m512i_word(&left, i) };
let right = unsafe { load_m512i_word(&right, i) };
let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
assert_eq_m512i(result_zero, _mm512_setzero_si512());
let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
@ -930,12 +930,12 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8mul_epi8() {
fn test_mm512_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_512 {
let left = load_m512i_word(&left, i);
let right = load_m512i_word(&right, i);
let left = unsafe { load_m512i_word(&left, i) };
let right = unsafe { load_m512i_word(&right, i) };
let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
assert_eq_m512i(result_left, left);
let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
@ -948,25 +948,25 @@ mod tests {
}
#[simd_test(enable = "gfni,avx")]
unsafe fn test_mm256_gf2p8mul_epi8() {
fn test_mm256_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_256 {
let left = load_m256i_word(&left, i);
let right = load_m256i_word(&right, i);
let expected = load_m256i_word(&expected, i);
let left = unsafe { load_m256i_word(&left, i) };
let right = unsafe { load_m256i_word(&right, i) };
let expected = unsafe { load_m256i_word(&expected, i) };
let result = _mm256_gf2p8mul_epi8(left, right);
assert_eq_m256i(result, expected);
}
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
fn test_mm256_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_256 {
let left = load_m256i_word(&left, i);
let right = load_m256i_word(&right, i);
let left = unsafe { load_m256i_word(&left, i) };
let right = unsafe { load_m256i_word(&right, i) };
let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
assert_eq_m256i(result_zero, _mm256_setzero_si256());
let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
@ -980,12 +980,12 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8mul_epi8() {
fn test_mm256_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_256 {
let left = load_m256i_word(&left, i);
let right = load_m256i_word(&right, i);
let left = unsafe { load_m256i_word(&left, i) };
let right = unsafe { load_m256i_word(&right, i) };
let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
assert_eq_m256i(result_left, left);
let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
@ -998,25 +998,25 @@ mod tests {
}
#[simd_test(enable = "gfni")]
unsafe fn test_mm_gf2p8mul_epi8() {
fn test_mm_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_128 {
let left = load_m128i_word(&left, i);
let right = load_m128i_word(&right, i);
let expected = load_m128i_word(&expected, i);
let left = unsafe { load_m128i_word(&left, i) };
let right = unsafe { load_m128i_word(&right, i) };
let expected = unsafe { load_m128i_word(&expected, i) };
let result = _mm_gf2p8mul_epi8(left, right);
assert_eq_m128i(result, expected);
}
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8mul_epi8() {
fn test_mm_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_128 {
let left = load_m128i_word(&left, i);
let right = load_m128i_word(&right, i);
let left = unsafe { load_m128i_word(&left, i) };
let right = unsafe { load_m128i_word(&right, i) };
let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
assert_eq_m128i(result_zero, _mm_setzero_si128());
let mask_bytes: __mmask16 = 0x0F_F0;
@ -1030,12 +1030,12 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8mul_epi8() {
fn test_mm_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
for i in 0..NUM_TEST_WORDS_128 {
let left = load_m128i_word(&left, i);
let right = load_m128i_word(&right, i);
let left = unsafe { load_m128i_word(&left, i) };
let right = unsafe { load_m128i_word(&right, i) };
let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
assert_eq_m128i(result_left, left);
let mask_bytes: __mmask16 = 0x0F_F0;
@ -1048,7 +1048,7 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512f")]
unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
fn test_mm512_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
let constant: i64 = 0;
@ -1061,20 +1061,20 @@ mod tests {
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let data = load_m512i_word(&bytes, i);
let data = unsafe { load_m512i_word(&bytes, i) };
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m512i(result, data);
let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m512i(result, constant_reference);
let data = load_m512i_word(&more_bytes, i);
let data = unsafe { load_m512i_word(&more_bytes, i) };
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m512i(result, data);
let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m512i(result, constant_reference);
let matrix = load_m512i_word(&matrices, i);
let vector = load_m512i_word(&vectors, i);
let reference = load_m512i_word(&references, i);
let matrix = unsafe { load_m512i_word(&matrices, i) };
let vector = unsafe { load_m512i_word(&vectors, i) };
let reference = unsafe { load_m512i_word(&references, i) };
let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
assert_eq_m512i(result, reference);
@ -1082,13 +1082,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let matrix = load_m512i_word(&matrices, i);
let vector = load_m512i_word(&vectors, i);
let matrix = unsafe { load_m512i_word(&matrices, i) };
let vector = unsafe { load_m512i_word(&vectors, i) };
let result_zero =
_mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m512i(result_zero, _mm512_setzero_si512());
@ -1104,13 +1104,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
fn test_mm512_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let left = load_m512i_word(&vectors, i);
let right = load_m512i_word(&matrices, i);
let left = unsafe { load_m512i_word(&vectors, i) };
let right = unsafe { load_m512i_word(&matrices, i) };
let result_left =
_mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m512i(result_left, left);
@ -1125,7 +1125,7 @@ mod tests {
}
#[simd_test(enable = "gfni,avx")]
unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
fn test_mm256_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
let constant: i64 = 0;
@ -1138,20 +1138,20 @@ mod tests {
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let data = load_m256i_word(&bytes, i);
let data = unsafe { load_m256i_word(&bytes, i) };
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m256i(result, data);
let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m256i(result, constant_reference);
let data = load_m256i_word(&more_bytes, i);
let data = unsafe { load_m256i_word(&more_bytes, i) };
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m256i(result, data);
let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m256i(result, constant_reference);
let matrix = load_m256i_word(&matrices, i);
let vector = load_m256i_word(&vectors, i);
let reference = load_m256i_word(&references, i);
let matrix = unsafe { load_m256i_word(&matrices, i) };
let vector = unsafe { load_m256i_word(&vectors, i) };
let reference = unsafe { load_m256i_word(&references, i) };
let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
assert_eq_m256i(result, reference);
@ -1159,13 +1159,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let matrix = load_m256i_word(&matrices, i);
let vector = load_m256i_word(&vectors, i);
let matrix = unsafe { load_m256i_word(&matrices, i) };
let vector = unsafe { load_m256i_word(&vectors, i) };
let result_zero =
_mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m256i(result_zero, _mm256_setzero_si256());
@ -1181,13 +1181,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
fn test_mm256_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let left = load_m256i_word(&vectors, i);
let right = load_m256i_word(&matrices, i);
let left = unsafe { load_m256i_word(&vectors, i) };
let right = unsafe { load_m256i_word(&matrices, i) };
let result_left =
_mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m256i(result_left, left);
@ -1202,7 +1202,7 @@ mod tests {
}
#[simd_test(enable = "gfni")]
unsafe fn test_mm_gf2p8affine_epi64_epi8() {
fn test_mm_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
let constant: i64 = 0;
@ -1215,20 +1215,20 @@ mod tests {
let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let data = load_m128i_word(&bytes, i);
let data = unsafe { load_m128i_word(&bytes, i) };
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m128i(result, data);
let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m128i(result, constant_reference);
let data = load_m128i_word(&more_bytes, i);
let data = unsafe { load_m128i_word(&more_bytes, i) };
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
assert_eq_m128i(result, data);
let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
assert_eq_m128i(result, constant_reference);
let matrix = load_m128i_word(&matrices, i);
let vector = load_m128i_word(&vectors, i);
let reference = load_m128i_word(&references, i);
let matrix = unsafe { load_m128i_word(&matrices, i) };
let vector = unsafe { load_m128i_word(&vectors, i) };
let reference = unsafe { load_m128i_word(&references, i) };
let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
assert_eq_m128i(result, reference);
@ -1236,13 +1236,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
fn test_mm_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let matrix = load_m128i_word(&matrices, i);
let vector = load_m128i_word(&vectors, i);
let matrix = unsafe { load_m128i_word(&matrices, i) };
let vector = unsafe { load_m128i_word(&vectors, i) };
let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m128i(result_zero, _mm_setzero_si128());
let mask_bytes: __mmask16 = 0x0F_F0;
@ -1257,13 +1257,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
fn test_mm_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let left = load_m128i_word(&vectors, i);
let right = load_m128i_word(&matrices, i);
let left = unsafe { load_m128i_word(&vectors, i) };
let right = unsafe { load_m128i_word(&matrices, i) };
let result_left =
_mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m128i(result_left, left);
@ -1278,7 +1278,7 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512f")]
unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
fn test_mm512_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
const CONSTANT_BYTE: i32 = 0x63;
@ -1288,8 +1288,8 @@ mod tests {
let (inputs, results) = generate_inv_tests_data();
for i in 0..NUM_BYTES_WORDS_512 {
let input = load_m512i_word(&inputs, i);
let reference = load_m512i_word(&results, i);
let input = unsafe { load_m512i_word(&inputs, i) };
let reference = unsafe { load_m512i_word(&results, i) };
let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
let remultiplied = _mm512_gf2p8mul_epi8(result, input);
assert_eq_m512i(remultiplied, reference);
@ -1300,8 +1300,8 @@ mod tests {
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let vector = load_m512i_word(&vectors, i);
let matrix = load_m512i_word(&matrices, i);
let vector = unsafe { load_m512i_word(&vectors, i) };
let matrix = unsafe { load_m512i_word(&matrices, i) };
let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@ -1314,21 +1314,21 @@ mod tests {
let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);
for i in 0..NUM_BYTES_WORDS_512 {
let reference = load_m512i_word(&AES_S_BOX, i);
let input = load_m512i_word(&inputs, i);
let reference = unsafe { load_m512i_word(&AES_S_BOX, i) };
let input = unsafe { load_m512i_word(&inputs, i) };
let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
assert_eq_m512i(result, reference);
}
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let matrix = load_m512i_word(&matrices, i);
let vector = load_m512i_word(&vectors, i);
let matrix = unsafe { load_m512i_word(&matrices, i) };
let vector = unsafe { load_m512i_word(&vectors, i) };
let result_zero =
_mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m512i(result_zero, _mm512_setzero_si512());
@ -1344,13 +1344,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_512 {
let left = load_m512i_word(&vectors, i);
let right = load_m512i_word(&matrices, i);
let left = unsafe { load_m512i_word(&vectors, i) };
let right = unsafe { load_m512i_word(&matrices, i) };
let result_left =
_mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m512i(result_left, left);
@ -1366,7 +1366,7 @@ mod tests {
}
#[simd_test(enable = "gfni,avx")]
unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
fn test_mm256_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
const CONSTANT_BYTE: i32 = 0x63;
@ -1376,8 +1376,8 @@ mod tests {
let (inputs, results) = generate_inv_tests_data();
for i in 0..NUM_BYTES_WORDS_256 {
let input = load_m256i_word(&inputs, i);
let reference = load_m256i_word(&results, i);
let input = unsafe { load_m256i_word(&inputs, i) };
let reference = unsafe { load_m256i_word(&results, i) };
let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
let remultiplied = _mm256_gf2p8mul_epi8(result, input);
assert_eq_m256i(remultiplied, reference);
@ -1388,8 +1388,8 @@ mod tests {
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let vector = load_m256i_word(&vectors, i);
let matrix = load_m256i_word(&matrices, i);
let vector = unsafe { load_m256i_word(&vectors, i) };
let matrix = unsafe { load_m256i_word(&matrices, i) };
let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@ -1402,21 +1402,21 @@ mod tests {
let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);
for i in 0..NUM_BYTES_WORDS_256 {
let reference = load_m256i_word(&AES_S_BOX, i);
let input = load_m256i_word(&inputs, i);
let reference = unsafe { load_m256i_word(&AES_S_BOX, i) };
let input = unsafe { load_m256i_word(&inputs, i) };
let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
assert_eq_m256i(result, reference);
}
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let matrix = load_m256i_word(&matrices, i);
let vector = load_m256i_word(&vectors, i);
let matrix = unsafe { load_m256i_word(&matrices, i) };
let vector = unsafe { load_m256i_word(&vectors, i) };
let result_zero =
_mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m256i(result_zero, _mm256_setzero_si256());
@ -1432,13 +1432,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_256 {
let left = load_m256i_word(&vectors, i);
let right = load_m256i_word(&matrices, i);
let left = unsafe { load_m256i_word(&vectors, i) };
let right = unsafe { load_m256i_word(&matrices, i) };
let result_left =
_mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m256i(result_left, left);
@ -1454,7 +1454,7 @@ mod tests {
}
#[simd_test(enable = "gfni")]
unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
fn test_mm_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
const CONSTANT_BYTE: i32 = 0x63;
@ -1464,8 +1464,8 @@ mod tests {
let (inputs, results) = generate_inv_tests_data();
for i in 0..NUM_BYTES_WORDS_128 {
let input = load_m128i_word(&inputs, i);
let reference = load_m128i_word(&results, i);
let input = unsafe { load_m128i_word(&inputs, i) };
let reference = unsafe { load_m128i_word(&results, i) };
let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
let remultiplied = _mm_gf2p8mul_epi8(result, input);
assert_eq_m128i(remultiplied, reference);
@ -1476,8 +1476,8 @@ mod tests {
generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let vector = load_m128i_word(&vectors, i);
let matrix = load_m128i_word(&matrices, i);
let vector = unsafe { load_m128i_word(&vectors, i) };
let matrix = unsafe { load_m128i_word(&matrices, i) };
let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@ -1490,21 +1490,21 @@ mod tests {
let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);
for i in 0..NUM_BYTES_WORDS_128 {
let reference = load_m128i_word(&AES_S_BOX, i);
let input = load_m128i_word(&inputs, i);
let reference = unsafe { load_m128i_word(&AES_S_BOX, i) };
let input = unsafe { load_m128i_word(&inputs, i) };
let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
assert_eq_m128i(result, reference);
}
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let matrix = load_m128i_word(&matrices, i);
let vector = load_m128i_word(&vectors, i);
let matrix = unsafe { load_m128i_word(&matrices, i) };
let vector = unsafe { load_m128i_word(&vectors, i) };
let result_zero =
_mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
assert_eq_m128i(result_zero, _mm_setzero_si128());
@ -1520,13 +1520,13 @@ mod tests {
}
#[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
for i in 0..NUM_TEST_WORDS_128 {
let left = load_m128i_word(&vectors, i);
let right = load_m128i_word(&matrices, i);
let left = unsafe { load_m128i_word(&vectors, i) };
let right = unsafe { load_m128i_word(&matrices, i) };
let result_left =
_mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
assert_eq_m128i(result_left, left);

View file

@ -352,45 +352,47 @@ mod tests {
use stdarch_test::simd_test;
#[target_feature(enable = "kl")]
unsafe fn encodekey128() -> [u8; 48] {
fn encodekey128() -> [u8; 48] {
let mut handle = [0; 48];
let _ = _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr());
let _ = unsafe { _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr()) };
handle
}
#[target_feature(enable = "kl")]
unsafe fn encodekey256() -> [u8; 64] {
fn encodekey256() -> [u8; 64] {
let mut handle = [0; 64];
let _ = _mm_encodekey256_u32(
0,
_mm_setzero_si128(),
_mm_setzero_si128(),
handle.as_mut_ptr(),
);
let _ = unsafe {
_mm_encodekey256_u32(
0,
_mm_setzero_si128(),
_mm_setzero_si128(),
handle.as_mut_ptr(),
)
};
handle
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_encodekey128_u32() {
fn test_mm_encodekey128_u32() {
encodekey128();
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_encodekey256_u32() {
fn test_mm_encodekey256_u32() {
encodekey256();
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_aesenc128kl_u8() {
fn test_mm_aesenc128kl_u8() {
let mut buffer = _mm_setzero_si128();
let key = encodekey128();
for _ in 0..100 {
let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
@ -398,16 +400,16 @@ mod tests {
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_aesdec128kl_u8() {
fn test_mm_aesdec128kl_u8() {
let mut buffer = _mm_setzero_si128();
let key = encodekey128();
for _ in 0..100 {
let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
@ -415,16 +417,16 @@ mod tests {
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_aesenc256kl_u8() {
fn test_mm_aesenc256kl_u8() {
let mut buffer = _mm_setzero_si128();
let key = encodekey256();
for _ in 0..100 {
let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
@ -432,16 +434,16 @@ mod tests {
}
#[simd_test(enable = "kl")]
unsafe fn test_mm_aesdec256kl_u8() {
fn test_mm_aesdec256kl_u8() {
let mut buffer = _mm_setzero_si128();
let key = encodekey256();
for _ in 0..100 {
let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
assert_eq!(status, 0);
}
@ -449,16 +451,20 @@ mod tests {
}
#[simd_test(enable = "widekl")]
unsafe fn test_mm_aesencwide128kl_u8() {
fn test_mm_aesencwide128kl_u8() {
let mut buffer = [_mm_setzero_si128(); 8];
let key = encodekey128();
for _ in 0..100 {
let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
@ -468,16 +474,20 @@ mod tests {
}
#[simd_test(enable = "widekl")]
unsafe fn test_mm_aesdecwide128kl_u8() {
fn test_mm_aesdecwide128kl_u8() {
let mut buffer = [_mm_setzero_si128(); 8];
let key = encodekey128();
for _ in 0..100 {
let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
@ -487,16 +497,20 @@ mod tests {
}
#[simd_test(enable = "widekl")]
unsafe fn test_mm_aesencwide256kl_u8() {
fn test_mm_aesencwide256kl_u8() {
let mut buffer = [_mm_setzero_si128(); 8];
let key = encodekey256();
for _ in 0..100 {
let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
@ -506,16 +520,20 @@ mod tests {
}
#[simd_test(enable = "widekl")]
unsafe fn test_mm_aesdecwide256kl_u8() {
fn test_mm_aesdecwide256kl_u8() {
let mut buffer = [_mm_setzero_si128(); 8];
let key = encodekey256();
for _ in 0..100 {
let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}
for _ in 0..100 {
let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
let status = unsafe {
_mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
};
assert_eq!(status, 0);
}

View file

@ -120,13 +120,15 @@ mod tests {
use crate::core_arch::x86::*;
#[simd_test(enable = "rtm")]
unsafe fn test_xbegin() {
fn test_xbegin() {
let mut x = 0;
for _ in 0..10 {
let code = _xbegin();
let code = unsafe { _xbegin() };
if code == _XBEGIN_STARTED {
x += 1;
_xend();
unsafe {
_xend();
}
assert_eq!(x, 1);
break;
}
@ -135,19 +137,23 @@ mod tests {
}
#[simd_test(enable = "rtm")]
unsafe fn test_xabort() {
fn test_xabort() {
const ABORT_CODE: u32 = 42;
// aborting outside a transactional region does nothing
_xabort::<ABORT_CODE>();
unsafe {
_xabort::<ABORT_CODE>();
}
for _ in 0..10 {
let mut x = 0;
let code = rtm::_xbegin();
let code = unsafe { _xbegin() };
if code == _XBEGIN_STARTED {
x += 1;
rtm::_xabort::<ABORT_CODE>();
unsafe {
_xabort::<ABORT_CODE>();
}
} else if code & _XABORT_EXPLICIT != 0 {
let test_abort_code = rtm::_xabort_code(code);
let test_abort_code = _xabort_code(code);
assert_eq!(test_abort_code, ABORT_CODE);
}
assert_eq!(x, 0);
@ -155,14 +161,16 @@ mod tests {
}
#[simd_test(enable = "rtm")]
unsafe fn test_xtest() {
assert_eq!(_xtest(), 0);
fn test_xtest() {
assert_eq!(unsafe { _xtest() }, 0);
for _ in 0..10 {
let code = rtm::_xbegin();
let code = unsafe { _xbegin() };
if code == _XBEGIN_STARTED {
let in_tx = _xtest();
rtm::_xend();
let in_tx = unsafe { _xtest() };
unsafe {
_xend();
}
// putting the assert inside the transaction would abort the transaction on fail
// without any output/panic/etc

View file

@ -3147,21 +3147,21 @@ mod tests {
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_load_ss() {
const fn test_mm_load_ss() {
let a = 42.0f32;
let r = _mm_load_ss(ptr::addr_of!(a));
let r = unsafe { _mm_load_ss(ptr::addr_of!(a)) };
assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_load1_ps() {
const fn test_mm_load1_ps() {
let a = 42.0f32;
let r = _mm_load1_ps(ptr::addr_of!(a));
let r = unsafe { _mm_load1_ps(ptr::addr_of!(a)) };
assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_load_ps() {
const fn test_mm_load_ps() {
let vals = Memory {
data: [1.0f32, 2.0, 3.0, 4.0],
};
@ -3169,21 +3169,21 @@ mod tests {
// guaranteed to be aligned to 16 bytes
let p = vals.data.as_ptr();
let r = _mm_load_ps(p);
let r = unsafe { _mm_load_ps(p) };
let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_loadu_ps() {
const fn test_mm_loadu_ps() {
let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let p = vals.as_ptr().add(3);
let r = _mm_loadu_ps(black_box(p));
let p = unsafe { vals.as_ptr().add(3) };
let r = unsafe { _mm_loadu_ps(black_box(p)) };
assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_loadr_ps() {
const fn test_mm_loadr_ps() {
let vals = Memory {
data: [1.0f32, 2.0, 3.0, 4.0],
};
@ -3191,16 +3191,18 @@ mod tests {
// guaranteed to be aligned to 16 bytes
let p = vals.data.as_ptr();
let r = _mm_loadr_ps(p);
let r = unsafe { _mm_loadr_ps(p) };
let e = _mm_setr_ps(4.0, 3.0, 2.0, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_store_ss() {
const fn test_mm_store_ss() {
let mut vals = [0.0f32; 8];
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
_mm_store_ss(vals.as_mut_ptr().add(1), a);
unsafe {
_mm_store_ss(vals.as_mut_ptr().add(1), a);
}
assert_eq!(vals[0], 0.0);
assert_eq!(vals[1], 1.0);
@ -3208,46 +3210,52 @@ mod tests {
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_store1_ps() {
const fn test_mm_store1_ps() {
let mut vals = Memory { data: [0.0f32; 4] };
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
// guaranteed to be aligned to 16 bytes
let p = vals.data.as_mut_ptr();
_mm_store1_ps(p, *black_box(&a));
unsafe {
_mm_store1_ps(p, *black_box(&a));
}
assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_store_ps() {
const fn test_mm_store_ps() {
let mut vals = Memory { data: [0.0f32; 4] };
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
// guaranteed to be aligned to 16 bytes
let p = vals.data.as_mut_ptr();
_mm_store_ps(p, *black_box(&a));
unsafe {
_mm_store_ps(p, *black_box(&a));
}
assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_storer_ps() {
const fn test_mm_storer_ps() {
let mut vals = Memory { data: [0.0f32; 4] };
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
// guaranteed to be aligned to 16 bytes
let p = vals.data.as_mut_ptr();
_mm_storer_ps(p, *black_box(&a));
unsafe {
_mm_storer_ps(p, *black_box(&a));
}
assert_eq!(vals.data, [4.0, 3.0, 2.0, 1.0]);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_storeu_ps() {
const fn test_mm_storeu_ps() {
#[repr(align(16))]
struct Memory8 {
data: [f32; 8],
@ -3258,9 +3266,11 @@ mod tests {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
// guaranteed to be *not* aligned to 16 bytes
let p = vals.data.as_mut_ptr().offset(1);
let p = unsafe { vals.data.as_mut_ptr().offset(1) };
_mm_storeu_ps(p, *black_box(&a));
unsafe {
_mm_storeu_ps(p, *black_box(&a));
}
assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]);
}
@ -3315,11 +3325,13 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_ps() {
fn test_mm_stream_ps() {
let a = _mm_set1_ps(7.0);
let mut mem = Memory { data: [-1.0; 4] };
_mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
unsafe {
_mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
}
_mm_sfence();
for i in 0..4 {
assert_eq!(mem.data[i], get_m128(a, i));

View file

@ -3291,9 +3291,11 @@ mod tests {
}
#[simd_test(enable = "sse2")]
unsafe fn test_mm_clflush() {
fn test_mm_clflush() {
let x = 0_u8;
_mm_clflush(ptr::addr_of!(x));
unsafe {
_mm_clflush(ptr::addr_of!(x));
}
}
#[simd_test(enable = "sse2")]
@ -3725,7 +3727,7 @@ mod tests {
}
#[simd_test(enable = "sse2")]
unsafe fn test_mm_sll_epi16() {
fn test_mm_sll_epi16() {
let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
assert_eq_m128i(
@ -4071,7 +4073,7 @@ mod tests {
}
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cvtps_epi32() {
fn test_mm_cvtps_epi32() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_cvtps_epi32(a);
assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
@ -4178,23 +4180,23 @@ mod tests {
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadl_epi64() {
const fn test_mm_loadl_epi64() {
let a = _mm_setr_epi64x(6, 5);
let r = _mm_loadl_epi64(ptr::addr_of!(a));
let r = unsafe { _mm_loadl_epi64(ptr::addr_of!(a)) };
assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_load_si128() {
const fn test_mm_load_si128() {
let a = _mm_set_epi64x(5, 6);
let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_load_si128(ptr::addr_of!(a) as *const _) };
assert_eq_m128i(a, r);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadu_si128() {
const fn test_mm_loadu_si128() {
let a = _mm_set_epi64x(5, 6);
let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_loadu_si128(ptr::addr_of!(a) as *const _) };
assert_eq_m128i(a, r);
}
@ -4202,7 +4204,7 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_maskmoveu_si128() {
fn test_mm_maskmoveu_si128() {
let a = _mm_set1_epi8(9);
#[rustfmt::skip]
let mask = _mm_set_epi8(
@ -4210,33 +4212,41 @@ mod tests {
0, 0, 0, 0, 0, 0, 0, 0,
);
let mut r = _mm_set1_epi8(0);
_mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
unsafe {
_mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
}
_mm_sfence();
let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_store_si128() {
const fn test_mm_store_si128() {
let a = _mm_set1_epi8(9);
let mut r = _mm_set1_epi8(0);
_mm_store_si128(&mut r, a);
unsafe {
_mm_store_si128(&mut r, a);
}
assert_eq_m128i(r, a);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeu_si128() {
const fn test_mm_storeu_si128() {
let a = _mm_set1_epi8(9);
let mut r = _mm_set1_epi8(0);
_mm_storeu_si128(&mut r, a);
unsafe {
_mm_storeu_si128(&mut r, a);
}
assert_eq_m128i(r, a);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storel_epi64() {
const fn test_mm_storel_epi64() {
let a = _mm_setr_epi64x(2, 9);
let mut r = _mm_set1_epi8(0);
_mm_storel_epi64(&mut r, a);
unsafe {
_mm_storel_epi64(&mut r, a);
}
assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
}
@ -4244,10 +4254,12 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_si128() {
fn test_mm_stream_si128() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let mut r = _mm_undefined_si128();
_mm_stream_si128(ptr::addr_of_mut!(r), a);
unsafe {
_mm_stream_si128(ptr::addr_of_mut!(r), a);
}
_mm_sfence();
assert_eq_m128i(r, a);
}
@ -4256,10 +4268,12 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_si32() {
fn test_mm_stream_si32() {
let a: i32 = 7;
let mut mem = boxed::Box::<i32>::new(-1);
_mm_stream_si32(ptr::addr_of_mut!(*mem), a);
unsafe {
_mm_stream_si32(ptr::addr_of_mut!(*mem), a);
}
_mm_sfence();
assert_eq!(a, *mem);
}
@ -4909,40 +4923,40 @@ mod tests {
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_load_pd() {
const fn test_mm_load_pd() {
let mem = Memory {
data: [1.0f64, 2.0, 3.0, 4.0],
};
let vals = &mem.data;
let d = vals.as_ptr();
let r = _mm_load_pd(d);
let r = unsafe { _mm_load_pd(d) };
assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_load_sd() {
const fn test_mm_load_sd() {
let a = 1.;
let expected = _mm_setr_pd(a, 0.);
let r = _mm_load_sd(&a);
let r = unsafe { _mm_load_sd(&a) };
assert_eq_m128d(r, expected);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadh_pd() {
const fn test_mm_loadh_pd() {
let a = _mm_setr_pd(1., 2.);
let b = 3.;
let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
let r = _mm_loadh_pd(a, &b);
let r = unsafe { _mm_loadh_pd(a, &b) };
assert_eq_m128d(r, expected);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadl_pd() {
const fn test_mm_loadl_pd() {
let a = _mm_setr_pd(1., 2.);
let b = 3.;
let expected = _mm_setr_pd(3., get_m128d(a, 1));
let r = _mm_loadl_pd(a, &b);
let r = unsafe { _mm_loadl_pd(a, &b) };
assert_eq_m128d(r, expected);
}
@ -4950,7 +4964,7 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_pd() {
fn test_mm_stream_pd() {
#[repr(align(128))]
struct Memory {
pub data: [f64; 2],
@ -4958,7 +4972,9 @@ mod tests {
let a = _mm_set1_pd(7.0);
let mut mem = Memory { data: [-1.0; 2] };
_mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
unsafe {
_mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
}
_mm_sfence();
for i in 0..2 {
assert_eq!(mem.data[i], get_m128d(a, i));
@ -4966,132 +4982,154 @@ mod tests {
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_store_sd() {
const fn test_mm_store_sd() {
let mut dest = 0.;
let a = _mm_setr_pd(1., 2.);
_mm_store_sd(&mut dest, a);
unsafe {
_mm_store_sd(&mut dest, a);
}
assert_eq!(dest, _mm_cvtsd_f64(a));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_store_pd() {
const fn test_mm_store_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
_mm_store_pd(d, *black_box(&a));
unsafe {
_mm_store_pd(d, *black_box(&a));
}
assert_eq!(vals[0], 1.0);
assert_eq!(vals[1], 2.0);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeu_pd() {
const fn test_mm_storeu_pd() {
// guaranteed to be aligned to 16 bytes
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
// so p is *not* aligned to 16 bytes
let p = vals.as_mut_ptr().offset(1);
_mm_storeu_pd(p, *black_box(&a));
unsafe {
let p = vals.as_mut_ptr().offset(1);
_mm_storeu_pd(p, *black_box(&a));
}
assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeu_si16() {
const fn test_mm_storeu_si16() {
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
_mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
unsafe {
_mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
}
let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeu_si32() {
const fn test_mm_storeu_si32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let mut r = _mm_setr_epi32(5, 6, 7, 8);
_mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
unsafe {
_mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
}
let e = _mm_setr_epi32(1, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeu_si64() {
const fn test_mm_storeu_si64() {
let a = _mm_setr_epi64x(1, 2);
let mut r = _mm_setr_epi64x(3, 4);
_mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
unsafe {
_mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
}
let e = _mm_setr_epi64x(1, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_store1_pd() {
const fn test_mm_store1_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
_mm_store1_pd(d, *black_box(&a));
unsafe {
_mm_store1_pd(d, *black_box(&a));
}
assert_eq!(vals[0], 1.0);
assert_eq!(vals[1], 1.0);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_store_pd1() {
const fn test_mm_store_pd1() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
_mm_store_pd1(d, *black_box(&a));
unsafe {
_mm_store_pd1(d, *black_box(&a));
}
assert_eq!(vals[0], 1.0);
assert_eq!(vals[1], 1.0);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storer_pd() {
const fn test_mm_storer_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
_mm_storer_pd(d, *black_box(&a));
unsafe {
_mm_storer_pd(d, *black_box(&a));
}
assert_eq!(vals[0], 2.0);
assert_eq!(vals[1], 1.0);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storeh_pd() {
const fn test_mm_storeh_pd() {
let mut dest = 0.;
let a = _mm_setr_pd(1., 2.);
_mm_storeh_pd(&mut dest, a);
unsafe {
_mm_storeh_pd(&mut dest, a);
}
assert_eq!(dest, get_m128d(a, 1));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_storel_pd() {
const fn test_mm_storel_pd() {
let mut dest = 0.;
let a = _mm_setr_pd(1., 2.);
_mm_storel_pd(&mut dest, a);
unsafe {
_mm_storel_pd(&mut dest, a);
}
assert_eq!(dest, _mm_cvtsd_f64(a));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadr_pd() {
const fn test_mm_loadr_pd() {
let mut mem = Memory {
data: [1.0f64, 2.0, 3.0, 4.0],
};
let vals = &mut mem.data;
let d = vals.as_ptr();
let r = _mm_loadr_pd(d);
let r = unsafe { _mm_loadr_pd(d) };
assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadu_pd() {
const fn test_mm_loadu_pd() {
// guaranteed to be aligned to 16 bytes
let mut mem = Memory {
data: [1.0f64, 2.0, 3.0, 4.0],
@ -5099,31 +5137,31 @@ mod tests {
let vals = &mut mem.data;
// so this will *not* be aligned to 16 bytes
let d = vals.as_ptr().offset(1);
let d = unsafe { vals.as_ptr().offset(1) };
let r = _mm_loadu_pd(d);
let r = unsafe { _mm_loadu_pd(d) };
let e = _mm_setr_pd(2.0, 3.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadu_si16() {
const fn test_mm_loadu_si16() {
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_loadu_si16(ptr::addr_of!(a) as *const _) };
assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadu_si32() {
const fn test_mm_loadu_si32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_loadu_si32(ptr::addr_of!(a) as *const _) };
assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_loadu_si64() {
const fn test_mm_loadu_si64() {
let a = _mm_setr_epi64x(5, 6);
let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_loadu_si64(ptr::addr_of!(a) as *const _) };
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
}
@ -5302,16 +5340,16 @@ mod tests {
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_load1_pd() {
const fn test_mm_load1_pd() {
let d = -5.0;
let r = _mm_load1_pd(&d);
let r = unsafe { _mm_load1_pd(&d) };
assert_eq_m128d(r, _mm_setr_pd(d, d));
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_load_pd1() {
const fn test_mm_load_pd1() {
let d = -5.0;
let r = _mm_load_pd1(&d);
let r = unsafe { _mm_load_pd1(&d) };
assert_eq_m128d(r, _mm_setr_pd(d, d));
}

View file

@ -239,7 +239,7 @@ mod tests {
}
#[simd_test(enable = "sse3")]
unsafe fn test_mm_lddqu_si128() {
fn test_mm_lddqu_si128() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
1, 2, 3, 4,
@ -247,7 +247,7 @@ mod tests {
9, 10, 11, 12,
13, 14, 15, 16,
);
let r = _mm_lddqu_si128(&a);
let r = unsafe { _mm_lddqu_si128(&a) };
assert_eq_m128i(a, r);
}
@ -273,9 +273,9 @@ mod tests {
}
#[simd_test(enable = "sse3")]
const unsafe fn test_mm_loaddup_pd() {
const fn test_mm_loaddup_pd() {
let d = -5.0;
let r = _mm_loaddup_pd(&d);
let r = unsafe { _mm_loaddup_pd(&d) };
assert_eq_m128d(r, _mm_setr_pd(d, d));
}
}

View file

@ -1219,20 +1219,20 @@ mod tests {
}
#[simd_test(enable = "sse4.1")]
const unsafe fn test_mm_blendv_pd() {
const fn test_mm_blendv_pd() {
let a = _mm_set1_pd(0.0);
let b = _mm_set1_pd(1.0);
let mask = transmute(_mm_setr_epi64x(0, -1));
let mask = _mm_castsi128_pd(_mm_setr_epi64x(0, -1));
let r = _mm_blendv_pd(a, b, mask);
let e = _mm_setr_pd(0.0, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "sse4.1")]
const unsafe fn test_mm_blendv_ps() {
const fn test_mm_blendv_ps() {
let a = _mm_set1_ps(0.0);
let b = _mm_set1_ps(1.0);
let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
let r = _mm_blendv_ps(a, b, mask);
let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
assert_eq_m128(r, e);
@ -1949,9 +1949,9 @@ mod tests {
}
#[simd_test(enable = "sse4.1")]
unsafe fn test_mm_stream_load_si128() {
fn test_mm_stream_load_si128() {
let a = _mm_set_epi64x(5, 6);
let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
let r = unsafe { _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _) };
assert_eq_m128i(a, r);
}
}

View file

@ -613,6 +613,7 @@ mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use stdarch_test::simd_test;
use crate::core_arch::simd::*;
use crate::core_arch::x86::*;
use std::ptr;
@ -625,7 +626,7 @@ mod tests {
assert!(s.len() <= 16);
let mut array = [0u8; 16];
array[..s.len()].copy_from_slice(s);
unsafe { transmute(array) }
u8x16::from_array(array).as_m128i()
}
#[simd_test(enable = "sse4.2")]

View file

@ -206,7 +206,7 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_sd() {
fn test_mm_stream_sd() {
let mut mem = MemoryF64 {
data: [1.0_f64, 2.0],
};
@ -216,7 +216,9 @@ mod tests {
let x = _mm_setr_pd(3.0, 4.0);
_mm_stream_sd(d, x);
unsafe {
_mm_stream_sd(d, x);
}
_mm_sfence();
}
assert_eq!(mem.data[0], 3.0);
@ -232,7 +234,7 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_ss() {
fn test_mm_stream_ss() {
let mut mem = MemoryF32 {
data: [1.0_f32, 2.0, 3.0, 4.0],
};
@ -242,7 +244,9 @@ mod tests {
let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
_mm_stream_ss(d, x);
unsafe {
_mm_stream_ss(d, x);
}
_mm_sfence();
}
assert_eq!(mem.data[0], 5.0);

View file

@ -197,47 +197,53 @@ mod tests {
#[simd_test(enable = "xsave")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsave() {
fn test_xsave() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsave(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsave(b.ptr(), m);
unsafe {
_xsave(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsave(b.ptr(), m);
}
}
#[simd_test(enable = "xsave")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xgetbv() {
fn test_xgetbv() {
let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK;
let xcr: u64 = _xgetbv(xcr_n);
let xcr_cpy: u64 = _xgetbv(xcr_n);
let xcr: u64 = unsafe { _xgetbv(xcr_n) };
let xcr_cpy: u64 = unsafe { _xgetbv(xcr_n) };
assert_eq!(xcr, xcr_cpy);
}
#[simd_test(enable = "xsave,xsaveopt")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsaveopt() {
fn test_xsaveopt() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsaveopt(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsaveopt(b.ptr(), m);
unsafe {
_xsaveopt(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsaveopt(b.ptr(), m);
}
}
#[simd_test(enable = "xsave,xsavec")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsavec() {
fn test_xsavec() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsavec(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsavec(b.ptr(), m);
unsafe {
_xsavec(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsavec(b.ptr(), m);
}
}
}

View file

@ -581,267 +581,297 @@ mod tests {
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_loadconfig() {
let config = __tilecfg::default();
_tile_loadconfig(config.as_ptr());
_tile_release();
fn test_tile_loadconfig() {
unsafe {
let config = __tilecfg::default();
_tile_loadconfig(config.as_ptr());
_tile_release();
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_storeconfig() {
let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
_tile_loadconfig(config.as_ptr());
let mut _config = __tilecfg::default();
_tile_storeconfig(_config.as_mut_ptr());
_tile_release();
assert_eq!(config, _config);
fn test_tile_storeconfig() {
unsafe {
let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
_tile_loadconfig(config.as_ptr());
let mut _config = __tilecfg::default();
_tile_storeconfig(_config.as_mut_ptr());
_tile_release();
assert_eq!(config, _config);
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_zero() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mut out = [[1_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[0; 64]; 16]);
fn test_tile_zero() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mut out = [[1_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[0; 64]; 16]);
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_stored() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mut out = [[1_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[0; 64]; 16]);
fn test_tile_stored() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mut out = [[1_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[0; 64]; 16]);
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_loadd() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
fn test_tile_loadd() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_stream_loadd() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
fn test_tile_stream_loadd() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
}
#[simd_test(enable = "amx-tile")]
unsafe fn test_tile_release() {
_tile_release();
fn test_tile_release() {
unsafe {
_tile_release();
}
}
#[simd_test(enable = "amx-bf16,avx512f")]
unsafe fn test_tile_dpbf16ps() {
_init_amx();
let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
let ones: [u8; 1024] = transmute([bf16_1; 512]);
let twos: [u8; 1024] = transmute([bf16_2; 512]);
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbf16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
fn test_tile_dpbf16ps() {
unsafe {
_init_amx();
let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
let ones: [u8; 1024] = transmute([bf16_1; 512]);
let twos: [u8; 1024] = transmute([bf16_2; 512]);
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbf16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-int8")]
unsafe fn test_tile_dpbssd() {
_init_amx();
let ones = [-1_i8; 1024];
let twos = [-2_i8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
_tile_dpbssd::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[128_i32; 16]; 16]);
fn test_tile_dpbssd() {
unsafe {
_init_amx();
let ones = [-1_i8; 1024];
let twos = [-2_i8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
_tile_dpbssd::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[128_i32; 16]; 16]);
}
}
#[simd_test(enable = "amx-int8")]
unsafe fn test_tile_dpbsud() {
_init_amx();
let ones = [-1_i8; 1024];
let twos = [2_u8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbsud::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[-128_i32; 16]; 16]);
fn test_tile_dpbsud() {
unsafe {
_init_amx();
let ones = [-1_i8; 1024];
let twos = [2_u8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbsud::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[-128_i32; 16]; 16]);
}
}
#[simd_test(enable = "amx-int8")]
unsafe fn test_tile_dpbusd() {
_init_amx();
let ones = [1_u8; 1024];
let twos = [-2_i8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
_tile_dpbusd::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[-128_i32; 16]; 16]);
fn test_tile_dpbusd() {
unsafe {
_init_amx();
let ones = [1_u8; 1024];
let twos = [-2_i8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
_tile_dpbusd::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[-128_i32; 16]; 16]);
}
}
#[simd_test(enable = "amx-int8")]
unsafe fn test_tile_dpbuud() {
_init_amx();
let ones = [1_u8; 1024];
let twos = [2_u8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbuud::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[128_i32; 16]; 16]);
fn test_tile_dpbuud() {
unsafe {
_init_amx();
let ones = [1_u8; 1024];
let twos = [2_u8; 1024];
let mut res = [[0_i32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbuud::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[128_i32; 16]; 16]);
}
}
#[simd_test(enable = "amx-fp16")]
unsafe fn test_tile_dpfp16ps() {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_dpfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
fn test_tile_dpfp16ps() {
unsafe {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_dpfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-complex")]
unsafe fn test_tile_cmmimfp16ps() {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_cmmimfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
fn test_tile_cmmimfp16ps() {
unsafe {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_cmmimfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[64f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-complex")]
unsafe fn test_tile_cmmrlfp16ps() {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_cmmrlfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[0f32; 16]; 16]);
fn test_tile_cmmrlfp16ps() {
unsafe {
_init_amx();
let ones = [1f16; 512];
let twos = [2f16; 512];
let mut res = [[0f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
_tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
_tile_cmmrlfp16ps::<0, 1, 2>();
_tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
_tile_release();
assert_eq!(res, [[0f32; 16]; 16]);
}
}
const BF8_ONE: u8 = 0x3c;
@ -850,223 +880,245 @@ mod tests {
const HF8_TWO: u8 = 0x40;
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dpbf8ps() {
_init_amx();
let ones = [BF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
fn test_tile_dpbf8ps() {
unsafe {
_init_amx();
let ones = [BF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dpbhf8ps() {
_init_amx();
let ones = [BF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbhf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
fn test_tile_dpbhf8ps() {
unsafe {
_init_amx();
let ones = [BF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbhf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dphbf8ps() {
_init_amx();
let ones = [HF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
fn test_tile_dphbf8ps() {
unsafe {
_init_amx();
let ones = [HF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dphf8ps() {
_init_amx();
let ones = [HF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
fn test_tile_dphf8ps() {
unsafe {
_init_amx();
let ones = [HF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
}
#[simd_test(enable = "amx-movrs")]
unsafe fn test_tile_loaddrs() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
fn test_tile_loaddrs() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
}
#[simd_test(enable = "amx-movrs")]
unsafe fn test_tile_stream_loaddrs() {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_movrow() {
_init_amx();
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_movrow::<0>(i);
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
fn test_tile_stream_loaddrs() {
unsafe {
_init_amx();
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
let mat = [1_i8; 1024];
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowd2ps() {
_init_amx();
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
fn test_tile_movrow() {
unsafe {
_init_amx();
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowd2ps::<0>(i);
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_movrow::<0>(i);
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
}
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowps2phh() {
_init_amx();
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
fn test_tile_cvtrowd2ps() {
unsafe {
_init_amx();
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phh::<0>(i);
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowd2ps::<0>(i);
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
}
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowps2phl() {
_init_amx();
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
fn test_tile_cvtrowps2phh() {
unsafe {
_init_amx();
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phl::<0>(i);
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phh::<0>(i);
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
);
}
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
fn test_tile_cvtrowps2phl() {
unsafe {
_init_amx();
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phl::<0>(i);
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
);
}
}
}
#[simd_test(enable = "amx-tf32")]
unsafe fn test_tile_mmultf32ps() {
_init_amx();
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
let mut res = [[0.0; 16]; 16];
fn test_tile_mmultf32ps() {
unsafe {
_init_amx();
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
let mut res = [[0.0; 16]; 16];
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(a.as_ptr().cast(), 64);
_tile_loadd::<2>(b.as_ptr().cast(), 64);
_tile_mmultf32ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(a.as_ptr().cast(), 64);
_tile_loadd::<2>(b.as_ptr().cast(), 64);
_tile_mmultf32ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
assert_eq!(res, expected);
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
assert_eq!(res, expected);
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -77,12 +77,14 @@ mod tests {
#[simd_test(enable = "fxsr")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_fxsave64() {
fn test_fxsave64() {
let mut a = FxsaveArea::new();
let mut b = FxsaveArea::new();
fxsr::_fxsave64(a.ptr());
fxsr::_fxrstor64(a.ptr());
fxsr::_fxsave64(b.ptr());
unsafe {
fxsr::_fxsave64(a.ptr());
fxsr::_fxrstor64(a.ptr());
fxsr::_fxsave64(b.ptr());
}
}
}

View file

@ -204,10 +204,12 @@ mod tests {
// Miri cannot support this until it is clear how it fits in the Rust memory model
// (non-temporal store)
#[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_si64() {
fn test_mm_stream_si64() {
let a: i64 = 7;
let mut mem = boxed::Box::<i64>::new(-1);
_mm_stream_si64(ptr::addr_of_mut!(*mem), a);
unsafe {
_mm_stream_si64(ptr::addr_of_mut!(*mem), a);
}
_mm_sfence();
assert_eq!(a, *mem);
}

View file

@ -132,37 +132,43 @@ mod tests {
#[simd_test(enable = "xsave")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsave64() {
fn test_xsave64() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsave64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsave64(b.ptr(), m);
unsafe {
_xsave64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsave64(b.ptr(), m);
}
}
#[simd_test(enable = "xsave,xsaveopt")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsaveopt64() {
fn test_xsaveopt64() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsaveopt64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsaveopt64(b.ptr(), m);
unsafe {
_xsaveopt64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsaveopt64(b.ptr(), m);
}
}
#[simd_test(enable = "xsave,xsavec")]
#[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
unsafe fn test_xsavec64() {
fn test_xsavec64() {
let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
_xsavec64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsavec64(b.ptr(), m);
unsafe {
_xsavec64(a.ptr(), m);
_xrstor64(a.ptr(), m);
_xsavec64(b.ptr(), m);
}
}
}

View file

@ -1,3 +1,43 @@
# Not supported by qemu (will throw illegal instruction)
vamax_f16
vamaxq_f16
vamin_f16
vaminq_f16
vscale_f16
vscale_f32
vscaleq_f16
vscaleq_f32
vscaleq_f64
vluti2_lane_p16
vluti2_lane_p8
vluti2_lane_s16
vluti2_lane_s8
vluti2_lane_u16
vluti2_lane_u8
vluti2q_lane_p16
vluti2q_lane_p8
vluti2q_lane_s16
vluti2q_lane_s8
vluti2q_lane_u16
vluti2_laneq_f16
vluti2_lane_f16
vluti2_laneq_f16
vluti2_laneq_p16
vluti2_laneq_p8
vluti2_laneq_s16
vluti2_laneq_s8
vluti2_laneq_u16
vluti2_laneq_u8
vluti2q_lane_f16
vluti2q_laneq_f16
vluti2q_laneq_p16
vluti2q_laneq_p8
vluti2q_laneq_s16
vluti2q_laneq_s8
vluti2q_laneq_u16
vluti2q_laneq_u8
vluti2q_lane_u8
# Not implemented in stdarch yet
vbfdot_f32
vbfdot_lane_f32
@ -30,18 +70,6 @@ vrnd32x_f64
vrnd32z_f64
vrnd64x_f64
vrnd64z_f64
vluti2_lane_p16
vluti2_lane_p8
vluti2_lane_s16
vluti2_lane_s8
vluti2_lane_u16
vluti2_lane_u8
vluti2q_lane_p16
vluti2q_lane_p8
vluti2q_lane_s16
vluti2q_lane_s8
vluti2q_lane_u16
vluti2q_lane_u8
vluti4q_lane_f16_x2
vluti4q_lane_p16_x2
vluti4q_lane_p8

View file

@ -38,6 +38,45 @@ vusdotq_lane_s32
vusdotq_laneq_s32
# Below are in common to missing_aarch64.txt
# Not supported by qemu (will throw illegal instruction)
vamax_f16
vamaxq_f16
vamin_f16
vaminq_f16
vscale_f16
vscale_f32
vscaleq_f16
vscaleq_f32
vscaleq_f64
vluti2_lane_p16
vluti2_lane_p8
vluti2_lane_s16
vluti2_lane_s8
vluti2_lane_u16
vluti2_lane_u8
vluti2q_lane_p16
vluti2q_lane_p8
vluti2q_lane_s16
vluti2q_lane_s8
vluti2q_lane_u16
vluti2_laneq_f16
vluti2_lane_f16
vluti2_laneq_f16
vluti2_laneq_p16
vluti2_laneq_p8
vluti2_laneq_s16
vluti2_laneq_s8
vluti2_laneq_u16
vluti2_laneq_u8
vluti2q_lane_f16
vluti2q_laneq_f16
vluti2q_laneq_p16
vluti2q_laneq_p8
vluti2q_laneq_s16
vluti2q_laneq_s8
vluti2q_laneq_u16
vluti2q_laneq_u8
vluti2q_lane_u8
# Not implemented in stdarch yet
vbfdot_f32
@ -71,18 +110,6 @@ vrnd32x_f64
vrnd32z_f64
vrnd64x_f64
vrnd64z_f64
vluti2_lane_p16
vluti2_lane_p8
vluti2_lane_s16
vluti2_lane_s8
vluti2_lane_u16
vluti2_lane_u8
vluti2q_lane_p16
vluti2q_lane_p8
vluti2q_lane_s16
vluti2q_lane_s8
vluti2q_lane_u16
vluti2q_lane_u8
vluti4q_lane_f16_x2
vluti4q_lane_p16_x2
vluti4q_lane_p8

View file

@ -0,0 +1,300 @@
# stdarch-gen-arm generator guide
## Running the generator
- Run: `cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec`
```
$ cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.18s
Running `target/debug/stdarch-gen-arm crates/stdarch-gen-arm/spec`
```
## Input/Output
### Input files (intrinsic YAML definitions)
- `crates/stdarch-gen-arm/spec/<feature>/*.spec.yml`
### Output files
- Generated intrinsics:
- `crates/core_arch/src/<arch>/<feature>/generated.rs`
- Generated load/store tests:
- `crates/core_arch/src/<arch>/<feature>/ld_st_tests_<arch>.rs`
- Only generated when `test: { load: <idx> }` or `test: { store: <idx> }` is set for SVE/SVE2 intrinsics.
## `.spec.yml` file anatomy
```
---
Configs
---
Variable definitions
---
Intrinsic definitions
---
```
- If you're new to YAML syntax, consider [reviewing](https://quickref.me/yaml.html) some of the less obvious syntax and features.
- For example, mapping an attribute to a sequence can be done in two different ways:
```yaml
attribute: [item_a, item_b, item_c]
```
or
```yaml
attribute:
- item_a
- item_b
- item_c
```
## Configs
- Mappings defining top-level settings applied to all intrinsics:
- `arch_cfgs`
- Sequence of mappings specifying `arch_name`, `target_feature` (sequence), and `llvm_prefix`.
- `uses_neon_types` (_Optional_)
- A boolean specifying whether to emit NEON type imports in generated code.
- `auto_big_endian` (_Optional_)
- A boolean specifying whether to auto-generate big-endian shuffles when possible.
- `auto_llvm_sign_conversion` (_Optional_)
- A boolean specifying whether to auto-convert LLVM wrapper args to signed types.
## Variable definitions
- Defines YAML anchors/variables to avoid repetition.
- Commonly used for stability attributes, cfgs and target features.
## Intrinsic definitions
### Example
```yaml
- name: "vtst{neon_type[0].no}"
doc: "Signed compare bitwise Test bits nonzero"
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
safety: safe
types:
- [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
- [int64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
- [poly64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
- [poly64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
compose:
- Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}]
- Let: [d, "{type[2]}", "{type[3]}"]
- FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]]
```
### Explanation of fields
- `name`
- The name of the intrinsic
- Often built from a base name followed by a type suffix
- `doc` (_Optional_)
- A string explaining the purpose of the intrinsic
- `static_defs` (_Optional_)
- A sequence of const generics of the format `"const <NAME>: <type>"`
- `arguments`
- A sequence of strings in the format `"<argname>: <argtype>"`
- `return_type` (_Optional_)
- A string specifying the return type. If omitted, the intrinsic returns `()`.
- `attr` (_Optional_)
- A sequence of items defining the attributes to be applied to the intrinsic. Often stability attributes, target features, or `assert_instr` tests. At least one of `attr` or `assert_instr` must be set.
- `target_features` (_Optional_)
- A sequence of target features to enable for this intrinsic (merged with any global `arch_cfgs` settings).
- `assert_instr` (_Optional_)
- A sequence of strings expected to be found in the assembly. Required if `attr` is not set.
- `safety` (_Optional_)
- Use `safe`, or map `unsafe:` to a sequence of unsafety comments:
- `custom: "<string>"`
- `uninitialized`
- `pointer_offset`, `pointer_offset_vnum`, or `dereference` (optionally qualified with `predicated`, `predicated_non_faulting`, or `predicated_first_faulting`)
- `unpredictable_on_fault`
- `non_temporal`
- `neon`
- `no_provenance: "<string>"`
- `substitutions` (_Optional_)
- Mappings of custom wildcard names to either `MatchSize` or `MatchKind` expressions
- `types`
- A sequence or sequence of sequences specifying the types to use when producing each intrinsic variant. These sequences can then be indexed by wildcards.
- `constraints` (_Optional_)
- A sequence of mappings. Each specifies a variable and a constraint. The available mappings are:
- Assert a variable's value exists in a sequence of i32's
- Usage: `{ variable: <name>, any_values: [<i32>,...] }`
- Assert a variable's value exists in a range (inclusive)
- Usage: `{ variable: <name>, range: [<i32>, <i32>] }`
- Assert a variable's value exists in a range via a match (inclusive)
- Usage: `{ variable: <name>, range: <MatchSize returning [i32,i32]> }`
- Assert a variable's value does not exceed the number of elements in a SVE type `<type>`.
- Usage: `{ variable: <name>, sve_max_elems_type: <type> }`
- Assert a variable's value does not exceed the number of elements in a vector type `<type>`.
- Usage: `{ variable: <name>, vec_max_elems_type: <type> }`
- `predication_methods` (_Optional_)
- Configuration for predicate-form variants. Only used when the intrinsic name includes an `_m*_` wildcard (e.g., `{_mx}`, `{_mxz}`).
- `zeroing_method`: Required when requesting `_z`; either `{ drop: <arg> }` to remove an argument and replace it with a zero initialiser, or `{ select: <predicate_var> }` to select zeros into a predicate.
- `dont_care_method`: How `_x` should be implemented (`inferred`, `as_zeroing`, or `as_merging`).
- `compose`
- A sequence of expressions that make up the body of the intrinsic
- `big_endian_inverse` (_Optional_)
- A boolean, default false. If true, generates two implementations of each intrinsic variant, one for each endianness, and attempts to automatically generate the required bit swizzles
- `visibility` (_Optional_)
- Function visibility. One of `public` (default) or `private`.
- `n_variant_op` (_Optional_)
- Enables generation of an `_n` variant when the intrinsic name includes the `{_n}` wildcard. Set to the operand name that should be splatted for the `_n` form.
- `test` (_Optional_)
- When set, load/store tests are automatically generated.
- A mapping of either `load` or `store` to a number that indexes `types` to specify the type that the test should be addressing in memory.
### Expressions
#### Common
- `Let`
- Defines a variable
- Usage: `Let: [<variable>, <type(optional)>, <expression>]`
- `Const`
- Defines a const
- Usage: `Const: [<variable>, <type>, <expression>]`
- `Assign`
- Performs variable assignment
- Usage: `Assign: [<variable>, <expression>]`
- `FnCall`
- Performs a function call
- Usage: `FnCall: [<function pointer: expression>, [<argument: expression>, ... ], [<turbofish argument: expression>, ...](optional), <unsafe wrapper(optional): bool>]`
- `MacroCall`
- Performs a macro call
- Usage: `MacroCall: [<macro name>, <token stream>]`
- `MethodCall`
- Performs a method call
- Usage: `MethodCall: [<object: expression>, <method name>, [<argument: expression>, ... ]]`
- `LLVMLink`
- Creates an LLVM link and stores the function's name in the wildcard `{llvm_link}` for later use in subsequent expressions.
- If left unset, the arguments and return type inherit from the intrinsic's signature by default. The links will also be set automatically if unset.
- Usage:
```yaml
LLVMLink:
name: <name>
arguments: [<expression>, ... ](optional)
return_type: <return type>(optional)
links: (optional)
- link: <link>
arch: <arch>
- ...
```
- `Identifier`
- Emits a symbol. Prepend with a `$` to treat it as a scope variable, which engages variable tracking and enables inference. For example, `my_function_name` for a generic symbol or `$my_variable` for a variable.
- Usage: `Identifier: [<symbol name>, <Variable|Symbol>]`
- `CastAs`
- Casts an expression to an unchecked type
- Usage: `CastAs: [<expression>, <type>]`
- `MatchSize`
- Allows for conditional generation depending on the size of a specified type
- Usage:
```yaml
MatchSize:
- <type>
- default: <expression>
byte(optional): <expression>
halfword(optional): <expression>
doubleword(optional): <expression>
```
- `MatchKind`
- Allows for conditional generation depending on the kind of a specified type
- Usage:
```yaml
MatchKind:
- <type>
- default: <expression>
float(optional): <expression>
unsigned(optional): <expression>
```
#### Rarely Used
- `IntConstant`
- Constant signed integer expression
- Usage: `IntConstant: <i32>`
- `FloatConstant`
- Constant floating-point expression
- Usage: `FloatConstant: <f32>`
- `BoolConstant`
- Constant boolean expression
- Usage: `BoolConstant: <bool>`
- `Array`
- An array of expressions
- Usage: `Array: [<expression>, ...]`
- `SvUndef`
- Returns the LLVM `undef` symbol
- Usage: `SvUndef`
- `Multiply`
- Simply `*`
- Usage: `Multiply: [<expression>, <expression>]`
- `Xor`
- Simply `^`
- Usage: `Xor: [<expression>, <expression>]`
- `ConvertConst`
- Converts the specified constant to the specified type's kind
- Usage: `ConvertConst: [<type>, <i32>]`
- `Type`
- Yields the given type in the Rust representation
- Usage: `Type: [<type>]`
### Wildstrings
- Wildstrings let you take advantage of wildcards.
- For example, they are often used in intrinsic names `name: "vtst{neon_type[0].no}"`
- As shown above, wildcards are identified by the surrounding curly brackets.
- Double curly brackets can be used to escape wildcard functionality if you need literal curly brackets in the generated intrinsic.
### Wildcards
Wildcards are heavily used in the spec. They let you write generalised definitions for a group of intrinsics that generate multiple variants. The wildcard itself is replaced with the relevant string in each variant.
Ignoring endianness, for each row in the `types` field of an intrinsic in the spec, a variant of the intrinsic will be generated. That row's contents can be indexed by the wildcards. Below is the behaviour of each wildcard.
- `type[<index: usize>]`
- Replaced in each variant with the value in the indexed position in the relevant row of the `types` field.
- For unnested sequences of `types` (i.e., `types` is a sequence where each element is a single item, not another sequence), the square brackets can be omitted. Simply: `type`
- `neon_type[<index: usize>]`
- Extends the behaviour of `type` with some NEON-specific features and inference.
- Tuples: This wildcard can also be written as `neon_type_x<n>` where `n` is in the set `{2,3,4}`. This generates the `n`-tuple variant of the (inferred) NEON type.
- Suffixes: These modify the behaviour of the wildcard from simple substitution.
- `no` - normal behaviour. Tries to do as much work as it can for you, inferring when to emit:
- Regular type-size suffixes: `_s8`, `_u16`, `_f32`, ...
- `q` variants for double-width (128b) vector types: `q_s8`, `q_u16`, `q_f32`, ...
- `_x<n>` variants for tuple vector types: `_s8_x2`, `_u32_x3`, `_f64_x4`, ...
- As well as any combination of the above: `q_s16_x16` ...
- Most of the other suffixes modify the normal behaviour by disabling features or adding new ones. (See table below)
- `sve_type[<index: usize>]`
- Similar to `neon_type`, but without the suffixes.
- `size[<index: usize>]`
- The size (in bits) of the indexed type.
- `size_minus_one[<index: usize>]`
- Emits the size (in bits) of the indexed type minus one.
- `size_literal[<index: usize>]`
- The literal representation of the indexed type.
- `b`: byte, `h`: halfword, `w`: word, or `d`: double.
- `type_kind[<index: usize>]`
- The literal representation of the indexed type's kind.
- `f`: float, `s`: signed, `u`: unsigned, `p`: polynomial, `b`: boolean.
- `size_in_bytes_log2[<index: usize>]`
- Log2 of the size of the indexed type in *bytes*.
- `predicate[<index: usize>]`
- SVE predicate vector type inferred from the indexed type.
- `max_predicate`
- The same as predicate, but uses the largest type in the relevant `types` sequence/row.
- `_n`
- Emits the current N-variant suffix when `n_variant_op` is configured.
- `<wildcard> as <type>`
- If `<wildcard>` evaluates to a vector, it produces a vector of the same shape, but with `<type>` as the base type.
- `llvm_link`
- If the `LLVMLink` mapping has been set for an intrinsic, this will give the name of the link.
- `_m*`
- Predicate form masks. Use wildcards such as `{_mx}` or `{_mxz}` to expand merging/don't-care/zeroing variants according to the mask.
- `<custom>`
- You may simply call upon wildcards defined under `substitutions`.
### neon_type suffixes
| suffix | implication |
| ----------------- | --------------------------------------------- |
| `.no` | Normal |
| `.noq` | Never include `q`s |
| `.nox` | Never include `_x<n>`s |
| `.N` | Include `_n_`, e.g., `_n_s8` |
| `.noq_N` | Include `_n_`, but never `q`s |
| `.dup` | Include `_dup_`, e.g., `_dup_s8` |
| `.dup_nox` | Include `_dup_` but never `_x<n>`s |
| `.lane` | Include `_lane_`, e.g., `_lane_s8` |
| `.lane_nox` | Include `_lane_`, but never `_x<n>`s |
| `.rot90` | Include `_rot90_`, e.g., `_rot90_s8` |
| `.rot180` | Include `_rot180_`, e.g., `_rot180_s8` |
| `.rot270` | Include `_rot270_`, e.g., `_rot270_s8` |
| `.rot90_lane` | Include `_rot90_lane_` |
| `.rot180_lane` | Include `_rot180_lane_` |
| `.rot270_lane` | Include `_rot270_lane_` |
| `.rot90_laneq` | Include `_rot90_laneq_` |
| `.rot180_laneq` | Include `_rot180_laneq_` |
| `.rot270_laneq` | Include `_rot270_laneq_` |
| `.base` | Produce only the size, e.g., `8`, `16` |
| `.u` | Produce the type's unsigned equivalent |
| `.laneq_nox` | Include `_laneq_`, but never `_x<n>`s |
| `.tuple` | Produce only the size of the tuple, e.g., `3` |
| `.base_byte_size` | Produce only the size in bytes. |

View file

@ -66,6 +66,14 @@ neon-unstable-feat-lut: &neon-unstable-feat-lut
aarch64-stable-jscvt: &aarch64-stable-jscvt
FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "CURRENT_RUSTC_VERSION"']]
# #[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
neon-unstable-feat-lrcpc3: &neon-unstable-feat-lrcpc3
FnCall: [unstable, ['feature = "stdarch_neon_feat_lrcpc3"', 'issue = "none"']]
# #[unstable(feature = "stdarch_neon_fp8", issue = "none")]
neon-unstable-fp8: &neon-unstable-fp8
FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']]
# #[cfg(target_endian = "little")]
little-endian: &little-endian
FnCall: [cfg, ['target_endian = "little"']]
@ -4398,6 +4406,116 @@ intrinsics:
- - FnCall: [transmute, [a]]
- FnCall: [transmute, [b]]
- name: "vldap1{neon_type[1].lane_nox}"
doc: "Load-acquire RCpc one single-element structure to one lane of one register"
arguments: ["ptr: {type[0]}", "src: {type[1]}"]
static_defs: ["const LANE: i32"]
return_type: "{type[1]}"
safety:
unsafe: [neon]
attr:
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ["2"]]
- *neon-unstable-feat-lrcpc3
types:
- ['*const i64', int64x1_t, 'static_assert!', 'LANE == 0']
- ['*const i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
compose:
- FnCall: ['{type[2]}', ['{type[3]}']]
- Let:
- "atomic_src"
- FnCall: ["crate::sync::atomic::AtomicI64::from_ptr", ['ptr as *mut i64']]
- Identifier: [';', Symbol]
- FnCall:
- simd_insert!
- - src
- "LANE as u32"
- MethodCall:
- "atomic_src"
- load
- ["crate::sync::atomic::Ordering::Acquire"]
- name: "vldap1{neon_type[1].lane_nox}"
doc: "Load-acquire RCpc one single-element structure to one lane of one register"
arguments: ["ptr: {type[0]}","src: {type[1]}"]
static_defs: ["const LANE: i32"]
return_type: "{type[1]}"
safety:
unsafe: [neon]
attr:
- FnCall: [rustc_legacy_const_generics, ["2"]]
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
- *neon-unstable-feat-lrcpc3
types:
- ['*const u64', uint64x1_t,'static_assert!', 'LANE == 0','']
#- ['*const f64', float64x1_t,'static_assert!', 'LANE == 0',''] # Fails due to bad IR gen from rust
- ['*const p64', poly64x1_t,'static_assert!', 'LANE == 0','']
- ['*const u64', uint64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
- ['*const f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
- ['*const p64', poly64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
compose:
- FnCall: ['{type[2]}', ['{type[3]}']]
- FnCall:
- transmute
- - FnCall:
- 'vldap1{type[4]}_lane_s64::<LANE>'
- - "ptr as *mut i64"
- FnCall: [transmute,[src]]
- name: "vstl1{neon_type[1].lane_nox}"
doc: "Store-Release a single-element structure from one lane of one register."
arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
static_defs: ["const LANE: i32"]
safety: safe
attr:
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ["2"]]
- *neon-unstable-feat-lrcpc3
types:
- ['*mut i64', int64x1_t,'static_assert!', 'LANE == 0']
- ['*mut i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
compose:
- FnCall: ['{type[2]}', ['{type[3]}']]
- Let:
- "atomic_dst"
- "ptr as *mut crate::sync::atomic::AtomicI64"
- Identifier: [';', Symbol]
- Let:
- "lane"
- i64
- FnCall: [simd_extract!, [val, 'LANE as u32']]
- MethodCall:
- "(*atomic_dst)"
- store
- [FnCall: [transmute, [lane]],"crate::sync::atomic::Ordering::Release"]
- name: "vstl1{neon_type[1].lane_nox}"
doc: "Store-Release a single-element structure from one lane of one register."
arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
static_defs: ["const LANE: i32"]
safety: safe
attr:
- FnCall: [target_feature, ['enable = "neon,rcpc3"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ["2"]]
- *neon-unstable-feat-lrcpc3
types:
- ['*mut u64', uint64x1_t, 'static_assert!', 'LANE == 0','']
- ['*mut f64', float64x1_t,'static_assert!', 'LANE == 0','']
- ['*mut p64', poly64x1_t, 'static_assert!', 'LANE == 0','']
- ['*mut u64', uint64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
- ['*mut f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
- ['*mut p64', poly64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
compose:
- FnCall: ['{type[2]}', ['{type[3]}']]
- FnCall:
- "vstl1{type[4]}_lane_s64::<LANE>"
- - "ptr as *mut i64"
- FnCall: [transmute, [val]]
- name: "vst1{neon_type[1].lane_nox}"
doc: "Store multiple single-element structures from one, two, three, or four registers"
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
@ -5081,56 +5199,6 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']]
- name: "vusdot{neon_type[0].laneq_nox}"
doc: "Dot product index form with unsigned and signed integers"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
attr:
- *neon-i8mm
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
static_defs: ["const LANE: i32"]
safety: safe
types:
- [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
- [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let: [c, int32x4_t, {FnCall: ['vreinterpretq_s32_s8', [c]]}]
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
- name: "vsudot{neon_type[0].laneq_nox}"
doc: "Dot product index form with signed and unsigned integers"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
attr:
- *neon-i8mm
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
static_defs: ["const LANE: i32"]
safety: safe
types:
- [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
- [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
- Let:
- c
- uint32x4_t
- FnCall: [transmute, [c]]
- Let:
- c
- "{type[4]}"
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
- FnCall:
- "vusdot{neon_type[0].no}"
- - a
- FnCall: [transmute, [c]]
- b
- name: "vmul{neon_type.no}"
doc: Multiply
arguments: ["a: {neon_type}", "b: {neon_type}"]
@ -6552,7 +6620,6 @@ intrinsics:
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
- name: "vcmla{neon_type[0].rot270_lane}"
doc: Floating-point complex multiply accumulate
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
@ -6574,66 +6641,6 @@ intrinsics:
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
- name: "vdot{neon_type[0].laneq_nox}"
doc: Dot product arithmetic (indexed)
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
static_defs: ["const LANE: i32"]
attr:
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
safety: safe
types:
- [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
- [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let:
- c
- "{neon_type[3]}"
- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
- Let:
- c
- "{neon_type[0]}"
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
- FnCall:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- name: "vdot{neon_type[0].laneq_nox}"
doc: Dot product arithmetic (indexed)
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
static_defs: ["const LANE: i32"]
attr:
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
safety: safe
types:
- [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
- [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let:
- c
- "{neon_type[3]}"
- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
- Let:
- c
- "{neon_type[0]}"
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
- FnCall:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- name: "vmax{neon_type.no}"
doc: Maximum (vector)
arguments: ["a: {neon_type}", "b: {neon_type}"]
@ -13966,10 +13973,12 @@ intrinsics:
return_type: "{neon_type}"
attr:
- FnCall: [target_feature, ['enable = "neon,faminmax"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famax]]}]]
- FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
safety: safe
types:
- float16x4_t
- float16x8_t
- float32x2_t
- float32x4_t
- float64x2_t
@ -13986,10 +13995,12 @@ intrinsics:
return_type: "{neon_type}"
attr:
- FnCall: [target_feature, ['enable = "neon,faminmax"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famin]]}]]
- FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
safety: safe
types:
- float16x4_t
- float16x8_t
- float32x2_t
- float32x4_t
- float64x2_t
@ -14030,36 +14041,101 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]]
- name: "vluti2{neon_type[0].laneq_nox}"
doc: "Lookup table read with 2-bit indices"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[2]}"
attr:
- FnCall: [target_feature, ['enable = {type[4]}']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
- *neon-unstable-feat-lut
- FnCall: [rustc_legacy_const_generics, ['2']]
static_defs: ["const INDEX: i32"]
safety:
unsafe: [neon]
types:
- [int8x8_t, uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
- [int8x16_t, uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
- [int16x4_t, uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
- [int16x8_t, uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
compose:
- FnCall: ['static_assert!', ['{type[3]}']]
- LLVMLink:
name: "vluti2{neon_type[0].laneq_nox}"
arguments:
- 'a: {neon_type[0]}'
- 'b: {neon_type[1]}'
- 'n: i32'
links:
- link: "llvm.aarch64.neon.vluti2.laneq.{neon_type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ['_vluti2{neon_type[0].laneq_nox}', [a, b, INDEX]]
- name: "vluti2{neon_type[0].lane_nox}"
doc: "Lookup table read with 2-bit indices"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[2]}"
attr:
- FnCall: [target_feature, ['enable = "neon,lut"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
- *neon-unstable-feat-lut
- FnCall: [rustc_legacy_const_generics, ['2']]
static_defs: ["const LANE: i32"]
static_defs: ["const INDEX: i32"]
safety:
unsafe: [neon]
types:
- [uint8x8_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
- [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
- [poly8x8_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
- [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
- [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
- [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
- [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
- [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
- [uint8x8_t, uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
- [uint8x16_t, uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
- [poly8x8_t, uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
- [poly8x16_t, uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
- [uint16x4_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
- [uint16x8_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
- [poly16x4_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
- [poly16x8_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
- [float16x4_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
- [float16x8_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
compose:
- FnCall: ['static_assert!', ['{type[3]}']]
- FnCall:
- transmute
- - FnCall:
- 'vluti2{neon_type[4].lane_nox}::<LANE>'
- 'vluti2{neon_type[4].lane_nox}::<INDEX>'
- - FnCall: [transmute, [a]]
- b
- name: "vluti2{neon_type[0].laneq_nox}"
doc: "Lookup table read with 2-bit indices"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
return_type: "{neon_type[2]}"
attr:
- FnCall: [target_feature, ['enable = "neon,lut"']]
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
- *neon-unstable-feat-lut
- FnCall: [rustc_legacy_const_generics, ['2']]
static_defs: ["const INDEX: i32"]
safety:
unsafe: [neon]
types:
- [uint8x8_t, uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
- [uint8x16_t, uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
- [poly8x8_t, uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
- [poly8x16_t, uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
- [uint16x4_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
- [uint16x8_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
- [poly16x4_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
- [poly16x8_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
- [float16x4_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
- [float16x8_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
compose:
- FnCall: ['static_assert!', ['{type[3]}']]
- FnCall:
- transmute
- - FnCall:
- 'vluti2{neon_type[4].laneq_nox}::<INDEX>'
- - FnCall: [transmute, [a]]
- b
- name: "vluti4{neon_type[0].lane_nox}"
doc: "Lookup table read with 4-bit indices"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
@ -14268,6 +14344,28 @@ intrinsics:
- - FnCall: [transmute, [a]]
- b
- name: "vscale{neon_type[0].no}"
doc: "Multi-vector floating-point adjust exponent"
arguments: ["vn: {type[0]}", "vm: {type[1]}"]
return_type: "{type[0]}"
attr:
- *neon-unstable-fp8
- FnCall: [target_feature, ['enable = "neon,fp8"']]
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [fscale]]}]]
safety: safe
types:
- [float16x4_t, int16x4_t]
- [float16x8_t, int16x8_t]
- [float32x2_t, int32x2_t]
- [float32x4_t, int32x4_t]
- [float64x2_t, int64x2_t]
compose:
- LLVMLink:
name: "vscale{neon_type[0].no}"
links:
- link: "llvm.aarch64.neon.fp8.fscale.{neon_type[0]}"
arch: aarch64,arm64ec
- name: "__jcvt"
doc: "Floating-point JavaScript convert to signed fixed-point, rounding toward zero"
arguments: ["a: {type}"]

View file

@ -7096,6 +7096,132 @@ intrinsics:
- FnCall: [simd_cast, [b]]
- FnCall: [simd_sub, [c, d]]
- name: "vusdot{neon_type[0].laneq_nox}"
doc: "Dot product index form with unsigned and signed integers"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; an LLVM inlining issue currently causes an assertion failure.
attr:
- *neon-v8
- *neon-i8mm
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 3']]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
static_defs: ["const LANE: i32"]
safety: safe
types:
- [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
- [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]]
#- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
- name: "vsudot{neon_type[0].laneq_nox}"
doc: "Dot product index form with signed and unsigned integers"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
attr:
- *neon-v8
- *neon-i8mm
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
static_defs: ["const LANE: i32"]
safety: safe
types:
- [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
- [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
- Let:
- c
- uint32x4_t
- FnCall: [transmute, [c]]
- Let:
- c
- "{type[4]}"
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
- FnCall:
- "vusdot{neon_type[0].no}"
- - a
- FnCall: [transmute, [c]]
- b
- name: "vdot{neon_type[0].laneq_nox}"
doc: Dot product arithmetic (indexed)
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
static_defs: ["const LANE: i32"]
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; an LLVM inlining issue currently causes an assertion failure.
attr:
- *neon-v8
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot, 'LANE = 0']]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
safety: safe
types:
- [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
- [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let:
- c
- "{neon_type[3]}"
- FnCall: [transmute, [c]]
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
- Let:
- c
- "{neon_type[0]}"
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
- FnCall:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: [transmute, [c]]
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- name: "vdot{neon_type[0].laneq_nox}"
doc: Dot product arithmetic (indexed)
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
return_type: "{neon_type[0]}"
static_defs: ["const LANE: i32"]
big_endian_inverse: true # TODO: Remove this attribute and replace transmute with vreinterpret once https://github.com/llvm/llvm-project/pull/169337 is merged; an LLVM inlining issue currently causes an assertion failure.
attr:
- *neon-v8
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
- FnCall: [rustc_legacy_const_generics, ['3']]
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
safety: safe
types:
- [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
- [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
- Let:
- c
- "{neon_type[3]}"
- FnCall: [transmute, [c]]
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
- Let:
- c
- "{neon_type[0]}"
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
- FnCall:
- "vdot{neon_type[0].no}"
- - a
- b
- FnCall: [transmute, [c]]
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
- name: "vdot{neon_type[0].no}"
doc: Dot product arithmetic (vector)
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
@ -8785,7 +8911,6 @@ intrinsics:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
- *neon-fp16
- *neon-not-arm-stable-fp16
- *neon-cfg-arm-unstable
- *target-not-arm64ec
@ -8849,7 +8974,6 @@ intrinsics:
- *neon-v8
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
- *neon-fp16
- *neon-not-arm-stable-fp16
- *neon-cfg-arm-unstable
- *target-not-arm64ec

View file

@ -840,7 +840,7 @@ impl fmt::Display for UnsafetyComment {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Custom(s) => s.fmt(f),
Self::Neon => write!(f, "Neon instrinsic unsafe"),
Self::Neon => write!(f, "Neon intrinsic unsafe"),
Self::Uninitialized => write!(
f,
"This creates an uninitialized value, and may be unsound (like \

File diff suppressed because it is too large Load diff

View file

@ -25,7 +25,6 @@ backtrace = ["std/backtrace"]
backtrace-trace-only = ["std/backtrace-trace-only"]
compiler-builtins-c = ["std/compiler-builtins-c"]
compiler-builtins-mem = ["std/compiler-builtins-mem"]
compiler-builtins-no-f16-f128 = ["std/compiler-builtins-no-f16-f128"]
debug_refcell = ["std/debug_refcell"]
llvm-libunwind = ["std/llvm-libunwind"]
system-llvm-libunwind = ["std/system-llvm-libunwind"]

View file

@ -1,9 +1,9 @@
//@ has 'glob_shadowing/index.html'
//@ count - '//dt' 6
//@ !has - '//dd' 'sub1::describe'
//@ count - '//dt' 7
//@ !has - '//dd' 'sub1::describe1'
//@ has - '//dd' 'sub2::describe'
//@ !has - '//dd' 'sub1::describe2'
//@ has - '//dd' 'sub1::describe2'
//@ !has - '//dd' 'sub1::prelude'
//@ has - '//dd' 'mod::prelude'
@ -18,7 +18,7 @@
mod sub1 {
// this should be shadowed by sub2::describe
/// sub1::describe
/// sub1::describe1
pub fn describe() -> &'static str {
"sub1::describe"
}
@ -33,7 +33,9 @@ mod sub1 {
pub struct Foo;
// this should be shadowed,
// because both sub1::describe2 and sub3::describe2 are from glob reexport
// because both sub1::describe2 and sub3::describe2 are from glob reexport,
// but it is still usable from other crates under the `ambiguous_glob_imports` lint,
// so it is reachable and documented
/// sub1::describe2
pub fn describe2() -> &'static str {
"sub1::describe2"

View file

@ -1,7 +1,9 @@
// Regression test for https://github.com/rust-lang/rust/issues/100973
// Update: the rules have changed after #147984; one of the colliding items is now available
// from other crates under a deprecation lint.
//@ set m1 = "$.index[?(@.name == 'm1' && @.inner.module)].id"
//@ is "$.index[?(@.name == 'm1')].inner.module.items" []
//@ is "$.index[?(@.name == 'm1')].inner.module.items" [0]
//@ is "$.index[?(@.name == 'm1')].inner.module.is_stripped" true
mod m1 {
pub fn f() {}

View file

@ -14,3 +14,20 @@ pub mod bar {
//~| ERROR: unresolved link
pub fn sql_function_proc() {}
}
// From here, this is a regression test for <https://github.com/rust-lang/rust/issues/151411>.
pub use fuzz_test_helpers::*;
/// A type referenced in the deprecation note.
pub struct Env;
impl Env {
pub fn try_invoke(&self) {}
}
mod fuzz_test_helpers {
#[deprecated(note = "use [Env::try_invoke] instead")]
//~^ ERROR: unresolved link
//~| ERROR: unresolved link
pub fn fuzz_catch_panic() {}
}

View file

@ -16,6 +16,18 @@ note: the lint level is defined here
LL | #![deny(rustdoc::broken_intra_doc_links)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: unresolved link to `Env::try_invoke`
--> $DIR/deprecated-note-from-reexported.rs:29:25
|
LL | #[deprecated(note = "use [Env::try_invoke] instead")]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: the link appears in this line:
use [Env::try_invoke] instead
^^^^^^^^^^^^^^^
= note: no item named `Env` in scope
error: unresolved link to `define_sql_function`
--> $DIR/deprecated-note-from-reexported.rs:12:25
|
@ -30,5 +42,18 @@ LL | #[deprecated(note = "Use [`define_sql_function`] instead")]
= help: to escape `[` and `]` characters, add '\' before them like `\[` or `\]`
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
error: aborting due to 2 previous errors
error: unresolved link to `Env::try_invoke`
--> $DIR/deprecated-note-from-reexported.rs:29:25
|
LL | #[deprecated(note = "use [Env::try_invoke] instead")]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: the link appears in this line:
use [Env::try_invoke] instead
^^^^^^^^^^^^^^^
= note: no item named `Env` in scope
= note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no`
error: aborting due to 4 previous errors

View file

@ -1,8 +1,6 @@
fn a() {
if let x = 1 && i = 2 {}
//~^ ERROR cannot find value `i` in this scope
//~| ERROR mismatched types
//~| ERROR expected expression, found `let` statement
//~^ ERROR expected expression, found `let` statement
}
fn b() {

View file

@ -15,13 +15,7 @@ LL | if let x = 1 && i == 2 {}
| +
error[E0425]: cannot find value `i` in this scope
--> $DIR/bad-if-let-suggestion.rs:2:21
|
LL | if let x = 1 && i = 2 {}
| ^ not found in this scope
error[E0425]: cannot find value `i` in this scope
--> $DIR/bad-if-let-suggestion.rs:9:9
--> $DIR/bad-if-let-suggestion.rs:7:9
|
LL | fn a() {
| ------ similarly named function `a` defined here
@ -36,7 +30,7 @@ LL + if (a + j) = i {}
|
error[E0425]: cannot find value `j` in this scope
--> $DIR/bad-if-let-suggestion.rs:9:13
--> $DIR/bad-if-let-suggestion.rs:7:13
|
LL | fn a() {
| ------ similarly named function `a` defined here
@ -51,7 +45,7 @@ LL + if (i + a) = i {}
|
error[E0425]: cannot find value `i` in this scope
--> $DIR/bad-if-let-suggestion.rs:9:18
--> $DIR/bad-if-let-suggestion.rs:7:18
|
LL | fn a() {
| ------ similarly named function `a` defined here
@ -66,7 +60,7 @@ LL + if (i + j) = a {}
|
error[E0425]: cannot find value `x` in this scope
--> $DIR/bad-if-let-suggestion.rs:16:8
--> $DIR/bad-if-let-suggestion.rs:14:8
|
LL | fn a() {
| ------ similarly named function `a` defined here
@ -80,18 +74,6 @@ LL - if x[0] = 1 {}
LL + if a[0] = 1 {}
|
error[E0308]: mismatched types
--> $DIR/bad-if-let-suggestion.rs:2:8
|
LL | if let x = 1 && i = 2 {}
| ^^^^^^^^^^^^^^^^^^ expected `bool`, found `()`
|
help: you might have meant to compare for equality
|
LL | if let x = 1 && i == 2 {}
| +
error: aborting due to 5 previous errors
error: aborting due to 7 previous errors
Some errors have detailed explanations: E0308, E0425.
For more information about an error, try `rustc --explain E0308`.
For more information about this error, try `rustc --explain E0425`.

View file

@ -0,0 +1,8 @@
//@ build-pass
//@ aux-crate: ambiguous_reachable_extern=ambiguous-reachable-extern.rs
#![allow(ambiguous_glob_imports)]
fn main() {
ambiguous_reachable_extern::generic::<u8>();
}

View file

@ -0,0 +1,23 @@
Future incompatibility report: Future breakage diagnostic:
warning: `generic` is ambiguous
--> $DIR/ambiguous-reachable.rs:7:33
|
LL | ambiguous_reachable_extern::generic::<u8>();
| ^^^^^^^ ambiguous name
|
= warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release!
= note: for more information, see issue #114095 <https://github.com/rust-lang/rust/issues/114095>
= note: ambiguous because of multiple glob imports of a name in the same module
note: `generic` could refer to the function defined here
--> $DIR/auxiliary/ambiguous-reachable-extern.rs:13:9
|
LL | pub use m1::*;
| ^^
= help: consider updating this dependency to resolve this error
= help: if updating the dependency does not resolve the problem report the problem to the author of the relevant crate
note: `generic` could also refer to the function defined here
--> $DIR/auxiliary/ambiguous-reachable-extern.rs:14:9
|
LL | pub use m2::*;
| ^^

View file

@ -0,0 +1,14 @@
mod m1 {
pub fn generic<T>() {
let x = 10;
let y = 11;
println!("hello {x} world {:?}", y);
}
}
mod m2 {
pub fn generic() {}
}
pub use m1::*;
pub use m2::*;

View file

@ -0,0 +1,6 @@
fn main() {
let x = Some(42);
if let Some(_) = x
&& Some(x) = x //~^ ERROR expected expression, found `let` statement
{}
}

View file

@ -0,0 +1,18 @@
error: expected expression, found `let` statement
--> $DIR/missing-let.rs:3:8
|
LL | if let Some(_) = x
| ^^^^^^^^^^^^^^^
|
= note: only supported directly in conditions of `if` and `while` expressions
help: you might have meant to continue the let-chain
|
LL | && let Some(x) = x
| +++
help: you might have meant to compare for equality
|
LL | && Some(x) == x
| +
error: aborting due to 1 previous error

View file

@ -0,0 +1,20 @@
// issue: https://github.com/rust-lang/rust/issues/146515
use std::rc::Rc;
#[derive(Clone)]
struct ContainsRc<T> {
value: Rc<T>,
}
fn clone_me<T>(x: &ContainsRc<T>) -> ContainsRc<T> {
//~^ NOTE expected `ContainsRc<T>` because of return type
x.clone()
//~^ ERROR mismatched types
//~| NOTE expected `ContainsRc<T>`, found `&ContainsRc<T>`
//~| NOTE expected struct `ContainsRc<_>`
//~| NOTE `ContainsRc<T>` does not implement `Clone`, so `&ContainsRc<T>` was cloned instead
//~| NOTE the trait `Clone` must be implemented
}
fn main() {}

View file

@ -0,0 +1,23 @@
error[E0308]: mismatched types
--> $DIR/derive-clone-already-present-issue-146515.rs:12:5
|
LL | fn clone_me<T>(x: &ContainsRc<T>) -> ContainsRc<T> {
| ------------- expected `ContainsRc<T>` because of return type
LL |
LL | x.clone()
| ^^^^^^^^^ expected `ContainsRc<T>`, found `&ContainsRc<T>`
|
= note: expected struct `ContainsRc<_>`
found reference `&ContainsRc<_>`
note: `ContainsRc<T>` does not implement `Clone`, so `&ContainsRc<T>` was cloned instead
--> $DIR/derive-clone-already-present-issue-146515.rs:12:5
|
LL | x.clone()
| ^
= help: `Clone` is not implemented because the trait bound `T: Clone` is not satisfied
note: the trait `Clone` must be implemented
--> $SRC_DIR/core/src/clone.rs:LL:COL
error: aborting due to 1 previous error
For more information about this error, try `rustc --explain E0308`.