Rollup merge of #151711 - folkertdev:stdarch-sync-2026-01-26, r=folkertdev

stdarch subtree update

Subtree update of `stdarch` to 9ba0a3f392.

Created using https://github.com/rust-lang/josh-sync.

r? @ghost
This commit is contained in:
Jonathan Brouwer 2026-01-27 17:00:54 +01:00 committed by GitHub
commit 7d11720fd3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 7214 additions and 6731 deletions

View file

@ -182,6 +182,7 @@
#![feature(staged_api)]
#![feature(stmt_expr_attributes)]
#![feature(strict_provenance_lints)]
#![feature(target_feature_inline_always)]
#![feature(trait_alias)]
#![feature(transparent_unions)]
#![feature(try_blocks)]

View file

@ -309,6 +309,7 @@
#![feature(staged_api)]
#![feature(stmt_expr_attributes)]
#![feature(strict_provenance_lints)]
#![feature(target_feature_inline_always)]
#![feature(thread_local)]
#![feature(try_blocks)]
#![feature(try_trait_v2)]

View file

@ -40,7 +40,7 @@ case ${TARGET} in
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
;;
armv7-*eabihf | thumbv7-*eabihf)
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon,+fp16"
;;
amdgcn-*)
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-cpu=gfx1200"

View file

@ -1,3 +0,0 @@
ignore = [
"src/simd.rs",
]

File diff suppressed because it is too large Load diff

View file

@ -13,194 +13,195 @@ use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s8() {
fn test_vld1_s8() {
let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
let r = unsafe { i8x8::from(vld1_s8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s8() {
fn test_vld1q_s8() {
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
let r = unsafe { i8x16::from(vld1q_s8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s16() {
fn test_vld1_s16() {
let a: [i16; 5] = [0, 1, 2, 3, 4];
let e = i16x4::new(1, 2, 3, 4);
let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
let r = unsafe { i16x4::from(vld1_s16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s16() {
fn test_vld1q_s16() {
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
let r = unsafe { i16x8::from(vld1q_s16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s32() {
fn test_vld1_s32() {
let a: [i32; 3] = [0, 1, 2];
let e = i32x2::new(1, 2);
let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
let r = unsafe { i32x2::from(vld1_s32(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s32() {
fn test_vld1q_s32() {
let a: [i32; 5] = [0, 1, 2, 3, 4];
let e = i32x4::new(1, 2, 3, 4);
let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
let r = unsafe { i32x4::from(vld1q_s32(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s64() {
fn test_vld1_s64() {
let a: [i64; 2] = [0, 1];
let e = i64x1::new(1);
let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
let r = unsafe { i64x1::from(vld1_s64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s64() {
fn test_vld1q_s64() {
let a: [i64; 3] = [0, 1, 2];
let e = i64x2::new(1, 2);
let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
let r = unsafe { i64x2::from(vld1q_s64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u8() {
fn test_vld1_u8() {
let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
let r = unsafe { u8x8::from(vld1_u8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u8() {
fn test_vld1q_u8() {
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
let r = unsafe { u8x16::from(vld1q_u8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u16() {
fn test_vld1_u16() {
let a: [u16; 5] = [0, 1, 2, 3, 4];
let e = u16x4::new(1, 2, 3, 4);
let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
let r = unsafe { u16x4::from(vld1_u16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u16() {
fn test_vld1q_u16() {
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
let r = unsafe { u16x8::from(vld1q_u16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u32() {
fn test_vld1_u32() {
let a: [u32; 3] = [0, 1, 2];
let e = u32x2::new(1, 2);
let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
let r = unsafe { u32x2::from(vld1_u32(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u32() {
fn test_vld1q_u32() {
let a: [u32; 5] = [0, 1, 2, 3, 4];
let e = u32x4::new(1, 2, 3, 4);
let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
let r = unsafe { u32x4::from(vld1q_u32(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u64() {
fn test_vld1_u64() {
let a: [u64; 2] = [0, 1];
let e = u64x1::new(1);
let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
let r = unsafe { u64x1::from(vld1_u64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u64() {
fn test_vld1q_u64() {
let a: [u64; 3] = [0, 1, 2];
let e = u64x2::new(1, 2);
let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
let r = unsafe { u64x2::from(vld1q_u64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p8() {
fn test_vld1_p8() {
let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
let r = unsafe { u8x8::from(vld1_p8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p8() {
fn test_vld1q_p8() {
let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
let r = unsafe { u8x16::from(vld1q_p8(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p16() {
fn test_vld1_p16() {
let a: [p16; 5] = [0, 1, 2, 3, 4];
let e = u16x4::new(1, 2, 3, 4);
let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
let r = unsafe { u16x4::from(vld1_p16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p16() {
fn test_vld1q_p16() {
let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
let r = unsafe { u16x8::from(vld1q_p16(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vld1_p64() {
fn test_vld1_p64() {
let a: [p64; 2] = [0, 1];
let e = u64x1::new(1);
let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr()));
let r = unsafe { u64x1::from(vld1_p64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vld1q_p64() {
fn test_vld1q_p64() {
let a: [p64; 3] = [0, 1, 2];
let e = u64x2::new(1, 2);
let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr()));
let r = unsafe { u64x2::from(vld1q_p64(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_f32() {
fn test_vld1_f32() {
let a: [f32; 3] = [0., 1., 2.];
let e = f32x2::new(1., 2.);
let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
let r = unsafe { f32x2::from(vld1_f32(a[1..].as_ptr())) };
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_f32() {
fn test_vld1q_f32() {
let a: [f32; 5] = [0., 1., 2., 3., 4.];
let e = f32x4::new(1., 2., 3., 4.);
let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
let r = unsafe { f32x4::from(vld1q_f32(a[1..].as_ptr())) };
assert_eq!(r, e)
}

File diff suppressed because it is too large Load diff

View file

@ -14,11 +14,13 @@ use crate::core_arch::simd::*;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s8() {
fn test_vst1_s8() {
let mut vals = [0_i8; 9];
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_s8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -32,11 +34,13 @@ unsafe fn test_vst1_s8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s8() {
fn test_vst1q_s8() {
let mut vals = [0_i8; 17];
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_s8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -58,11 +62,13 @@ unsafe fn test_vst1q_s8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s16() {
fn test_vst1_s16() {
let mut vals = [0_i16; 5];
let a = i16x4::new(1, 2, 3, 4);
vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_s16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -72,11 +78,13 @@ unsafe fn test_vst1_s16() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s16() {
fn test_vst1q_s16() {
let mut vals = [0_i16; 9];
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_s16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -90,11 +98,13 @@ unsafe fn test_vst1q_s16() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s32() {
fn test_vst1_s32() {
let mut vals = [0_i32; 3];
let a = i32x2::new(1, 2);
vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_s32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -102,11 +112,13 @@ unsafe fn test_vst1_s32() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s32() {
fn test_vst1q_s32() {
let mut vals = [0_i32; 5];
let a = i32x4::new(1, 2, 3, 4);
vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_s32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -116,22 +128,26 @@ unsafe fn test_vst1q_s32() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s64() {
fn test_vst1_s64() {
let mut vals = [0_i64; 2];
let a = i64x1::new(1);
vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_s64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s64() {
fn test_vst1q_s64() {
let mut vals = [0_i64; 3];
let a = i64x2::new(1, 2);
vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_s64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -139,11 +155,13 @@ unsafe fn test_vst1q_s64() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u8() {
fn test_vst1_u8() {
let mut vals = [0_u8; 9];
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_u8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -157,11 +175,13 @@ unsafe fn test_vst1_u8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u8() {
fn test_vst1q_u8() {
let mut vals = [0_u8; 17];
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_u8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -183,11 +203,13 @@ unsafe fn test_vst1q_u8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u16() {
fn test_vst1_u16() {
let mut vals = [0_u16; 5];
let a = u16x4::new(1, 2, 3, 4);
vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_u16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -197,11 +219,13 @@ unsafe fn test_vst1_u16() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u16() {
fn test_vst1q_u16() {
let mut vals = [0_u16; 9];
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_u16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -215,11 +239,13 @@ unsafe fn test_vst1q_u16() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u32() {
fn test_vst1_u32() {
let mut vals = [0_u32; 3];
let a = u32x2::new(1, 2);
vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_u32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -227,11 +253,13 @@ unsafe fn test_vst1_u32() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u32() {
fn test_vst1q_u32() {
let mut vals = [0_u32; 5];
let a = u32x4::new(1, 2, 3, 4);
vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_u32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -241,22 +269,26 @@ unsafe fn test_vst1q_u32() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u64() {
fn test_vst1_u64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_u64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u64() {
fn test_vst1q_u64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_u64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -264,11 +296,13 @@ unsafe fn test_vst1q_u64() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p8() {
fn test_vst1_p8() {
let mut vals = [0_u8; 9];
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_p8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -282,11 +316,13 @@ unsafe fn test_vst1_p8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p8() {
fn test_vst1q_p8() {
let mut vals = [0_u8; 17];
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_p8(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -308,11 +344,13 @@ unsafe fn test_vst1q_p8() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p16() {
fn test_vst1_p16() {
let mut vals = [0_u16; 5];
let a = u16x4::new(1, 2, 3, 4);
vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_p16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -322,11 +360,13 @@ unsafe fn test_vst1_p16() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p16() {
fn test_vst1q_p16() {
let mut vals = [0_u16; 9];
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_p16(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -340,22 +380,26 @@ unsafe fn test_vst1q_p16() {
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1_p64() {
fn test_vst1_p64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_p64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1q_p64() {
fn test_vst1q_p64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_p64(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
@ -363,11 +407,13 @@ unsafe fn test_vst1q_p64() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f32() {
fn test_vst1_f32() {
let mut vals = [0_f32; 3];
let a = f32x2::new(1., 2.);
vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1_f32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);
@ -375,11 +421,13 @@ unsafe fn test_vst1_f32() {
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_f32() {
fn test_vst1q_f32() {
let mut vals = [0_f32; 5];
let a = f32x4::new(1., 2., 3., 4.);
vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
unsafe {
vst1q_f32(vals[1..].as_mut_ptr(), a.into());
}
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);

View file

@ -21,19 +21,19 @@ macro_rules! test_vtbl {
) => {
#[cfg(target_endian = "little")]
#[simd_test(enable = "neon")]
unsafe fn $test_name() {
fn $test_name() {
// create table as array, and transmute it to
// arm's table type
let table: $table_t = mem::transmute([$($table_v),*]);
let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
// For each control vector, perform a table lookup and
// verify the result:
$(
{
let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
let result = $fn_id(table, mem::transmute(ctrl));
let result: $ctrl_t = mem::transmute(result);
let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
let result = $fn_id(table, unsafe { mem::transmute(ctrl) });
let result: $ctrl_t = unsafe { mem::transmute(result) };
let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
assert_eq!(result, expected);
}
)*
@ -171,20 +171,19 @@ macro_rules! test_vtbx {
) => {
#[cfg(target_endian = "little")]
#[simd_test(enable = "neon")]
unsafe fn $test_name() {
fn $test_name() {
// create table as array, and transmute it to
// arm's table type
let table: $table_t = mem::transmute([$($table_v),*]);
let ext: $ext_t = mem::transmute([$($ext_v),*]);
let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
let ext: $ext_t = unsafe { mem::transmute([$($ext_v),*]) };
// For each control vector, perform a table lookup and
// verify the result:
$(
{
let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
let result = $fn_id(ext, table, mem::transmute(ctrl));
let result: $ctrl_t = mem::transmute(result);
let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
let result = $fn_id(ext, table, unsafe { mem::transmute(ctrl) });
let result: $ctrl_t = unsafe { mem::transmute(result) };
let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
assert_eq!(result, expected);
}
)*

View file

@ -111,13 +111,13 @@ macro_rules! V_f32 {
macro_rules! to64 {
($t : ident) => {
|v: $t| -> u64 { transmute(v) }
|v: $t| -> u64 { unsafe { transmute(v) } }
};
}
macro_rules! to128 {
($t : ident) => {
|v: $t| -> u128 { transmute(v) }
|v: $t| -> u128 { unsafe { transmute(v) } }
};
}
@ -158,9 +158,7 @@ pub(crate) fn test<T, U, V, W, X>(
macro_rules! gen_test_fn {
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
unsafe {
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
};
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun);
}
};
}

View file

@ -32,12 +32,14 @@
x86_amx_intrinsics,
f16,
aarch64_unstable_target_feature,
target_feature_inline_always,
bigint_helper_methods,
funnel_shifts,
avx10_target_feature,
const_trait_impl,
const_cmp,
const_eval_select
const_eval_select,
maybe_uninit_as_bytes
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]
@ -87,4 +89,4 @@ pub mod arch {
}
#[allow(unused_imports)]
use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
use core::{array, cmp, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};

View file

@ -90,17 +90,10 @@ macro_rules! types {
pub struct $name($v [$elem_type; $len]);
impl $name {
/// Using `my_simd([x; N])` seemingly fails tests,
/// so use this internal helper for it instead.
/// Put the same value in every lane.
#[inline(always)]
$v fn splat(value: $elem_type) -> $name {
#[derive(Copy, Clone)]
#[repr(simd)]
struct JustOne([$elem_type; 1]);
let one = JustOne([value]);
// SAFETY: 0 is always in-bounds because we're shuffling
// a simd type with exactly one element.
unsafe { simd_shuffle!(one, one, [0; $len]) }
unsafe { $crate::intrinsics::simd::simd_splat(value) }
}
/// Returns an array reference containing the entire SIMD vector.
@ -135,6 +128,22 @@ macro_rules! types {
crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
}
}
$(#[$stability])+
impl crate::convert::From<crate::core_arch::simd::Simd<$elem_type, $len>> for $name {
#[inline(always)]
fn from(simd: crate::core_arch::simd::Simd<$elem_type, $len>) -> Self {
unsafe { crate::mem::transmute(simd) }
}
}
$(#[$stability])+
impl crate::convert::From<$name> for crate::core_arch::simd::Simd<$elem_type, $len> {
#[inline(always)]
fn from(simd: $name) -> Self {
unsafe { crate::mem::transmute(simd) }
}
}
)*);
}

View file

@ -23,29 +23,29 @@ unsafe extern "C" {
#[link_name = "llvm.nvvm.barrier0"]
fn syncthreads() -> ();
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
fn block_dim_x() -> i32;
fn block_dim_x() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
fn block_dim_y() -> i32;
fn block_dim_y() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
fn block_dim_z() -> i32;
fn block_dim_z() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
fn block_idx_x() -> i32;
fn block_idx_x() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
fn block_idx_y() -> i32;
fn block_idx_y() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
fn block_idx_z() -> i32;
fn block_idx_z() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
fn grid_dim_x() -> i32;
fn grid_dim_x() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
fn grid_dim_y() -> i32;
fn grid_dim_y() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
fn grid_dim_z() -> i32;
fn grid_dim_z() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
fn thread_idx_x() -> i32;
fn thread_idx_x() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
fn thread_idx_y() -> i32;
fn thread_idx_y() -> u32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
fn thread_idx_z() -> i32;
fn thread_idx_z() -> u32;
}
/// Synchronizes all threads in the block.
@ -58,84 +58,84 @@ pub unsafe fn _syncthreads() -> () {
/// x-th thread-block dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_x() -> i32 {
pub unsafe fn _block_dim_x() -> u32 {
block_dim_x()
}
/// y-th thread-block dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_y() -> i32 {
pub unsafe fn _block_dim_y() -> u32 {
block_dim_y()
}
/// z-th thread-block dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_dim_z() -> i32 {
pub unsafe fn _block_dim_z() -> u32 {
block_dim_z()
}
/// x-th thread-block index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_x() -> i32 {
pub unsafe fn _block_idx_x() -> u32 {
block_idx_x()
}
/// y-th thread-block index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_y() -> i32 {
pub unsafe fn _block_idx_y() -> u32 {
block_idx_y()
}
/// z-th thread-block index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _block_idx_z() -> i32 {
pub unsafe fn _block_idx_z() -> u32 {
block_idx_z()
}
/// x-th block-grid dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_x() -> i32 {
pub unsafe fn _grid_dim_x() -> u32 {
grid_dim_x()
}
/// y-th block-grid dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_y() -> i32 {
pub unsafe fn _grid_dim_y() -> u32 {
grid_dim_y()
}
/// z-th block-grid dimension.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _grid_dim_z() -> i32 {
pub unsafe fn _grid_dim_z() -> u32 {
grid_dim_z()
}
/// x-th thread index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_x() -> i32 {
pub unsafe fn _thread_idx_x() -> u32 {
thread_idx_x()
}
/// y-th thread index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_y() -> i32 {
pub unsafe fn _thread_idx_y() -> u32 {
thread_idx_y()
}
/// z-th thread index.
#[inline]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _thread_idx_z() -> i32 {
pub unsafe fn _thread_idx_z() -> u32 {
thread_idx_z()
}

View file

@ -364,17 +364,46 @@ unsafe extern "C" {
fn vrfin(a: vector_float) -> vector_float;
}
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4 }
impl_neg! { i8x16 : 0 }
impl_neg! { i16x8 : 0 }
impl_neg! { i32x4 : 0 }
impl_neg! { f32x4 : 0f32 }
#[macro_use]
mod sealed {
use super::*;
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub trait VectorNeg {
unsafe fn vec_neg(self) -> Self;
}
macro_rules! impl_neg {
($($v:ty)*) => {
$(
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorNeg for $v {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_neg(self) -> Self {
simd_neg(self)
}
}
)*
}
}
impl_neg! {
vector_signed_char
vector_unsigned_char
vector_bool_char
vector_signed_short
vector_unsigned_short
vector_bool_short
vector_signed_int
vector_unsigned_int
vector_bool_int
vector_float
}
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub trait VectorInsert {
type Scalar;
@ -1380,7 +1409,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
v.vec_max(-v)
v.vec_max(simd_neg(v))
}
impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@ -1428,7 +1457,7 @@ mod sealed {
#[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))]
unsafe fn vspltb<const IMM4: u32>(a: vector_signed_char) -> vector_signed_char {
static_assert_uimm_bits!(IMM4, 4);
simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) })
simd_shuffle(a, a, const { u32x16::splat(IMM4) })
}
#[inline]
@ -1436,7 +1465,7 @@ mod sealed {
#[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))]
unsafe fn vsplth<const IMM3: u32>(a: vector_signed_short) -> vector_signed_short {
static_assert_uimm_bits!(IMM3, 3);
simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) })
simd_shuffle(a, a, const { u32x8::splat(IMM3) })
}
#[inline]
@ -1445,7 +1474,7 @@ mod sealed {
#[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))]
unsafe fn vspltw<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int {
static_assert_uimm_bits!(IMM2, 2);
simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) })
simd_shuffle(a, a, const { u32x4::splat(IMM2) })
}
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
@ -4032,6 +4061,14 @@ pub unsafe fn vec_mfvscr() -> vector_unsigned_short {
mfvscr()
}
/// Vector Negate
#[inline]
#[target_feature(enable = "altivec")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
a.vec_neg()
}
/// Vector add.
#[inline]
#[target_feature(enable = "altivec")]
@ -4703,7 +4740,7 @@ mod tests {
for off in 0..16 {
let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off)));
for i in 0..16 {
let v = val.extract(i);
let v = val.extract_dyn(i);
assert_eq!(off as usize + i, v as usize);
}
}
@ -4758,7 +4795,7 @@ mod tests {
)];
for off in 0..16 {
let v: u8x16 = transmute(vec_lde(off, pat.as_ptr() as *const u8));
assert_eq!(off as u8, v.extract(off as _));
assert_eq!(off as u8, v.extract_dyn(off as _));
}
}
@ -4767,7 +4804,7 @@ mod tests {
let pat = [u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)];
for off in 0..8 {
let v: u16x8 = transmute(vec_lde(off * 2, pat.as_ptr() as *const u16));
assert_eq!(off as u16, v.extract(off as _));
assert_eq!(off as u16, v.extract_dyn(off as _));
}
}
@ -4776,7 +4813,7 @@ mod tests {
let pat = [u32x4::new(0, 1, 2, 3)];
for off in 0..4 {
let v: u32x4 = transmute(vec_lde(off * 4, pat.as_ptr() as *const u32));
assert_eq!(off as u32, v.extract(off as _));
assert_eq!(off as u32, v.extract_dyn(off as _));
}
}

View file

@ -274,40 +274,6 @@ macro_rules! t_b {
};
}
macro_rules! impl_from {
($s: ident) => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl From<$s> for s_t_l!($s) {
#[inline]
fn from (v: $s) -> Self {
unsafe {
transmute(v)
}
}
}
};
($($s: ident),*) => {
$(
impl_from! { $s }
)*
};
}
macro_rules! impl_neg {
($s: ident : $zero: expr) => {
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl crate::ops::Neg for s_t_l!($s) {
type Output = s_t_l!($s);
#[inline]
fn neg(self) -> Self::Output {
unsafe { simd_neg(self) }
}
}
};
}
pub(crate) use impl_from;
pub(crate) use impl_neg;
pub(crate) use impl_vec_trait;
pub(crate) use s_t_l;
pub(crate) use t_b;

View file

@ -431,40 +431,6 @@ macro_rules! t_b {
};
}
macro_rules! impl_from {
($s: ident) => {
#[unstable(feature = "stdarch_s390x", issue = "135681")]
impl From<$s> for s_t_l!($s) {
#[inline]
fn from (v: $s) -> Self {
unsafe {
transmute(v)
}
}
}
};
($($s: ident),*) => {
$(
impl_from! { $s }
)*
};
}
macro_rules! impl_neg {
($s: ident : $zero: expr) => {
#[unstable(feature = "stdarch_s390x", issue = "135681")]
impl crate::ops::Neg for s_t_l!($s) {
type Output = s_t_l!($s);
#[inline]
fn neg(self) -> Self::Output {
unsafe { simd_neg(self) }
}
}
};
}
pub(crate) use impl_from;
pub(crate) use impl_neg;
pub(crate) use impl_vec_trait;
pub(crate) use l_t_t;
pub(crate) use s_t_l;

View file

@ -281,17 +281,14 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
#[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
#[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
#[link_name = "llvm.s390.vclfnhs"] fn vclfnhs(a: vector_signed_short, immarg: i32) -> vector_float;
#[link_name = "llvm.s390.vclfnls"] fn vclfnls(a: vector_signed_short, immarg: i32) -> vector_float;
#[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
#[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
#[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
}
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
impl_neg! { i8x16 : 0 }
impl_neg! { i16x8 : 0 }
impl_neg! { i32x4 : 0 }
impl_neg! { i64x2 : 0 }
impl_neg! { f32x4 : 0f32 }
impl_neg! { f64x2 : 0f64 }
#[repr(simd)]
struct ShuffleMask<const N: usize>([u32; N]);
@ -439,6 +436,43 @@ enum FindImm {
mod sealed {
use super::*;
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorNeg {
unsafe fn vec_neg(self) -> Self;
}
macro_rules! impl_neg {
($($v:ty)*) => {
$(
#[unstable(feature = "stdarch_s390x", issue = "135681")]
impl VectorNeg for $v {
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vec_neg(self) -> Self {
simd_neg(self)
}
}
)*
}
}
impl_neg! {
vector_signed_char
vector_unsigned_char
vector_signed_short
vector_unsigned_short
vector_signed_int
vector_unsigned_int
vector_signed_long_long
vector_unsigned_long_long
vector_float
vector_double
}
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorAdd<Other> {
type Result;
@ -761,7 +795,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "vector")]
unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
v.vec_max(-v)
v.vec_max(simd_neg(v))
}
impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@ -4055,6 +4089,14 @@ unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
}
/// Vector Negate
///
/// Returns a vector with each element of `a` negated (delegates to the
/// sealed `VectorNeg` implementation for the concrete vector type).
#[inline]
#[target_feature(enable = "vector")]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
a.vec_neg()
}
/// Vector Add
#[inline]
#[target_feature(enable = "vector")]
@ -5875,6 +5917,74 @@ pub unsafe fn vec_promote<T: sealed::VectorPromote>(a: T::ElementType, b: i32) -
T::vec_promote(a, b)
}
/// Converts the left-most half of `a` to a vector of single-precision numbers.
/// The format of the source vector elements is specified by `B`.
///
/// # Safety
///
/// Requires the `nnp-assist` target feature to be available at runtime.
#[inline]
#[target_feature(enable = "nnp-assist")]
#[cfg_attr(test, assert_instr(vclfnh, B = 0))]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub unsafe fn vec_extend_to_fp32_hi<const B: i32>(a: vector_signed_short) -> vector_float {
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
// `B` must fit in 4 bits; enforced at compile time.
static_assert_uimm_bits!(B, 4);
vclfnhs(a, B)
}
/// Converts the right-most half of `a` to a vector of single-precision numbers.
/// The format of the source vector elements is specified by `B`.
///
/// # Safety
///
/// Requires the `nnp-assist` target feature to be available at runtime.
#[inline]
#[target_feature(enable = "nnp-assist")]
#[cfg_attr(test, assert_instr(vclfnl, B = 0))]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub unsafe fn vec_extend_to_fp32_lo<const B: i32>(a: vector_signed_short) -> vector_float {
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
// `B` must fit in 4 bits; enforced at compile time.
static_assert_uimm_bits!(B, 4);
vclfnls(a, B)
}
/// Converts the elements of vector `a` to the 16-bit IEEE floating point format.
/// The format of the source vector elements is specified by `B`.
///
/// # Safety
///
/// Requires the `nnp-assist` target feature to be available at runtime.
#[inline]
#[target_feature(enable = "nnp-assist")]
#[cfg_attr(test, assert_instr(vcfn, B = 0))]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub unsafe fn vec_convert_to_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
// `B` must fit in 4 bits; enforced at compile time.
static_assert_uimm_bits!(B, 4);
vcfn(a, B)
}
/// Converts the elements of vector `a` to an internal floating point format.
/// The format of the target vector elements is specified by `B`.
///
/// # Safety
///
/// Requires the `nnp-assist` target feature to be available at runtime.
#[inline]
#[target_feature(enable = "nnp-assist")]
#[cfg_attr(test, assert_instr(vcnf, B = 0))]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub unsafe fn vec_convert_from_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
// `B` must fit in 4 bits; enforced at compile time.
static_assert_uimm_bits!(B, 4);
vcnf(a, B)
}
/// Converts the elements of single-precision vectors `a` and `b` to an internal floating point
/// format with 16-bit sized elements. The format of the target vector elements is specified by `C`.
///
/// # Safety
///
/// Requires the `nnp-assist` target feature to be available at runtime.
#[inline]
#[target_feature(enable = "nnp-assist")]
#[unstable(feature = "stdarch_s390x", issue = "135681")]
#[cfg_attr(test, assert_instr(vcrnf, C = 0))]
pub unsafe fn vec_round_from_fp32<const C: i32>(
a: vector_float,
b: vector_float,
) -> vector_signed_short {
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
// `C` must fit in 4 bits; enforced at compile time.
static_assert_uimm_bits!(C, 4);
vcrnfs(a, b, C)
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -16,129 +16,216 @@ pub(crate) const unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
crate::intrinsics::simd::simd_select(mask, a, b)
}
/// SAFETY: All bits patterns must be valid
pub(crate) unsafe trait SimdElement:
Copy + const PartialEq + crate::fmt::Debug
{
}
unsafe impl SimdElement for u8 {}
unsafe impl SimdElement for u16 {}
unsafe impl SimdElement for u32 {}
unsafe impl SimdElement for u64 {}
unsafe impl SimdElement for i8 {}
unsafe impl SimdElement for i16 {}
unsafe impl SimdElement for i32 {}
unsafe impl SimdElement for i64 {}
unsafe impl SimdElement for f16 {}
unsafe impl SimdElement for f32 {}
unsafe impl SimdElement for f64 {}
// Crate-internal generic SIMD vector: `N` lanes of element type `T`.
#[repr(simd)]
#[derive(Copy)]
pub(crate) struct Simd<T: SimdElement, const N: usize>([T; N]);
impl<T: SimdElement, const N: usize> Simd<T, N> {
/// A value of this type where all elements are zeroed out.
// SAFETY: `T` implements `SimdElement`, so it is zeroable.
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
/// Builds a vector directly from an array of `N` elements.
#[inline(always)]
pub(crate) const fn from_array(elements: [T; N]) -> Self {
Self(elements)
}
/// Returns a vector with every lane set to `value`.
#[inline]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn splat(value: T) -> Self {
unsafe { crate::intrinsics::simd::simd_splat(value) }
}
/// Extract the element at position `index`. Note that `index` is not a constant so this
/// operation is not efficient on most platforms. Use for testing only.
///
/// Panics if `index >= N`.
#[inline]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn extract_dyn(&self, index: usize) -> T {
assert!(index < N);
// SAFETY: self is a vector, T its element type.
unsafe { crate::intrinsics::simd::simd_extract_dyn(*self, index as u32) }
}
/// Borrows the lanes as a plain `[T; N]` array.
#[inline]
pub(crate) const fn as_array(&self) -> &[T; N] {
let simd_ptr: *const Self = self;
let array_ptr: *const [T; N] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}
// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::Simd is banned by MCP#838"
impl<T: SimdElement, const N: usize> Clone for Simd<T, N> {
#[inline]
fn clone(&self) -> Self {
*self
}
}
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for Simd<T, N> {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_array() == other.as_array()
}
}
impl<T: SimdElement, const N: usize> crate::fmt::Debug for Simd<T, N> {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
debug_simd_finish(f, "Simd", self.as_array())
}
}
impl<const N: usize> Simd<f16, N> {
// Reinterprets the lanes as their raw u16 bit patterns (no numeric conversion).
#[inline]
pub(crate) const fn to_bits(self) -> Simd<u16, N> {
// Size check guards the transmute; `transmute_copy` is used because the
// generic sizes cannot be proven equal to plain `transmute` at this point.
assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
unsafe { crate::mem::transmute_copy(&self) }
}
// Inverse of `to_bits`: builds f16 lanes from raw u16 bit patterns.
#[inline]
pub(crate) const fn from_bits(bits: Simd<u16, N>) -> Self {
assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
unsafe { crate::mem::transmute_copy(&bits) }
}
}
impl<const N: usize> Simd<f32, N> {
// Reinterprets the lanes as their raw u32 bit patterns (no numeric conversion).
#[inline]
pub(crate) const fn to_bits(self) -> Simd<u32, N> {
assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
unsafe { crate::mem::transmute_copy(&self) }
}
// Inverse of `to_bits`: builds f32 lanes from raw u32 bit patterns.
#[inline]
pub(crate) const fn from_bits(bits: Simd<u32, N>) -> Self {
assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
unsafe { crate::mem::transmute_copy(&bits) }
}
}
impl<const N: usize> Simd<f64, N> {
// Reinterprets the lanes as their raw u64 bit patterns (no numeric conversion).
#[inline]
pub(crate) const fn to_bits(self) -> Simd<u64, N> {
assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
unsafe { crate::mem::transmute_copy(&self) }
}
// Inverse of `to_bits`: builds f64 lanes from raw u64 bit patterns.
#[inline]
pub(crate) const fn from_bits(bits: Simd<u64, N>) -> Self {
assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
unsafe { crate::mem::transmute_copy(&bits) }
}
}
macro_rules! simd_ty {
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone)]
pub(crate) struct $id([$elem_type; $len]);
pub(crate) type $id = Simd<$elem_type, $len>;
#[allow(clippy::use_self)]
impl $id {
/// A value of this type where all elements are zeroed out.
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
#[inline(always)]
pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
$id([$($param_name),*])
}
#[inline(always)]
pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
$id(elements)
}
// FIXME: Workaround rust@60637
#[inline(always)]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn splat(value: $elem_type) -> Self {
#[derive(Copy, Clone)]
#[repr(simd)]
struct JustOne([$elem_type; 1]);
let one = JustOne([value]);
// SAFETY: 0 is always in-bounds because we're shuffling
// a simd type with exactly one element.
unsafe { simd_shuffle!(one, one, [0; $len]) }
}
/// Extract the element at position `index`.
/// `index` is not a constant so this is not efficient!
/// Use for testing only.
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) const fn extract(&self, index: usize) -> $elem_type {
self.as_array()[index]
}
#[inline]
pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
let simd_ptr: *const Self = self;
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
const impl core::cmp::PartialEq for $id {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_array() == other.as_array()
}
}
impl core::fmt::Debug for $id {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
debug_simd_finish(f, stringify!($id), self.as_array())
Self([$($param_name),*])
}
}
}
}
// Crate-internal generic SIMD mask vector: each lane is either all-zero
// bits (false) or all-one bits (true), stored in an integer type `T`.
#[repr(simd)]
#[derive(Copy)]
pub(crate) struct SimdM<T: SimdElement, const N: usize>([T; N]);
impl<T: SimdElement, const N: usize> SimdM<T, N> {
// Maps `false` to the all-zeros bit pattern of `T` and `true` to all-ones.
#[inline(always)]
const fn bool_to_internal(x: bool) -> T {
// SAFETY: `T` implements `SimdElement`, so all bit patterns are valid.
let zeros = const { unsafe { crate::mem::zeroed::<T>() } };
let ones = const {
// Ideally, this would be `transmute([0xFFu8; size_of::<T>()])`, but
// `size_of::<T>()` is not allowed to use a generic parameter there.
let mut r = crate::mem::MaybeUninit::<T>::uninit();
let mut i = 0;
while i < crate::mem::size_of::<T>() {
r.as_bytes_mut()[i] = crate::mem::MaybeUninit::new(0xFF);
i += 1;
}
unsafe { r.assume_init() }
};
// Branchless select: index 0 -> zeros, index 1 -> ones.
[zeros, ones][x as usize]
}
/// Returns a mask with every lane set to `value`.
#[inline]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn splat(value: bool) -> Self {
unsafe { crate::intrinsics::simd::simd_splat(value) }
}
/// Borrows the lanes as a plain `[T; N]` array of raw mask values.
#[inline]
pub(crate) const fn as_array(&self) -> &[T; N] {
let simd_ptr: *const Self = self;
let array_ptr: *const [T; N] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}
// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::SimdM is banned by MCP#838"
impl<T: SimdElement, const N: usize> Clone for SimdM<T, N> {
#[inline]
fn clone(&self) -> Self {
*self
}
}
// Lane-wise equality via the array view; usable in const contexts.
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for SimdM<T, N> {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_array() == other.as_array()
}
}
// Debug-formats the raw lane values through the shared `debug_simd_finish` helper.
impl<T: SimdElement, const N: usize> crate::fmt::Debug for SimdM<T, N> {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
debug_simd_finish(f, "SimdM", self.as_array())
}
}
macro_rules! simd_m_ty {
($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone)]
pub(crate) struct $id([$elem_type; $len]);
pub(crate) type $id = SimdM<$elem_type, $len>;
#[allow(clippy::use_self)]
impl $id {
#[inline(always)]
const fn bool_to_internal(x: bool) -> $elem_type {
[0 as $elem_type, !(0 as $elem_type)][x as usize]
}
#[inline(always)]
pub(crate) const fn new($($param_name: bool),*) -> Self {
$id([$(Self::bool_to_internal($param_name)),*])
}
// FIXME: Workaround rust@60637
#[inline(always)]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn splat(value: bool) -> Self {
#[derive(Copy, Clone)]
#[repr(simd)]
struct JustOne([$elem_type; 1]);
let one = JustOne([Self::bool_to_internal(value)]);
// SAFETY: 0 is always in-bounds because we're shuffling
// a simd type with exactly one element.
unsafe { simd_shuffle!(one, one, [0; $len]) }
}
#[inline]
pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
let simd_ptr: *const Self = self;
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
const impl core::cmp::PartialEq for $id {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_array() == other.as_array()
}
}
impl core::fmt::Debug for $id {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
debug_simd_finish(f, stringify!($id), self.as_array())
Self([$(Self::bool_to_internal($param_name)),*])
}
}
}

View file

@ -2746,7 +2746,7 @@ pub const fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_pd(a: f64) -> __m256d {
_mm256_setr_pd(a, a, a, a)
f64x4::splat(a).as_m256d()
}
/// Broadcasts single-precision (32-bit) floating-point value `a` to all
@ -2759,7 +2759,7 @@ pub const fn _mm256_set1_pd(a: f64) -> __m256d {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_ps(a: f32) -> __m256 {
_mm256_setr_ps(a, a, a, a, a, a, a, a)
f32x8::splat(a).as_m256()
}
/// Broadcasts 8-bit integer `a` to all elements of returned vector.
@ -2772,13 +2772,7 @@ pub const fn _mm256_set1_ps(a: f32) -> __m256 {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
#[rustfmt::skip]
_mm256_setr_epi8(
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
)
i8x32::splat(a).as_m256i()
}
/// Broadcasts 16-bit integer `a` to all elements of returned vector.
@ -2793,7 +2787,7 @@ pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
_mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
i16x16::splat(a).as_m256i()
}
/// Broadcasts 32-bit integer `a` to all elements of returned vector.
@ -2806,7 +2800,7 @@ pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
_mm256_setr_epi32(a, a, a, a, a, a, a, a)
i32x8::splat(a).as_m256i()
}
/// Broadcasts 64-bit integer `a` to all elements of returned vector.
@ -2821,7 +2815,7 @@ pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set1_epi64x(a: i64) -> __m256i {
_mm256_setr_epi64x(a, a, a, a)
i64x4::splat(a).as_m256i()
}
/// Cast vector of type __m256d to type __m256.

View file

@ -932,7 +932,7 @@ pub const fn _mm_set_ss(a: f32) -> __m128 {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_ps(a: f32) -> __m128 {
__m128([a, a, a, a])
f32x4::splat(a).as_m128()
}
/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
@ -2079,7 +2079,7 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
#[cfg(test)]
mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use crate::{hint::black_box, mem::transmute, ptr};
use crate::{hint::black_box, ptr};
use std::boxed;
use stdarch_test::simd_test;
@ -2221,7 +2221,7 @@ mod tests {
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_min_ps() {
fn test_mm_min_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_min_ps(a, b);
@ -2234,10 +2234,10 @@ mod tests {
// `r1` to `a` and `r2` to `b`.
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
let a: [u8; 16] = transmute(a);
let b: [u8; 16] = transmute(b);
let r1 = _mm_min_ps(a, b).as_f32x4().to_bits();
let r2 = _mm_min_ps(b, a).as_f32x4().to_bits();
let a = a.as_f32x4().to_bits();
let b = b.as_f32x4().to_bits();
assert_eq!(r1, b);
assert_eq!(r2, a);
assert_ne!(a, b); // sanity check that -0.0 is actually present
@ -2252,7 +2252,7 @@ mod tests {
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_max_ps() {
fn test_mm_max_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
let r = _mm_max_ps(a, b);
@ -2261,67 +2261,67 @@ mod tests {
// Check SSE-specific semantics for -0.0 handling.
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
let a: [u8; 16] = transmute(a);
let b: [u8; 16] = transmute(b);
let r1 = _mm_max_ps(a, b).as_f32x4().to_bits();
let r2 = _mm_max_ps(b, a).as_f32x4().to_bits();
let a = a.as_f32x4().to_bits();
let b = b.as_f32x4().to_bits();
assert_eq!(r1, b);
assert_eq!(r2, a);
assert_ne!(a, b); // sanity check that -0.0 is actually present
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_and_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
const fn test_mm_and_ps() {
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
let r = _mm_and_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0001));
let e = f32x4::from_bits(u32x4::splat(0b0001)).as_m128();
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_andnot_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
const fn test_mm_andnot_ps() {
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0100));
let e = f32x4::from_bits(u32x4::splat(0b0100)).as_m128();
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_or_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
const fn test_mm_or_ps() {
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
let r = _mm_or_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0111));
let e = f32x4::from_bits(u32x4::splat(0b0111)).as_m128();
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
const unsafe fn test_mm_xor_ps() {
let a = transmute(u32x4::splat(0b0011));
let b = transmute(u32x4::splat(0b0101));
const fn test_mm_xor_ps() {
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
let e = transmute(u32x4::splat(0b0110));
let e = f32x4::from_bits(u32x4::splat(0b0110)).as_m128();
assert_eq_m128(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpeq_ss() {
fn test_mm_cmpeq_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
let r = _mm_cmpeq_ss(a, b).as_f32x4().to_bits();
let e = f32x4::new(f32::from_bits(0), 2.0, 3.0, 4.0).to_bits();
assert_eq!(r, e);
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
let r2 = _mm_cmpeq_ss(a, b2).as_f32x4().to_bits();
let e2 = f32x4::new(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0).to_bits();
assert_eq!(r2, e2);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmplt_ss() {
fn test_mm_cmplt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2331,21 +2331,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) < c.extract(0)
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmplt_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmplt_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmplt_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmple_ss() {
fn test_mm_cmple_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2355,21 +2355,21 @@ mod tests {
let c1 = !0u32; // a.extract(0) <= c.extract(0)
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmple_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmple_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmple_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpgt_ss() {
fn test_mm_cmpgt_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2379,21 +2379,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) > c.extract(0)
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpgt_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpgt_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpgt_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpge_ss() {
fn test_mm_cmpge_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2403,21 +2403,21 @@ mod tests {
let c1 = !0u32; // a.extract(0) >= c.extract(0)
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpge_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpge_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpge_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpneq_ss() {
fn test_mm_cmpneq_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@ -2427,21 +2427,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) != c.extract(0)
let d1 = !0u32; // a.extract(0) != d.extract(0)
let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpneq_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpneq_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpneq_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnlt_ss() {
fn test_mm_cmpnlt_ss() {
// TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there
// must be a difference. It may have to do with behavior in the
// presence of NaNs (signaling or quiet). If so, we should add tests
@ -2456,21 +2456,21 @@ mod tests {
let c1 = !0u32; // a.extract(0) >= c.extract(0)
let d1 = 0u32; // a.extract(0) >= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpnlt_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpnlt_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpnlt_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnle_ss() {
fn test_mm_cmpnle_ss() {
// TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there
// must be a difference. It may have to do with behavior in the
// presence
@ -2485,21 +2485,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) > c.extract(0)
let d1 = 0u32; // a.extract(0) > d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpnle_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpnle_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpnle_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpngt_ss() {
fn test_mm_cmpngt_ss() {
// TODO: this test is exactly the same as for `_mm_cmple_ss`, but there
// must be a difference. It may have to do with behavior in the
// presence of NaNs (signaling or quiet). If so, we should add tests
@ -2514,21 +2514,21 @@ mod tests {
let c1 = !0u32; // a.extract(0) <= c.extract(0)
let d1 = !0u32; // a.extract(0) <= d.extract(0)
let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpngt_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpngt_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpngt_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnge_ss() {
fn test_mm_cmpnge_ss() {
// TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there
// must be a difference. It may have to do with behavior in the
// presence of NaNs (signaling or quiet). If so, we should add tests
@ -2543,21 +2543,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) < c.extract(0)
let d1 = !0u32; // a.extract(0) < d.extract(0)
let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpnge_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpnge_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpnge_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpord_ss() {
fn test_mm_cmpord_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@ -2567,21 +2567,21 @@ mod tests {
let c1 = 0u32; // a.extract(0) ord c.extract(0)
let d1 = !0u32; // a.extract(0) ord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpord_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpord_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpord_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpunord_ss() {
fn test_mm_cmpunord_ss() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@ -2591,160 +2591,160 @@ mod tests {
let c1 = !0u32; // a.extract(0) unord c.extract(0)
let d1 = 0u32; // a.extract(0) unord d.extract(0)
let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
let rb = _mm_cmpunord_ss(a, b).as_f32x4().to_bits();
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rb, eb);
let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
let rc = _mm_cmpunord_ss(a, c).as_f32x4().to_bits();
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rc, ec);
let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
let rd = _mm_cmpunord_ss(a, d).as_f32x4().to_bits();
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
assert_eq!(rd, ed);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpeq_ps() {
fn test_mm_cmpeq_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, fls, tru, fls);
let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
let r = _mm_cmpeq_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmplt_ps() {
fn test_mm_cmplt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, fls);
let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
let r = _mm_cmplt_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmple_ps() {
fn test_mm_cmple_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, tru, fls);
let r: u32x4 = transmute(_mm_cmple_ps(a, b));
let r = _mm_cmple_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpgt_ps() {
fn test_mm_cmpgt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, fls, fls);
let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
let r = _mm_cmpgt_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpge_ps() {
fn test_mm_cmpge_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, fls);
let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
let r = _mm_cmpge_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpneq_ps() {
fn test_mm_cmpneq_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, tru, fls, tru);
let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
let r = _mm_cmpneq_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnlt_ps() {
fn test_mm_cmpnlt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, tru);
let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
let r = _mm_cmpnlt_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnle_ps() {
fn test_mm_cmpnle_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, fls, tru);
let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
let r = _mm_cmpnle_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpngt_ps() {
fn test_mm_cmpngt_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, tru, tru);
let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
let r = _mm_cmpngt_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpnge_ps() {
fn test_mm_cmpnge_ps() {
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, tru);
let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
let r = _mm_cmpnge_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpord_ps() {
fn test_mm_cmpord_ps() {
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(tru, fls, fls, fls);
let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
let r = _mm_cmpord_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_cmpunord_ps() {
fn test_mm_cmpunord_ps() {
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
let tru = !0u32;
let fls = 0u32;
let e = u32x4::new(fls, tru, tru, tru);
let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
let r = _mm_cmpunord_ps(a, b).as_f32x4().to_bits();
assert_eq!(r, e);
}

View file

@ -1176,7 +1176,7 @@ pub const fn _mm_set_epi8(
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
_mm_set_epi64x(a, a)
i64x2::splat(a).as_m128i()
}
/// Broadcasts 32-bit integer `a` to all elements.
@ -1188,7 +1188,7 @@ pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_epi32(a: i32) -> __m128i {
_mm_set_epi32(a, a, a, a)
i32x4::splat(a).as_m128i()
}
/// Broadcasts 16-bit integer `a` to all elements.
@ -1200,7 +1200,7 @@ pub const fn _mm_set1_epi32(a: i32) -> __m128i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_epi16(a: i16) -> __m128i {
_mm_set_epi16(a, a, a, a, a, a, a, a)
i16x8::splat(a).as_m128i()
}
/// Broadcasts 8-bit integer `a` to all elements.
@ -1212,7 +1212,7 @@ pub const fn _mm_set1_epi16(a: i16) -> __m128i {
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_epi8(a: i8) -> __m128i {
_mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
i8x16::splat(a).as_m128i()
}
/// Sets packed 32-bit integers with the supplied values in reverse order.
@ -3280,11 +3280,7 @@ mod tests {
core_arch::{simd::*, x86::*},
hint::black_box,
};
use std::{
boxed, f32, f64,
mem::{self, transmute},
ptr,
};
use std::{boxed, f32, f64, mem, ptr};
use stdarch_test::simd_test;
const NAN: f64 = f64::NAN;
@ -4593,38 +4589,38 @@ mod tests {
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_and_pd() {
let a = transmute(u64x2::splat(5));
let b = transmute(u64x2::splat(3));
const fn test_mm_and_pd() {
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
let r = _mm_and_pd(a, b);
let e = transmute(u64x2::splat(1));
let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
assert_eq_m128d(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_andnot_pd() {
let a = transmute(u64x2::splat(5));
let b = transmute(u64x2::splat(3));
const fn test_mm_andnot_pd() {
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
let r = _mm_andnot_pd(a, b);
let e = transmute(u64x2::splat(2));
let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
assert_eq_m128d(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_or_pd() {
let a = transmute(u64x2::splat(5));
let b = transmute(u64x2::splat(3));
const fn test_mm_or_pd() {
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
let r = _mm_or_pd(a, b);
let e = transmute(u64x2::splat(7));
let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
assert_eq_m128d(r, e);
}
#[simd_test(enable = "sse2")]
const unsafe fn test_mm_xor_pd() {
let a = transmute(u64x2::splat(5));
let b = transmute(u64x2::splat(3));
const fn test_mm_xor_pd() {
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
let r = _mm_xor_pd(a, b);
let e = transmute(u64x2::splat(6));
let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
assert_eq_m128d(r, e);
}

View file

@ -78,38 +78,45 @@ pub(crate) const fn assert_eq_m512h(a: __m512h, b: __m512h) {
}
#[target_feature(enable = "sse2")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m128d(a: __m128d, idx: usize) -> f64 {
a.as_f64x2().extract(idx)
a.as_f64x2().extract_dyn(idx)
}
#[target_feature(enable = "sse")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m128(a: __m128, idx: usize) -> f32 {
a.as_f32x4().extract(idx)
a.as_f32x4().extract_dyn(idx)
}
#[target_feature(enable = "avx")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m256d(a: __m256d, idx: usize) -> f64 {
a.as_f64x4().extract(idx)
a.as_f64x4().extract_dyn(idx)
}
#[target_feature(enable = "avx")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m256(a: __m256, idx: usize) -> f32 {
a.as_f32x8().extract(idx)
a.as_f32x8().extract_dyn(idx)
}
#[target_feature(enable = "avx512f")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m512(a: __m512, idx: usize) -> f32 {
a.as_f32x16().extract(idx)
a.as_f32x16().extract_dyn(idx)
}
#[target_feature(enable = "avx512f")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m512d(a: __m512d, idx: usize) -> f64 {
a.as_f64x8().extract(idx)
a.as_f64x8().extract_dyn(idx)
}
#[target_feature(enable = "avx512f")]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const fn get_m512i(a: __m512i, idx: usize) -> i64 {
a.as_i64x8().extract(idx)
a.as_i64x8().extract_dyn(idx)
}
// not actually an intrinsic but useful in various tests as we ported from

View file

@ -59,6 +59,3 @@ vluti4q_laneq_u8
# Broken in Clang
vcvth_s16_f16
# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line
vmulh_lane_f16
vmulh_laneq_f16

View file

@ -100,6 +100,3 @@ vluti4q_laneq_u8
# Broken in Clang
vcvth_s16_f16
# FIXME: Broken output due to missing f16 printing support in Rust
vmulh_lane_f16
vmulh_laneq_f16

View file

@ -17,6 +17,15 @@ pub fn simd_test(
item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
let target = env::var("TARGET").expect(
"TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
);
let target_arch = target
.split('-')
.next()
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"));
let (target_features, target_feature_attr) = match &tokens[..] {
[] => (Vec::new(), TokenStream::new()),
[
@ -24,13 +33,20 @@ pub fn simd_test(
TokenTree::Punct(equals),
TokenTree::Literal(literal),
] if enable == "enable" && equals.as_char() == '=' => {
let enable_feature = literal.to_string();
let enable_feature = enable_feature.trim_start_matches('"').trim_end_matches('"');
let mut enable_feature = literal
.to_string()
.trim_start_matches('"')
.trim_end_matches('"')
.to_string();
let target_features: Vec<_> = enable_feature
.replace('+', "")
.split(',')
.map(String::from)
.collect();
// Allows using `#[simd_test(enable = "neon")]` on aarch64/armv7 shared tests.
if target_arch == "armv7" && target_features.iter().any(|feat| feat == "neon") {
enable_feature.push_str(",v7");
}
(
target_features,
@ -46,14 +62,7 @@ pub fn simd_test(
let item_attrs = std::mem::take(&mut item.attrs);
let name = &item.sig.ident;
let target = env::var("TARGET").expect(
"TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
);
let macro_test = match target
.split('-')
.next()
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
{
let macro_test = match target_arch {
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
"arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
"aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
@ -85,10 +94,20 @@ pub fn simd_test(
let mut detect_missing_features = TokenStream::new();
for feature in target_features {
let q = quote_spanned! {
proc_macro2::Span::call_site() =>
if !::std::arch::#macro_test!(#feature) {
missing_features.push(#feature);
let q = if target_arch == "armv7" && feature == "fp16" {
// "fp16" cannot be checked at runtime
quote_spanned! {
proc_macro2::Span::call_site() =>
if !cfg!(target_feature = #feature) {
missing_features.push(#feature);
}
}
} else {
quote_spanned! {
proc_macro2::Span::call_site() =>
if !::std::arch::#macro_test!(#feature) {
missing_features.push(#feature);
}
}
};
q.to_tokens(&mut detect_missing_features);

View file

@ -63,8 +63,8 @@ neon-unstable-f16: &neon-unstable-f16
neon-unstable-feat-lut: &neon-unstable-feat-lut
FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]
aarch64-unstable-jscvt: &aarch64-unstable-jscvt
FnCall: [unstable, ['feature = "stdarch_aarch64_jscvt"', 'issue = "147555"']]
aarch64-stable-jscvt: &aarch64-stable-jscvt
FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "CURRENT_RUSTC_VERSION"']]
# #[cfg(target_endian = "little")]
little-endian: &little-endian
@ -14275,7 +14275,7 @@ intrinsics:
attr:
- FnCall: [target_feature, ['enable = "jsconv"']]
- FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["fjcvtzs"]] }]]
- *aarch64-unstable-jscvt
- *aarch64-stable-jscvt
safety: safe
types:
- f64

View file

@ -1736,7 +1736,7 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok
);
}
tokens.append_all(quote! { #[inline] });
tokens.append_all(quote! { #[inline(always)] });
match endianness {
Endianness::Little => tokens.append_all(quote! { #[cfg(target_endian = "little")] }),

View file

@ -1 +1 @@
48622726c4a91c87bf6cd4dbe1000c95df59906e
873d4682c7d285540b8f28bfe637006cef8918a6