Rollup merge of #151711 - folkertdev:stdarch-sync-2026-01-26, r=folkertdev
stdarch subtree update
Subtree update of `stdarch` to 9ba0a3f392.
Created using https://github.com/rust-lang/josh-sync.
r? @ghost
This commit is contained in:
commit
7d11720fd3
29 changed files with 7214 additions and 6731 deletions
|
|
@ -182,6 +182,7 @@
|
|||
#![feature(staged_api)]
|
||||
#![feature(stmt_expr_attributes)]
|
||||
#![feature(strict_provenance_lints)]
|
||||
#![feature(target_feature_inline_always)]
|
||||
#![feature(trait_alias)]
|
||||
#![feature(transparent_unions)]
|
||||
#![feature(try_blocks)]
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@
|
|||
#![feature(staged_api)]
|
||||
#![feature(stmt_expr_attributes)]
|
||||
#![feature(strict_provenance_lints)]
|
||||
#![feature(target_feature_inline_always)]
|
||||
#![feature(thread_local)]
|
||||
#![feature(try_blocks)]
|
||||
#![feature(try_trait_v2)]
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ case ${TARGET} in
|
|||
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
|
||||
;;
|
||||
armv7-*eabihf | thumbv7-*eabihf)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon,+fp16"
|
||||
;;
|
||||
amdgcn-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-cpu=gfx1200"
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
ignore = [
|
||||
"src/simd.rs",
|
||||
]
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -13,194 +13,195 @@ use crate::core_arch::aarch64::*;
|
|||
use crate::core_arch::simd::*;
|
||||
use std::mem;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s8() {
|
||||
fn test_vld1_s8() {
|
||||
let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
|
||||
let r = unsafe { i8x8::from(vld1_s8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s8() {
|
||||
fn test_vld1q_s8() {
|
||||
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
|
||||
let r = unsafe { i8x16::from(vld1q_s8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s16() {
|
||||
fn test_vld1_s16() {
|
||||
let a: [i16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i16x4::new(1, 2, 3, 4);
|
||||
let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
|
||||
let r = unsafe { i16x4::from(vld1_s16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s16() {
|
||||
fn test_vld1q_s16() {
|
||||
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
|
||||
let r = unsafe { i16x8::from(vld1q_s16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s32() {
|
||||
fn test_vld1_s32() {
|
||||
let a: [i32; 3] = [0, 1, 2];
|
||||
let e = i32x2::new(1, 2);
|
||||
let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
|
||||
let r = unsafe { i32x2::from(vld1_s32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s32() {
|
||||
fn test_vld1q_s32() {
|
||||
let a: [i32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i32x4::new(1, 2, 3, 4);
|
||||
let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
|
||||
let r = unsafe { i32x4::from(vld1q_s32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s64() {
|
||||
fn test_vld1_s64() {
|
||||
let a: [i64; 2] = [0, 1];
|
||||
let e = i64x1::new(1);
|
||||
let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
|
||||
let r = unsafe { i64x1::from(vld1_s64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s64() {
|
||||
fn test_vld1q_s64() {
|
||||
let a: [i64; 3] = [0, 1, 2];
|
||||
let e = i64x2::new(1, 2);
|
||||
let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
|
||||
let r = unsafe { i64x2::from(vld1q_s64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u8() {
|
||||
fn test_vld1_u8() {
|
||||
let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
|
||||
let r = unsafe { u8x8::from(vld1_u8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u8() {
|
||||
fn test_vld1q_u8() {
|
||||
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
|
||||
let r = unsafe { u8x16::from(vld1q_u8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u16() {
|
||||
fn test_vld1_u16() {
|
||||
let a: [u16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
|
||||
let r = unsafe { u16x4::from(vld1_u16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u16() {
|
||||
fn test_vld1q_u16() {
|
||||
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
|
||||
let r = unsafe { u16x8::from(vld1q_u16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u32() {
|
||||
fn test_vld1_u32() {
|
||||
let a: [u32; 3] = [0, 1, 2];
|
||||
let e = u32x2::new(1, 2);
|
||||
let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
|
||||
let r = unsafe { u32x2::from(vld1_u32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u32() {
|
||||
fn test_vld1q_u32() {
|
||||
let a: [u32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u32x4::new(1, 2, 3, 4);
|
||||
let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
|
||||
let r = unsafe { u32x4::from(vld1q_u32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u64() {
|
||||
fn test_vld1_u64() {
|
||||
let a: [u64; 2] = [0, 1];
|
||||
let e = u64x1::new(1);
|
||||
let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
|
||||
let r = unsafe { u64x1::from(vld1_u64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u64() {
|
||||
fn test_vld1q_u64() {
|
||||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e = u64x2::new(1, 2);
|
||||
let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
|
||||
let r = unsafe { u64x2::from(vld1q_u64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p8() {
|
||||
fn test_vld1_p8() {
|
||||
let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
|
||||
let r = unsafe { u8x8::from(vld1_p8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p8() {
|
||||
fn test_vld1q_p8() {
|
||||
let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
|
||||
let r = unsafe { u8x16::from(vld1q_p8(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p16() {
|
||||
fn test_vld1_p16() {
|
||||
let a: [p16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
|
||||
let r = unsafe { u16x4::from(vld1_p16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p16() {
|
||||
fn test_vld1q_p16() {
|
||||
let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
|
||||
let r = unsafe { u16x8::from(vld1q_p16(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vld1_p64() {
|
||||
fn test_vld1_p64() {
|
||||
let a: [p64; 2] = [0, 1];
|
||||
let e = u64x1::new(1);
|
||||
let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr()));
|
||||
let r = unsafe { u64x1::from(vld1_p64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vld1q_p64() {
|
||||
fn test_vld1q_p64() {
|
||||
let a: [p64; 3] = [0, 1, 2];
|
||||
let e = u64x2::new(1, 2);
|
||||
let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr()));
|
||||
let r = unsafe { u64x2::from(vld1q_p64(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_f32() {
|
||||
fn test_vld1_f32() {
|
||||
let a: [f32; 3] = [0., 1., 2.];
|
||||
let e = f32x2::new(1., 2.);
|
||||
let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
|
||||
let r = unsafe { f32x2::from(vld1_f32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_f32() {
|
||||
fn test_vld1q_f32() {
|
||||
let a: [f32; 5] = [0., 1., 2., 3., 4.];
|
||||
let e = f32x4::new(1., 2., 3., 4.);
|
||||
let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
|
||||
let r = unsafe { f32x4::from(vld1q_f32(a[1..].as_ptr())) };
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -14,11 +14,13 @@ use crate::core_arch::simd::*;
|
|||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s8() {
|
||||
fn test_vst1_s8() {
|
||||
let mut vals = [0_i8; 9];
|
||||
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_s8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -32,11 +34,13 @@ unsafe fn test_vst1_s8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s8() {
|
||||
fn test_vst1q_s8() {
|
||||
let mut vals = [0_i8; 17];
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_s8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -58,11 +62,13 @@ unsafe fn test_vst1q_s8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s16() {
|
||||
fn test_vst1_s16() {
|
||||
let mut vals = [0_i16; 5];
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_s16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -72,11 +78,13 @@ unsafe fn test_vst1_s16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s16() {
|
||||
fn test_vst1q_s16() {
|
||||
let mut vals = [0_i16; 9];
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_s16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -90,11 +98,13 @@ unsafe fn test_vst1q_s16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s32() {
|
||||
fn test_vst1_s32() {
|
||||
let mut vals = [0_i32; 3];
|
||||
let a = i32x2::new(1, 2);
|
||||
|
||||
vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_s32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -102,11 +112,13 @@ unsafe fn test_vst1_s32() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s32() {
|
||||
fn test_vst1q_s32() {
|
||||
let mut vals = [0_i32; 5];
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_s32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -116,22 +128,26 @@ unsafe fn test_vst1q_s32() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_s64() {
|
||||
fn test_vst1_s64() {
|
||||
let mut vals = [0_i64; 2];
|
||||
let a = i64x1::new(1);
|
||||
|
||||
vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_s64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_s64() {
|
||||
fn test_vst1q_s64() {
|
||||
let mut vals = [0_i64; 3];
|
||||
let a = i64x2::new(1, 2);
|
||||
|
||||
vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_s64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -139,11 +155,13 @@ unsafe fn test_vst1q_s64() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u8() {
|
||||
fn test_vst1_u8() {
|
||||
let mut vals = [0_u8; 9];
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_u8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -157,11 +175,13 @@ unsafe fn test_vst1_u8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u8() {
|
||||
fn test_vst1q_u8() {
|
||||
let mut vals = [0_u8; 17];
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_u8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -183,11 +203,13 @@ unsafe fn test_vst1q_u8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u16() {
|
||||
fn test_vst1_u16() {
|
||||
let mut vals = [0_u16; 5];
|
||||
let a = u16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_u16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -197,11 +219,13 @@ unsafe fn test_vst1_u16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u16() {
|
||||
fn test_vst1q_u16() {
|
||||
let mut vals = [0_u16; 9];
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_u16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -215,11 +239,13 @@ unsafe fn test_vst1q_u16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u32() {
|
||||
fn test_vst1_u32() {
|
||||
let mut vals = [0_u32; 3];
|
||||
let a = u32x2::new(1, 2);
|
||||
|
||||
vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_u32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -227,11 +253,13 @@ unsafe fn test_vst1_u32() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u32() {
|
||||
fn test_vst1q_u32() {
|
||||
let mut vals = [0_u32; 5];
|
||||
let a = u32x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_u32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -241,22 +269,26 @@ unsafe fn test_vst1q_u32() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_u64() {
|
||||
fn test_vst1_u64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_u64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_u64() {
|
||||
fn test_vst1q_u64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_u64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -264,11 +296,13 @@ unsafe fn test_vst1q_u64() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p8() {
|
||||
fn test_vst1_p8() {
|
||||
let mut vals = [0_u8; 9];
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_p8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -282,11 +316,13 @@ unsafe fn test_vst1_p8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p8() {
|
||||
fn test_vst1q_p8() {
|
||||
let mut vals = [0_u8; 17];
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
||||
vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_p8(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -308,11 +344,13 @@ unsafe fn test_vst1q_p8() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p16() {
|
||||
fn test_vst1_p16() {
|
||||
let mut vals = [0_u16; 5];
|
||||
let a = u16x4::new(1, 2, 3, 4);
|
||||
|
||||
vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_p16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -322,11 +360,13 @@ unsafe fn test_vst1_p16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p16() {
|
||||
fn test_vst1q_p16() {
|
||||
let mut vals = [0_u16; 9];
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
||||
vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_p16(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -340,22 +380,26 @@ unsafe fn test_vst1q_p16() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1_p64() {
|
||||
fn test_vst1_p64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_p64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1q_p64() {
|
||||
fn test_vst1q_p64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_p64(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
|
|
@ -363,11 +407,13 @@ unsafe fn test_vst1q_p64() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_f32() {
|
||||
fn test_vst1_f32() {
|
||||
let mut vals = [0_f32; 3];
|
||||
let a = f32x2::new(1., 2.);
|
||||
|
||||
vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1_f32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0.);
|
||||
assert_eq!(vals[1], 1.);
|
||||
|
|
@ -375,11 +421,13 @@ unsafe fn test_vst1_f32() {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_f32() {
|
||||
fn test_vst1q_f32() {
|
||||
let mut vals = [0_f32; 5];
|
||||
let a = f32x4::new(1., 2., 3., 4.);
|
||||
|
||||
vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
|
||||
unsafe {
|
||||
vst1q_f32(vals[1..].as_mut_ptr(), a.into());
|
||||
}
|
||||
|
||||
assert_eq!(vals[0], 0.);
|
||||
assert_eq!(vals[1], 1.);
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@ macro_rules! test_vtbl {
|
|||
) => {
|
||||
#[cfg(target_endian = "little")]
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn $test_name() {
|
||||
fn $test_name() {
|
||||
// create table as array, and transmute it to
|
||||
// arm's table type
|
||||
let table: $table_t = mem::transmute([$($table_v),*]);
|
||||
let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
|
||||
|
||||
// For each control vector, perform a table lookup and
|
||||
// verify the result:
|
||||
$(
|
||||
{
|
||||
let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
|
||||
let result = $fn_id(table, mem::transmute(ctrl));
|
||||
let result: $ctrl_t = mem::transmute(result);
|
||||
let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
|
||||
let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
|
||||
let result = $fn_id(table, unsafe { mem::transmute(ctrl) });
|
||||
let result: $ctrl_t = unsafe { mem::transmute(result) };
|
||||
let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
)*
|
||||
|
|
@ -171,20 +171,19 @@ macro_rules! test_vtbx {
|
|||
) => {
|
||||
#[cfg(target_endian = "little")]
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn $test_name() {
|
||||
fn $test_name() {
|
||||
// create table as array, and transmute it to
|
||||
// arm's table type
|
||||
let table: $table_t = mem::transmute([$($table_v),*]);
|
||||
let ext: $ext_t = mem::transmute([$($ext_v),*]);
|
||||
|
||||
let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
|
||||
let ext: $ext_t = unsafe { mem::transmute([$($ext_v),*]) };
|
||||
// For each control vector, perform a table lookup and
|
||||
// verify the result:
|
||||
$(
|
||||
{
|
||||
let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
|
||||
let result = $fn_id(ext, table, mem::transmute(ctrl));
|
||||
let result: $ctrl_t = mem::transmute(result);
|
||||
let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
|
||||
let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
|
||||
let result = $fn_id(ext, table, unsafe { mem::transmute(ctrl) });
|
||||
let result: $ctrl_t = unsafe { mem::transmute(result) };
|
||||
let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
)*
|
||||
|
|
|
|||
|
|
@ -111,13 +111,13 @@ macro_rules! V_f32 {
|
|||
|
||||
macro_rules! to64 {
|
||||
($t : ident) => {
|
||||
|v: $t| -> u64 { transmute(v) }
|
||||
|v: $t| -> u64 { unsafe { transmute(v) } }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! to128 {
|
||||
($t : ident) => {
|
||||
|v: $t| -> u128 { transmute(v) }
|
||||
|v: $t| -> u128 { unsafe { transmute(v) } }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -158,9 +158,7 @@ pub(crate) fn test<T, U, V, W, X>(
|
|||
macro_rules! gen_test_fn {
|
||||
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
|
||||
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
|
||||
unsafe {
|
||||
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
|
||||
};
|
||||
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,12 +32,14 @@
|
|||
x86_amx_intrinsics,
|
||||
f16,
|
||||
aarch64_unstable_target_feature,
|
||||
target_feature_inline_always,
|
||||
bigint_helper_methods,
|
||||
funnel_shifts,
|
||||
avx10_target_feature,
|
||||
const_trait_impl,
|
||||
const_cmp,
|
||||
const_eval_select
|
||||
const_eval_select,
|
||||
maybe_uninit_as_bytes
|
||||
)]
|
||||
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
|
||||
#![deny(clippy::missing_inline_in_public_items)]
|
||||
|
|
@ -87,4 +89,4 @@ pub mod arch {
|
|||
}
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
|
||||
use core::{array, cmp, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
|
||||
|
|
|
|||
|
|
@ -90,17 +90,10 @@ macro_rules! types {
|
|||
pub struct $name($v [$elem_type; $len]);
|
||||
|
||||
impl $name {
|
||||
/// Using `my_simd([x; N])` seemingly fails tests,
|
||||
/// so use this internal helper for it instead.
|
||||
/// Put the same value in every lane.
|
||||
#[inline(always)]
|
||||
$v fn splat(value: $elem_type) -> $name {
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(simd)]
|
||||
struct JustOne([$elem_type; 1]);
|
||||
let one = JustOne([value]);
|
||||
// SAFETY: 0 is always in-bounds because we're shuffling
|
||||
// a simd type with exactly one element.
|
||||
unsafe { simd_shuffle!(one, one, [0; $len]) }
|
||||
unsafe { $crate::intrinsics::simd::simd_splat(value) }
|
||||
}
|
||||
|
||||
/// Returns an array reference containing the entire SIMD vector.
|
||||
|
|
@ -135,6 +128,22 @@ macro_rules! types {
|
|||
crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
|
||||
}
|
||||
}
|
||||
|
||||
$(#[$stability])+
|
||||
impl crate::convert::From<crate::core_arch::simd::Simd<$elem_type, $len>> for $name {
|
||||
#[inline(always)]
|
||||
fn from(simd: crate::core_arch::simd::Simd<$elem_type, $len>) -> Self {
|
||||
unsafe { crate::mem::transmute(simd) }
|
||||
}
|
||||
}
|
||||
|
||||
$(#[$stability])+
|
||||
impl crate::convert::From<$name> for crate::core_arch::simd::Simd<$elem_type, $len> {
|
||||
#[inline(always)]
|
||||
fn from(simd: $name) -> Self {
|
||||
unsafe { crate::mem::transmute(simd) }
|
||||
}
|
||||
}
|
||||
)*);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,29 +23,29 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.nvvm.barrier0"]
|
||||
fn syncthreads() -> ();
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
|
||||
fn block_dim_x() -> i32;
|
||||
fn block_dim_x() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
|
||||
fn block_dim_y() -> i32;
|
||||
fn block_dim_y() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
|
||||
fn block_dim_z() -> i32;
|
||||
fn block_dim_z() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
|
||||
fn block_idx_x() -> i32;
|
||||
fn block_idx_x() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
|
||||
fn block_idx_y() -> i32;
|
||||
fn block_idx_y() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
|
||||
fn block_idx_z() -> i32;
|
||||
fn block_idx_z() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
|
||||
fn grid_dim_x() -> i32;
|
||||
fn grid_dim_x() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
|
||||
fn grid_dim_y() -> i32;
|
||||
fn grid_dim_y() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
|
||||
fn grid_dim_z() -> i32;
|
||||
fn grid_dim_z() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
|
||||
fn thread_idx_x() -> i32;
|
||||
fn thread_idx_x() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
|
||||
fn thread_idx_y() -> i32;
|
||||
fn thread_idx_y() -> u32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
|
||||
fn thread_idx_z() -> i32;
|
||||
fn thread_idx_z() -> u32;
|
||||
}
|
||||
|
||||
/// Synchronizes all threads in the block.
|
||||
|
|
@ -58,84 +58,84 @@ pub unsafe fn _syncthreads() -> () {
|
|||
/// x-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_x() -> i32 {
|
||||
pub unsafe fn _block_dim_x() -> u32 {
|
||||
block_dim_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_y() -> i32 {
|
||||
pub unsafe fn _block_dim_y() -> u32 {
|
||||
block_dim_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_dim_z() -> i32 {
|
||||
pub unsafe fn _block_dim_z() -> u32 {
|
||||
block_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_x() -> i32 {
|
||||
pub unsafe fn _block_idx_x() -> u32 {
|
||||
block_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_y() -> i32 {
|
||||
pub unsafe fn _block_idx_y() -> u32 {
|
||||
block_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _block_idx_z() -> i32 {
|
||||
pub unsafe fn _block_idx_z() -> u32 {
|
||||
block_idx_z()
|
||||
}
|
||||
|
||||
/// x-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_x() -> i32 {
|
||||
pub unsafe fn _grid_dim_x() -> u32 {
|
||||
grid_dim_x()
|
||||
}
|
||||
|
||||
/// y-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_y() -> i32 {
|
||||
pub unsafe fn _grid_dim_y() -> u32 {
|
||||
grid_dim_y()
|
||||
}
|
||||
|
||||
/// z-th block-grid dimension.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _grid_dim_z() -> i32 {
|
||||
pub unsafe fn _grid_dim_z() -> u32 {
|
||||
grid_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_x() -> i32 {
|
||||
pub unsafe fn _thread_idx_x() -> u32 {
|
||||
thread_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_y() -> i32 {
|
||||
pub unsafe fn _thread_idx_y() -> u32 {
|
||||
thread_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread index.
|
||||
#[inline]
|
||||
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
|
||||
pub unsafe fn _thread_idx_z() -> i32 {
|
||||
pub unsafe fn _thread_idx_z() -> u32 {
|
||||
thread_idx_z()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -364,17 +364,46 @@ unsafe extern "C" {
|
|||
fn vrfin(a: vector_float) -> vector_float;
|
||||
}
|
||||
|
||||
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4 }
|
||||
|
||||
impl_neg! { i8x16 : 0 }
|
||||
impl_neg! { i16x8 : 0 }
|
||||
impl_neg! { i32x4 : 0 }
|
||||
impl_neg! { f32x4 : 0f32 }
|
||||
|
||||
#[macro_use]
|
||||
mod sealed {
|
||||
use super::*;
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorNeg {
|
||||
unsafe fn vec_neg(self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_neg {
|
||||
($($v:ty)*) => {
|
||||
$(
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl VectorNeg for $v {
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn vec_neg(self) -> Self {
|
||||
simd_neg(self)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_neg! {
|
||||
vector_signed_char
|
||||
vector_unsigned_char
|
||||
vector_bool_char
|
||||
|
||||
vector_signed_short
|
||||
vector_unsigned_short
|
||||
vector_bool_short
|
||||
|
||||
vector_signed_int
|
||||
vector_unsigned_int
|
||||
vector_bool_int
|
||||
|
||||
vector_float
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub trait VectorInsert {
|
||||
type Scalar;
|
||||
|
|
@ -1380,7 +1409,7 @@ mod sealed {
|
|||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
|
||||
v.vec_max(-v)
|
||||
v.vec_max(simd_neg(v))
|
||||
}
|
||||
|
||||
impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
|
||||
|
|
@ -1428,7 +1457,7 @@ mod sealed {
|
|||
#[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))]
|
||||
unsafe fn vspltb<const IMM4: u32>(a: vector_signed_char) -> vector_signed_char {
|
||||
static_assert_uimm_bits!(IMM4, 4);
|
||||
simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) })
|
||||
simd_shuffle(a, a, const { u32x16::splat(IMM4) })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
@ -1436,7 +1465,7 @@ mod sealed {
|
|||
#[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))]
|
||||
unsafe fn vsplth<const IMM3: u32>(a: vector_signed_short) -> vector_signed_short {
|
||||
static_assert_uimm_bits!(IMM3, 3);
|
||||
simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) })
|
||||
simd_shuffle(a, a, const { u32x8::splat(IMM3) })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
@ -1445,7 +1474,7 @@ mod sealed {
|
|||
#[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))]
|
||||
unsafe fn vspltw<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int {
|
||||
static_assert_uimm_bits!(IMM2, 2);
|
||||
simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) })
|
||||
simd_shuffle(a, a, const { u32x4::splat(IMM2) })
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
|
|
@ -4032,6 +4061,14 @@ pub unsafe fn vec_mfvscr() -> vector_unsigned_short {
|
|||
mfvscr()
|
||||
}
|
||||
|
||||
/// Vector Negate
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
|
||||
a.vec_neg()
|
||||
}
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
#[target_feature(enable = "altivec")]
|
||||
|
|
@ -4703,7 +4740,7 @@ mod tests {
|
|||
for off in 0..16 {
|
||||
let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off)));
|
||||
for i in 0..16 {
|
||||
let v = val.extract(i);
|
||||
let v = val.extract_dyn(i);
|
||||
assert_eq!(off as usize + i, v as usize);
|
||||
}
|
||||
}
|
||||
|
|
@ -4758,7 +4795,7 @@ mod tests {
|
|||
)];
|
||||
for off in 0..16 {
|
||||
let v: u8x16 = transmute(vec_lde(off, pat.as_ptr() as *const u8));
|
||||
assert_eq!(off as u8, v.extract(off as _));
|
||||
assert_eq!(off as u8, v.extract_dyn(off as _));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4767,7 +4804,7 @@ mod tests {
|
|||
let pat = [u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)];
|
||||
for off in 0..8 {
|
||||
let v: u16x8 = transmute(vec_lde(off * 2, pat.as_ptr() as *const u16));
|
||||
assert_eq!(off as u16, v.extract(off as _));
|
||||
assert_eq!(off as u16, v.extract_dyn(off as _));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4776,7 +4813,7 @@ mod tests {
|
|||
let pat = [u32x4::new(0, 1, 2, 3)];
|
||||
for off in 0..4 {
|
||||
let v: u32x4 = transmute(vec_lde(off * 4, pat.as_ptr() as *const u32));
|
||||
assert_eq!(off as u32, v.extract(off as _));
|
||||
assert_eq!(off as u32, v.extract_dyn(off as _));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -274,40 +274,6 @@ macro_rules! t_b {
|
|||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_from {
|
||||
($s: ident) => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl From<$s> for s_t_l!($s) {
|
||||
#[inline]
|
||||
fn from (v: $s) -> Self {
|
||||
unsafe {
|
||||
transmute(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
($($s: ident),*) => {
|
||||
$(
|
||||
impl_from! { $s }
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_neg {
|
||||
($s: ident : $zero: expr) => {
|
||||
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
|
||||
impl crate::ops::Neg for s_t_l!($s) {
|
||||
type Output = s_t_l!($s);
|
||||
#[inline]
|
||||
fn neg(self) -> Self::Output {
|
||||
unsafe { simd_neg(self) }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub(crate) use impl_from;
|
||||
pub(crate) use impl_neg;
|
||||
pub(crate) use impl_vec_trait;
|
||||
pub(crate) use s_t_l;
|
||||
pub(crate) use t_b;
|
||||
|
|
|
|||
|
|
@ -431,40 +431,6 @@ macro_rules! t_b {
|
|||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_from {
|
||||
($s: ident) => {
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
impl From<$s> for s_t_l!($s) {
|
||||
#[inline]
|
||||
fn from (v: $s) -> Self {
|
||||
unsafe {
|
||||
transmute(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
($($s: ident),*) => {
|
||||
$(
|
||||
impl_from! { $s }
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_neg {
|
||||
($s: ident : $zero: expr) => {
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
impl crate::ops::Neg for s_t_l!($s) {
|
||||
type Output = s_t_l!($s);
|
||||
#[inline]
|
||||
fn neg(self) -> Self::Output {
|
||||
unsafe { simd_neg(self) }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub(crate) use impl_from;
|
||||
pub(crate) use impl_neg;
|
||||
pub(crate) use impl_vec_trait;
|
||||
pub(crate) use l_t_t;
|
||||
pub(crate) use s_t_l;
|
||||
|
|
|
|||
|
|
@ -281,17 +281,14 @@ unsafe extern "unadjusted" {
|
|||
#[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
|
||||
#[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
|
||||
#[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
|
||||
|
||||
#[link_name = "llvm.s390.vclfnhs"] fn vclfnhs(a: vector_signed_short, immarg: i32) -> vector_float;
|
||||
#[link_name = "llvm.s390.vclfnls"] fn vclfnls(a: vector_signed_short, immarg: i32) -> vector_float;
|
||||
#[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
|
||||
#[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
|
||||
#[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
|
||||
}
|
||||
|
||||
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
|
||||
|
||||
impl_neg! { i8x16 : 0 }
|
||||
impl_neg! { i16x8 : 0 }
|
||||
impl_neg! { i32x4 : 0 }
|
||||
impl_neg! { i64x2 : 0 }
|
||||
impl_neg! { f32x4 : 0f32 }
|
||||
impl_neg! { f64x2 : 0f64 }
|
||||
|
||||
#[repr(simd)]
|
||||
struct ShuffleMask<const N: usize>([u32; N]);
|
||||
|
||||
|
|
@ -439,6 +436,43 @@ enum FindImm {
|
|||
mod sealed {
|
||||
use super::*;
|
||||
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub trait VectorNeg {
|
||||
unsafe fn vec_neg(self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_neg {
|
||||
($($v:ty)*) => {
|
||||
$(
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
impl VectorNeg for $v {
|
||||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
unsafe fn vec_neg(self) -> Self {
|
||||
simd_neg(self)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_neg! {
|
||||
vector_signed_char
|
||||
vector_unsigned_char
|
||||
|
||||
vector_signed_short
|
||||
vector_unsigned_short
|
||||
|
||||
vector_signed_int
|
||||
vector_unsigned_int
|
||||
|
||||
vector_signed_long_long
|
||||
vector_unsigned_long_long
|
||||
|
||||
vector_float
|
||||
vector_double
|
||||
}
|
||||
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub trait VectorAdd<Other> {
|
||||
type Result;
|
||||
|
|
@ -761,7 +795,7 @@ mod sealed {
|
|||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
|
||||
v.vec_max(-v)
|
||||
v.vec_max(simd_neg(v))
|
||||
}
|
||||
|
||||
impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
|
||||
|
|
@ -4055,6 +4089,14 @@ unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
|
|||
lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
|
||||
}
|
||||
|
||||
/// Vector Negate
|
||||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
|
||||
a.vec_neg()
|
||||
}
|
||||
|
||||
/// Vector Add
|
||||
#[inline]
|
||||
#[target_feature(enable = "vector")]
|
||||
|
|
@ -5875,6 +5917,74 @@ pub unsafe fn vec_promote<T: sealed::VectorPromote>(a: T::ElementType, b: i32) -
|
|||
T::vec_promote(a, b)
|
||||
}
|
||||
|
||||
/// Converts the left-most half of `a` to a vector of single-precision numbers.
|
||||
/// The format of the source vector elements is specified by `B`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "nnp-assist")]
|
||||
#[cfg_attr(test, assert_instr(vclfnh, B = 0))]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub unsafe fn vec_extend_to_fp32_hi<const B: i32>(a: vector_signed_short) -> vector_float {
|
||||
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
|
||||
static_assert_uimm_bits!(B, 4);
|
||||
|
||||
vclfnhs(a, B)
|
||||
}
|
||||
|
||||
/// Converts the right-most half of `a` to a vector of single-precision numbers.
|
||||
/// The format of the source vector elements is specified by `B`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "nnp-assist")]
|
||||
#[cfg_attr(test, assert_instr(vclfnl, B = 0))]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub unsafe fn vec_extend_to_fp32_lo<const B: i32>(a: vector_signed_short) -> vector_float {
|
||||
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
|
||||
static_assert_uimm_bits!(B, 4);
|
||||
|
||||
vclfnls(a, B)
|
||||
}
|
||||
|
||||
/// Converts the elements of vector `a` to the 16-bit IEEE floating point format.
|
||||
/// The format of the source vector elements is specified by `B`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "nnp-assist")]
|
||||
#[cfg_attr(test, assert_instr(vcfn, B = 0))]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub unsafe fn vec_convert_to_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
|
||||
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
|
||||
static_assert_uimm_bits!(B, 4);
|
||||
|
||||
vcfn(a, B)
|
||||
}
|
||||
|
||||
/// Converts the elements of vector `a` to an internal floating point format.
|
||||
/// The format of the target vector elements is specified by `B`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "nnp-assist")]
|
||||
#[cfg_attr(test, assert_instr(vcnf, B = 0))]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
pub unsafe fn vec_convert_from_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
|
||||
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
|
||||
static_assert_uimm_bits!(B, 4);
|
||||
|
||||
vcnf(a, B)
|
||||
}
|
||||
|
||||
/// Converts the elements of single-precision vectors `a` and `b` to an internal floating point
|
||||
/// format with 16-bit sized elements. The format of the target vector elements is specified by `C`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "nnp-assist")]
|
||||
#[unstable(feature = "stdarch_s390x", issue = "135681")]
|
||||
#[cfg_attr(test, assert_instr(vcrnf, C = 0))]
|
||||
pub unsafe fn vec_round_from_fp32<const C: i32>(
|
||||
a: vector_float,
|
||||
b: vector_float,
|
||||
) -> vector_signed_short {
|
||||
// On processors implementing the IBM z16 architecture, only the value 0 is supported.
|
||||
static_assert_uimm_bits!(C, 4);
|
||||
|
||||
vcrnfs(a, b, C)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -16,129 +16,216 @@ pub(crate) const unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
|
|||
crate::intrinsics::simd::simd_select(mask, a, b)
|
||||
}
|
||||
|
||||
/// SAFETY: All bits patterns must be valid
|
||||
pub(crate) unsafe trait SimdElement:
|
||||
Copy + const PartialEq + crate::fmt::Debug
|
||||
{
|
||||
}
|
||||
|
||||
unsafe impl SimdElement for u8 {}
|
||||
unsafe impl SimdElement for u16 {}
|
||||
unsafe impl SimdElement for u32 {}
|
||||
unsafe impl SimdElement for u64 {}
|
||||
|
||||
unsafe impl SimdElement for i8 {}
|
||||
unsafe impl SimdElement for i16 {}
|
||||
unsafe impl SimdElement for i32 {}
|
||||
unsafe impl SimdElement for i64 {}
|
||||
|
||||
unsafe impl SimdElement for f16 {}
|
||||
unsafe impl SimdElement for f32 {}
|
||||
unsafe impl SimdElement for f64 {}
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Copy)]
|
||||
pub(crate) struct Simd<T: SimdElement, const N: usize>([T; N]);
|
||||
|
||||
impl<T: SimdElement, const N: usize> Simd<T, N> {
|
||||
/// A value of this type where all elements are zeroed out.
|
||||
// SAFETY: `T` implements `SimdElement`, so it is zeroable.
|
||||
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) const fn from_array(elements: [T; N]) -> Self {
|
||||
Self(elements)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn splat(value: T) -> Self {
|
||||
unsafe { crate::intrinsics::simd::simd_splat(value) }
|
||||
}
|
||||
|
||||
/// Extract the element at position `index`. Note that `index` is not a constant so this
|
||||
/// operation is not efficient on most platforms. Use for testing only.
|
||||
#[inline]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn extract_dyn(&self, index: usize) -> T {
|
||||
assert!(index < N);
|
||||
// SAFETY: self is a vector, T its element type.
|
||||
unsafe { crate::intrinsics::simd::simd_extract_dyn(*self, index as u32) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn as_array(&self) -> &[T; N] {
|
||||
let simd_ptr: *const Self = self;
|
||||
let array_ptr: *const [T; N] = simd_ptr.cast();
|
||||
// SAFETY: We can always read the prefix of a simd type as an array.
|
||||
// There might be more padding afterwards for some widths, but
|
||||
// that's not a problem for reading less than that.
|
||||
unsafe { &*array_ptr }
|
||||
}
|
||||
}
|
||||
|
||||
// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::Simd is banned by MCP#838"
|
||||
impl<T: SimdElement, const N: usize> Clone for Simd<T, N> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for Simd<T, N> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_array() == other.as_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SimdElement, const N: usize> crate::fmt::Debug for Simd<T, N> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
|
||||
debug_simd_finish(f, "Simd", self.as_array())
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Simd<f16, N> {
|
||||
#[inline]
|
||||
pub(crate) const fn to_bits(self) -> Simd<u16, N> {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn from_bits(bits: Simd<u16, N>) -> Self {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&bits) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Simd<f32, N> {
|
||||
#[inline]
|
||||
pub(crate) const fn to_bits(self) -> Simd<u32, N> {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn from_bits(bits: Simd<u32, N>) -> Self {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&bits) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Simd<f64, N> {
|
||||
#[inline]
|
||||
pub(crate) const fn to_bits(self) -> Simd<u64, N> {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn from_bits(bits: Simd<u64, N>) -> Self {
|
||||
assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
|
||||
unsafe { crate::mem::transmute_copy(&bits) }
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! simd_ty {
|
||||
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub(crate) struct $id([$elem_type; $len]);
|
||||
pub(crate) type $id = Simd<$elem_type, $len>;
|
||||
|
||||
#[allow(clippy::use_self)]
|
||||
impl $id {
|
||||
/// A value of this type where all elements are zeroed out.
|
||||
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
|
||||
$id([$($param_name),*])
|
||||
}
|
||||
#[inline(always)]
|
||||
pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
|
||||
$id(elements)
|
||||
}
|
||||
// FIXME: Workaround rust@60637
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn splat(value: $elem_type) -> Self {
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(simd)]
|
||||
struct JustOne([$elem_type; 1]);
|
||||
let one = JustOne([value]);
|
||||
// SAFETY: 0 is always in-bounds because we're shuffling
|
||||
// a simd type with exactly one element.
|
||||
unsafe { simd_shuffle!(one, one, [0; $len]) }
|
||||
}
|
||||
|
||||
/// Extract the element at position `index`.
|
||||
/// `index` is not a constant so this is not efficient!
|
||||
/// Use for testing only.
|
||||
// FIXME: Workaround rust@60637
|
||||
#[inline(always)]
|
||||
pub(crate) const fn extract(&self, index: usize) -> $elem_type {
|
||||
self.as_array()[index]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
|
||||
let simd_ptr: *const Self = self;
|
||||
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
|
||||
// SAFETY: We can always read the prefix of a simd type as an array.
|
||||
// There might be more padding afterwards for some widths, but
|
||||
// that's not a problem for reading less than that.
|
||||
unsafe { &*array_ptr }
|
||||
}
|
||||
}
|
||||
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const impl core::cmp::PartialEq for $id {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_array() == other.as_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for $id {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
debug_simd_finish(f, stringify!($id), self.as_array())
|
||||
Self([$($param_name),*])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Copy)]
|
||||
pub(crate) struct SimdM<T: SimdElement, const N: usize>([T; N]);
|
||||
|
||||
impl<T: SimdElement, const N: usize> SimdM<T, N> {
|
||||
#[inline(always)]
|
||||
const fn bool_to_internal(x: bool) -> T {
|
||||
// SAFETY: `T` implements `SimdElement`, so all bit patterns are valid.
|
||||
let zeros = const { unsafe { crate::mem::zeroed::<T>() } };
|
||||
let ones = const {
|
||||
// Ideally, this would be `transmute([0xFFu8; size_of::<T>()])`, but
|
||||
// `size_of::<T>()` is not allowed to use a generic parameter there.
|
||||
let mut r = crate::mem::MaybeUninit::<T>::uninit();
|
||||
let mut i = 0;
|
||||
while i < crate::mem::size_of::<T>() {
|
||||
r.as_bytes_mut()[i] = crate::mem::MaybeUninit::new(0xFF);
|
||||
i += 1;
|
||||
}
|
||||
unsafe { r.assume_init() }
|
||||
};
|
||||
[zeros, ones][x as usize]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn splat(value: bool) -> Self {
|
||||
unsafe { crate::intrinsics::simd::simd_splat(value) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn as_array(&self) -> &[T; N] {
|
||||
let simd_ptr: *const Self = self;
|
||||
let array_ptr: *const [T; N] = simd_ptr.cast();
|
||||
// SAFETY: We can always read the prefix of a simd type as an array.
|
||||
// There might be more padding afterwards for some widths, but
|
||||
// that's not a problem for reading less than that.
|
||||
unsafe { &*array_ptr }
|
||||
}
|
||||
}
|
||||
|
||||
// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::SimdM is banned by MCP#838"
|
||||
impl<T: SimdElement, const N: usize> Clone for SimdM<T, N> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for SimdM<T, N> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_array() == other.as_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SimdElement, const N: usize> crate::fmt::Debug for SimdM<T, N> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
|
||||
debug_simd_finish(f, "SimdM", self.as_array())
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! simd_m_ty {
|
||||
($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
|
||||
#[repr(simd)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub(crate) struct $id([$elem_type; $len]);
|
||||
pub(crate) type $id = SimdM<$elem_type, $len>;
|
||||
|
||||
#[allow(clippy::use_self)]
|
||||
impl $id {
|
||||
#[inline(always)]
|
||||
const fn bool_to_internal(x: bool) -> $elem_type {
|
||||
[0 as $elem_type, !(0 as $elem_type)][x as usize]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) const fn new($($param_name: bool),*) -> Self {
|
||||
$id([$(Self::bool_to_internal($param_name)),*])
|
||||
}
|
||||
|
||||
// FIXME: Workaround rust@60637
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn splat(value: bool) -> Self {
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(simd)]
|
||||
struct JustOne([$elem_type; 1]);
|
||||
let one = JustOne([Self::bool_to_internal(value)]);
|
||||
// SAFETY: 0 is always in-bounds because we're shuffling
|
||||
// a simd type with exactly one element.
|
||||
unsafe { simd_shuffle!(one, one, [0; $len]) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) const fn as_array(&self) -> &[$elem_type; $len] {
|
||||
let simd_ptr: *const Self = self;
|
||||
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
|
||||
// SAFETY: We can always read the prefix of a simd type as an array.
|
||||
// There might be more padding afterwards for some widths, but
|
||||
// that's not a problem for reading less than that.
|
||||
unsafe { &*array_ptr }
|
||||
}
|
||||
}
|
||||
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const impl core::cmp::PartialEq for $id {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_array() == other.as_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for $id {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
debug_simd_finish(f, stringify!($id), self.as_array())
|
||||
Self([$(Self::bool_to_internal($param_name)),*])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2746,7 +2746,7 @@ pub const fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_pd(a: f64) -> __m256d {
|
||||
_mm256_setr_pd(a, a, a, a)
|
||||
f64x4::splat(a).as_m256d()
|
||||
}
|
||||
|
||||
/// Broadcasts single-precision (32-bit) floating-point value `a` to all
|
||||
|
|
@ -2759,7 +2759,7 @@ pub const fn _mm256_set1_pd(a: f64) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_ps(a: f32) -> __m256 {
|
||||
_mm256_setr_ps(a, a, a, a, a, a, a, a)
|
||||
f32x8::splat(a).as_m256()
|
||||
}
|
||||
|
||||
/// Broadcasts 8-bit integer `a` to all elements of returned vector.
|
||||
|
|
@ -2772,13 +2772,7 @@ pub const fn _mm256_set1_ps(a: f32) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
|
||||
#[rustfmt::skip]
|
||||
_mm256_setr_epi8(
|
||||
a, a, a, a, a, a, a, a,
|
||||
a, a, a, a, a, a, a, a,
|
||||
a, a, a, a, a, a, a, a,
|
||||
a, a, a, a, a, a, a, a,
|
||||
)
|
||||
i8x32::splat(a).as_m256i()
|
||||
}
|
||||
|
||||
/// Broadcasts 16-bit integer `a` to all elements of returned vector.
|
||||
|
|
@ -2793,7 +2787,7 @@ pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
|
||||
_mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
|
||||
i16x16::splat(a).as_m256i()
|
||||
}
|
||||
|
||||
/// Broadcasts 32-bit integer `a` to all elements of returned vector.
|
||||
|
|
@ -2806,7 +2800,7 @@ pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
|
||||
_mm256_setr_epi32(a, a, a, a, a, a, a, a)
|
||||
i32x8::splat(a).as_m256i()
|
||||
}
|
||||
|
||||
/// Broadcasts 64-bit integer `a` to all elements of returned vector.
|
||||
|
|
@ -2821,7 +2815,7 @@ pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm256_set1_epi64x(a: i64) -> __m256i {
|
||||
_mm256_setr_epi64x(a, a, a, a)
|
||||
i64x4::splat(a).as_m256i()
|
||||
}
|
||||
|
||||
/// Cast vector of type __m256d to type __m256.
|
||||
|
|
|
|||
|
|
@ -932,7 +932,7 @@ pub const fn _mm_set_ss(a: f32) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm_set1_ps(a: f32) -> __m128 {
|
||||
__m128([a, a, a, a])
|
||||
f32x4::splat(a).as_m128()
|
||||
}
|
||||
|
||||
/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
|
||||
|
|
@ -2079,7 +2079,7 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::assert_eq_const as assert_eq;
|
||||
use crate::{hint::black_box, mem::transmute, ptr};
|
||||
use crate::{hint::black_box, ptr};
|
||||
use std::boxed;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
|
|
@ -2221,7 +2221,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_min_ps() {
|
||||
fn test_mm_min_ps() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_min_ps(a, b);
|
||||
|
|
@ -2234,10 +2234,10 @@ mod tests {
|
|||
// `r1` to `a` and `r2` to `b`.
|
||||
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
|
||||
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
let r1 = _mm_min_ps(a, b).as_f32x4().to_bits();
|
||||
let r2 = _mm_min_ps(b, a).as_f32x4().to_bits();
|
||||
let a = a.as_f32x4().to_bits();
|
||||
let b = b.as_f32x4().to_bits();
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
|
|
@ -2252,7 +2252,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_max_ps() {
|
||||
fn test_mm_max_ps() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_max_ps(a, b);
|
||||
|
|
@ -2261,67 +2261,67 @@ mod tests {
|
|||
// Check SSE-specific semantics for -0.0 handling.
|
||||
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
|
||||
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
let r1 = _mm_max_ps(a, b).as_f32x4().to_bits();
|
||||
let r2 = _mm_max_ps(b, a).as_f32x4().to_bits();
|
||||
let a = a.as_f32x4().to_bits();
|
||||
let b = b.as_f32x4().to_bits();
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_and_ps() {
|
||||
let a = transmute(u32x4::splat(0b0011));
|
||||
let b = transmute(u32x4::splat(0b0101));
|
||||
const fn test_mm_and_ps() {
|
||||
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
|
||||
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
|
||||
let r = _mm_and_ps(*black_box(&a), *black_box(&b));
|
||||
let e = transmute(u32x4::splat(0b0001));
|
||||
let e = f32x4::from_bits(u32x4::splat(0b0001)).as_m128();
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_andnot_ps() {
|
||||
let a = transmute(u32x4::splat(0b0011));
|
||||
let b = transmute(u32x4::splat(0b0101));
|
||||
const fn test_mm_andnot_ps() {
|
||||
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
|
||||
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
|
||||
let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
|
||||
let e = transmute(u32x4::splat(0b0100));
|
||||
let e = f32x4::from_bits(u32x4::splat(0b0100)).as_m128();
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_or_ps() {
|
||||
let a = transmute(u32x4::splat(0b0011));
|
||||
let b = transmute(u32x4::splat(0b0101));
|
||||
const fn test_mm_or_ps() {
|
||||
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
|
||||
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
|
||||
let r = _mm_or_ps(*black_box(&a), *black_box(&b));
|
||||
let e = transmute(u32x4::splat(0b0111));
|
||||
let e = f32x4::from_bits(u32x4::splat(0b0111)).as_m128();
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
const unsafe fn test_mm_xor_ps() {
|
||||
let a = transmute(u32x4::splat(0b0011));
|
||||
let b = transmute(u32x4::splat(0b0101));
|
||||
const fn test_mm_xor_ps() {
|
||||
let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
|
||||
let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
|
||||
let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
|
||||
let e = transmute(u32x4::splat(0b0110));
|
||||
let e = f32x4::from_bits(u32x4::splat(0b0110)).as_m128();
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpeq_ss() {
|
||||
fn test_mm_cmpeq_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
|
||||
let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
|
||||
let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
|
||||
let r = _mm_cmpeq_ss(a, b).as_f32x4().to_bits();
|
||||
let e = f32x4::new(f32::from_bits(0), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(r, e);
|
||||
|
||||
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
|
||||
let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
|
||||
let r2 = _mm_cmpeq_ss(a, b2).as_f32x4().to_bits();
|
||||
let e2 = f32x4::new(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(r2, e2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmplt_ss() {
|
||||
fn test_mm_cmplt_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2331,21 +2331,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) < c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) < d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmplt_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmplt_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmplt_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmple_ss() {
|
||||
fn test_mm_cmple_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2355,21 +2355,21 @@ mod tests {
|
|||
let c1 = !0u32; // a.extract(0) <= c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) <= d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmple_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmple_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmple_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpgt_ss() {
|
||||
fn test_mm_cmpgt_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2379,21 +2379,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) > c.extract(0)
|
||||
let d1 = 0u32; // a.extract(0) > d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpgt_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpgt_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpgt_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpge_ss() {
|
||||
fn test_mm_cmpge_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2403,21 +2403,21 @@ mod tests {
|
|||
let c1 = !0u32; // a.extract(0) >= c.extract(0)
|
||||
let d1 = 0u32; // a.extract(0) >= d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpge_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpge_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpge_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpneq_ss() {
|
||||
fn test_mm_cmpneq_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2427,21 +2427,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) != c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) != d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpneq_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpneq_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpneq_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnlt_ss() {
|
||||
fn test_mm_cmpnlt_ss() {
|
||||
// TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there
|
||||
// must be a difference. It may have to do with behavior in the
|
||||
// presence of NaNs (signaling or quiet). If so, we should add tests
|
||||
|
|
@ -2456,21 +2456,21 @@ mod tests {
|
|||
let c1 = !0u32; // a.extract(0) >= c.extract(0)
|
||||
let d1 = 0u32; // a.extract(0) >= d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpnlt_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpnlt_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpnlt_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnle_ss() {
|
||||
fn test_mm_cmpnle_ss() {
|
||||
// TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there
|
||||
// must be a difference. It may have to do with behavior in the
|
||||
// presence
|
||||
|
|
@ -2485,21 +2485,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) > c.extract(0)
|
||||
let d1 = 0u32; // a.extract(0) > d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpnle_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpnle_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpnle_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpngt_ss() {
|
||||
fn test_mm_cmpngt_ss() {
|
||||
// TODO: this test is exactly the same as for `_mm_cmple_ss`, but there
|
||||
// must be a difference. It may have to do with behavior in the
|
||||
// presence of NaNs (signaling or quiet). If so, we should add tests
|
||||
|
|
@ -2514,21 +2514,21 @@ mod tests {
|
|||
let c1 = !0u32; // a.extract(0) <= c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) <= d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpngt_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpngt_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpngt_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnge_ss() {
|
||||
fn test_mm_cmpnge_ss() {
|
||||
// TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there
|
||||
// must be a difference. It may have to do with behavior in the
|
||||
// presence of NaNs (signaling or quiet). If so, we should add tests
|
||||
|
|
@ -2543,21 +2543,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) < c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) < d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpnge_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpnge_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpnge_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpord_ss() {
|
||||
fn test_mm_cmpord_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2567,21 +2567,21 @@ mod tests {
|
|||
let c1 = 0u32; // a.extract(0) ord c.extract(0)
|
||||
let d1 = !0u32; // a.extract(0) ord d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpord_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpord_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpord_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpunord_ss() {
|
||||
fn test_mm_cmpunord_ss() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
|
||||
let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
|
||||
|
|
@ -2591,160 +2591,160 @@ mod tests {
|
|||
let c1 = !0u32; // a.extract(0) unord c.extract(0)
|
||||
let d1 = 0u32; // a.extract(0) unord d.extract(0)
|
||||
|
||||
let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
|
||||
let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
|
||||
let rb = _mm_cmpunord_ss(a, b).as_f32x4().to_bits();
|
||||
let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rb, eb);
|
||||
|
||||
let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
|
||||
let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
|
||||
let rc = _mm_cmpunord_ss(a, c).as_f32x4().to_bits();
|
||||
let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rc, ec);
|
||||
|
||||
let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
|
||||
let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
|
||||
let rd = _mm_cmpunord_ss(a, d).as_f32x4().to_bits();
|
||||
let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
|
||||
assert_eq!(rd, ed);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpeq_ps() {
|
||||
fn test_mm_cmpeq_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, fls, tru, fls);
|
||||
let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
|
||||
let r = _mm_cmpeq_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmplt_ps() {
|
||||
fn test_mm_cmplt_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, fls, fls, fls);
|
||||
let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
|
||||
let r = _mm_cmplt_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmple_ps() {
|
||||
fn test_mm_cmple_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, fls, tru, fls);
|
||||
let r: u32x4 = transmute(_mm_cmple_ps(a, b));
|
||||
let r = _mm_cmple_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpgt_ps() {
|
||||
fn test_mm_cmpgt_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, tru, fls, fls);
|
||||
let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
|
||||
let r = _mm_cmpgt_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpge_ps() {
|
||||
fn test_mm_cmpge_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, tru, tru, fls);
|
||||
let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
|
||||
let r = _mm_cmpge_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpneq_ps() {
|
||||
fn test_mm_cmpneq_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, tru, fls, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
|
||||
let r = _mm_cmpneq_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnlt_ps() {
|
||||
fn test_mm_cmpnlt_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, tru, tru, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
|
||||
let r = _mm_cmpnlt_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnle_ps() {
|
||||
fn test_mm_cmpnle_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, tru, fls, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
|
||||
let r = _mm_cmpnle_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpngt_ps() {
|
||||
fn test_mm_cmpngt_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, fls, tru, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
|
||||
let r = _mm_cmpngt_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpnge_ps() {
|
||||
fn test_mm_cmpnge_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
|
||||
let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, fls, fls, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
|
||||
let r = _mm_cmpnge_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpord_ps() {
|
||||
fn test_mm_cmpord_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
|
||||
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(tru, fls, fls, fls);
|
||||
let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
|
||||
let r = _mm_cmpord_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_cmpunord_ps() {
|
||||
fn test_mm_cmpunord_ps() {
|
||||
let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
|
||||
let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
|
||||
let tru = !0u32;
|
||||
let fls = 0u32;
|
||||
|
||||
let e = u32x4::new(fls, tru, tru, tru);
|
||||
let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
|
||||
let r = _mm_cmpunord_ps(a, b).as_f32x4().to_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1176,7 +1176,7 @@ pub const fn _mm_set_epi8(
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
|
||||
_mm_set_epi64x(a, a)
|
||||
i64x2::splat(a).as_m128i()
|
||||
}
|
||||
|
||||
/// Broadcasts 32-bit integer `a` to all elements.
|
||||
|
|
@ -1188,7 +1188,7 @@ pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm_set1_epi32(a: i32) -> __m128i {
|
||||
_mm_set_epi32(a, a, a, a)
|
||||
i32x4::splat(a).as_m128i()
|
||||
}
|
||||
|
||||
/// Broadcasts 16-bit integer `a` to all elements.
|
||||
|
|
@ -1200,7 +1200,7 @@ pub const fn _mm_set1_epi32(a: i32) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm_set1_epi16(a: i16) -> __m128i {
|
||||
_mm_set_epi16(a, a, a, a, a, a, a, a)
|
||||
i16x8::splat(a).as_m128i()
|
||||
}
|
||||
|
||||
/// Broadcasts 8-bit integer `a` to all elements.
|
||||
|
|
@ -1212,7 +1212,7 @@ pub const fn _mm_set1_epi16(a: i16) -> __m128i {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
|
||||
pub const fn _mm_set1_epi8(a: i8) -> __m128i {
|
||||
_mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
|
||||
i8x16::splat(a).as_m128i()
|
||||
}
|
||||
|
||||
/// Sets packed 32-bit integers with the supplied values in reverse order.
|
||||
|
|
@ -3280,11 +3280,7 @@ mod tests {
|
|||
core_arch::{simd::*, x86::*},
|
||||
hint::black_box,
|
||||
};
|
||||
use std::{
|
||||
boxed, f32, f64,
|
||||
mem::{self, transmute},
|
||||
ptr,
|
||||
};
|
||||
use std::{boxed, f32, f64, mem, ptr};
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
const NAN: f64 = f64::NAN;
|
||||
|
|
@ -4593,38 +4589,38 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_and_pd() {
|
||||
let a = transmute(u64x2::splat(5));
|
||||
let b = transmute(u64x2::splat(3));
|
||||
const fn test_mm_and_pd() {
|
||||
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
|
||||
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
|
||||
let r = _mm_and_pd(a, b);
|
||||
let e = transmute(u64x2::splat(1));
|
||||
let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_andnot_pd() {
|
||||
let a = transmute(u64x2::splat(5));
|
||||
let b = transmute(u64x2::splat(3));
|
||||
const fn test_mm_andnot_pd() {
|
||||
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
|
||||
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
|
||||
let r = _mm_andnot_pd(a, b);
|
||||
let e = transmute(u64x2::splat(2));
|
||||
let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_or_pd() {
|
||||
let a = transmute(u64x2::splat(5));
|
||||
let b = transmute(u64x2::splat(3));
|
||||
const fn test_mm_or_pd() {
|
||||
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
|
||||
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
|
||||
let r = _mm_or_pd(a, b);
|
||||
let e = transmute(u64x2::splat(7));
|
||||
let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
const unsafe fn test_mm_xor_pd() {
|
||||
let a = transmute(u64x2::splat(5));
|
||||
let b = transmute(u64x2::splat(3));
|
||||
const fn test_mm_xor_pd() {
|
||||
let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
|
||||
let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
|
||||
let r = _mm_xor_pd(a, b);
|
||||
let e = transmute(u64x2::splat(6));
|
||||
let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -78,38 +78,45 @@ pub(crate) const fn assert_eq_m512h(a: __m512h, b: __m512h) {
|
|||
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m128d(a: __m128d, idx: usize) -> f64 {
|
||||
a.as_f64x2().extract(idx)
|
||||
a.as_f64x2().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m128(a: __m128, idx: usize) -> f32 {
|
||||
a.as_f32x4().extract(idx)
|
||||
a.as_f32x4().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m256d(a: __m256d, idx: usize) -> f64 {
|
||||
a.as_f64x4().extract(idx)
|
||||
a.as_f64x4().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m256(a: __m256, idx: usize) -> f32 {
|
||||
a.as_f32x8().extract(idx)
|
||||
a.as_f32x8().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m512(a: __m512, idx: usize) -> f32 {
|
||||
a.as_f32x16().extract(idx)
|
||||
a.as_f32x16().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m512d(a: __m512d, idx: usize) -> f64 {
|
||||
a.as_f64x8().extract(idx)
|
||||
a.as_f64x8().extract_dyn(idx)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const fn get_m512i(a: __m512i, idx: usize) -> i64 {
|
||||
a.as_i64x8().extract(idx)
|
||||
a.as_i64x8().extract_dyn(idx)
|
||||
}
|
||||
|
||||
// not actually an intrinsic but useful in various tests as we ported from
|
||||
|
|
|
|||
|
|
@ -59,6 +59,3 @@ vluti4q_laneq_u8
|
|||
|
||||
# Broken in Clang
|
||||
vcvth_s16_f16
|
||||
# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line
|
||||
vmulh_lane_f16
|
||||
vmulh_laneq_f16
|
||||
|
|
|
|||
|
|
@ -100,6 +100,3 @@ vluti4q_laneq_u8
|
|||
|
||||
# Broken in Clang
|
||||
vcvth_s16_f16
|
||||
# FIXME: Broken output due to missing f16 printing support in Rust
|
||||
vmulh_lane_f16
|
||||
vmulh_laneq_f16
|
||||
|
|
|
|||
|
|
@ -17,6 +17,15 @@ pub fn simd_test(
|
|||
item: proc_macro::TokenStream,
|
||||
) -> proc_macro::TokenStream {
|
||||
let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
|
||||
|
||||
let target = env::var("TARGET").expect(
|
||||
"TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
|
||||
);
|
||||
let target_arch = target
|
||||
.split('-')
|
||||
.next()
|
||||
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"));
|
||||
|
||||
let (target_features, target_feature_attr) = match &tokens[..] {
|
||||
[] => (Vec::new(), TokenStream::new()),
|
||||
[
|
||||
|
|
@ -24,13 +33,20 @@ pub fn simd_test(
|
|||
TokenTree::Punct(equals),
|
||||
TokenTree::Literal(literal),
|
||||
] if enable == "enable" && equals.as_char() == '=' => {
|
||||
let enable_feature = literal.to_string();
|
||||
let enable_feature = enable_feature.trim_start_matches('"').trim_end_matches('"');
|
||||
let mut enable_feature = literal
|
||||
.to_string()
|
||||
.trim_start_matches('"')
|
||||
.trim_end_matches('"')
|
||||
.to_string();
|
||||
let target_features: Vec<_> = enable_feature
|
||||
.replace('+', "")
|
||||
.split(',')
|
||||
.map(String::from)
|
||||
.collect();
|
||||
// Allows using `#[simd_test(enable = "neon")]` on aarch64/armv7 shared tests.
|
||||
if target_arch == "armv7" && target_features.iter().any(|feat| feat == "neon") {
|
||||
enable_feature.push_str(",v7");
|
||||
}
|
||||
|
||||
(
|
||||
target_features,
|
||||
|
|
@ -46,14 +62,7 @@ pub fn simd_test(
|
|||
let item_attrs = std::mem::take(&mut item.attrs);
|
||||
let name = &item.sig.ident;
|
||||
|
||||
let target = env::var("TARGET").expect(
|
||||
"TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
|
||||
);
|
||||
let macro_test = match target
|
||||
.split('-')
|
||||
.next()
|
||||
.unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
|
||||
{
|
||||
let macro_test = match target_arch {
|
||||
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
|
||||
"arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
|
||||
"aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
|
||||
|
|
@ -85,10 +94,20 @@ pub fn simd_test(
|
|||
|
||||
let mut detect_missing_features = TokenStream::new();
|
||||
for feature in target_features {
|
||||
let q = quote_spanned! {
|
||||
proc_macro2::Span::call_site() =>
|
||||
if !::std::arch::#macro_test!(#feature) {
|
||||
missing_features.push(#feature);
|
||||
let q = if target_arch == "armv7" && feature == "fp16" {
|
||||
// "fp16" cannot be checked at runtime
|
||||
quote_spanned! {
|
||||
proc_macro2::Span::call_site() =>
|
||||
if !cfg!(target_feature = #feature) {
|
||||
missing_features.push(#feature);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
quote_spanned! {
|
||||
proc_macro2::Span::call_site() =>
|
||||
if !::std::arch::#macro_test!(#feature) {
|
||||
missing_features.push(#feature);
|
||||
}
|
||||
}
|
||||
};
|
||||
q.to_tokens(&mut detect_missing_features);
|
||||
|
|
|
|||
|
|
@ -63,8 +63,8 @@ neon-unstable-f16: &neon-unstable-f16
|
|||
neon-unstable-feat-lut: &neon-unstable-feat-lut
|
||||
FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]
|
||||
|
||||
aarch64-unstable-jscvt: &aarch64-unstable-jscvt
|
||||
FnCall: [unstable, ['feature = "stdarch_aarch64_jscvt"', 'issue = "147555"']]
|
||||
aarch64-stable-jscvt: &aarch64-stable-jscvt
|
||||
FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "CURRENT_RUSTC_VERSION"']]
|
||||
|
||||
# #[cfg(target_endian = "little")]
|
||||
little-endian: &little-endian
|
||||
|
|
@ -14275,7 +14275,7 @@ intrinsics:
|
|||
attr:
|
||||
- FnCall: [target_feature, ['enable = "jsconv"']]
|
||||
- FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["fjcvtzs"]] }]]
|
||||
- *aarch64-unstable-jscvt
|
||||
- *aarch64-stable-jscvt
|
||||
safety: safe
|
||||
types:
|
||||
- f64
|
||||
|
|
|
|||
|
|
@ -1736,7 +1736,7 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok
|
|||
);
|
||||
}
|
||||
|
||||
tokens.append_all(quote! { #[inline] });
|
||||
tokens.append_all(quote! { #[inline(always)] });
|
||||
|
||||
match endianness {
|
||||
Endianness::Little => tokens.append_all(quote! { #[cfg(target_endian = "little")] }),
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
48622726c4a91c87bf6cd4dbe1000c95df59906e
|
||||
873d4682c7d285540b8f28bfe637006cef8918a6
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue