Completes SSE and adds some MMX intrinsics (#247)
* Completes SSE and adds some MMX intrinsics
MMX:
- `_mm_cmpgt_pi{8,16,32}`
- `_mm_unpack{hi,lo}_pi{8,16,32}`
SSE (is now complete):
- `_mm_cvtp{i,u}{8,16}_ps`
- add test for `_m_pmulhuw`
* fmt and clippy
* add an exception for intrinsics using cvtpi2ps
This commit is contained in:
parent
4f1f2bd550
commit
4bb1ea5a05
14 changed files with 408 additions and 268 deletions
|
|
@ -1,3 +1,7 @@
|
|||
//! Byte swap intrinsics.
|
||||
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
|
|
|
|||
|
|
@ -3299,7 +3299,8 @@ mod tests {
|
|||
use v64::*;
|
||||
|
||||
let a = mem::transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
|
||||
let mut mem = ::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
|
||||
let mut mem =
|
||||
::std::boxed::Box::<__m64>::new(mem::transmute(i8x8::splat(1)));
|
||||
sse::_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
|
||||
assert_eq!(a, *mem);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Constructs a 64-bit integer vector initialized to zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
#[target_feature = "+mmx"]
|
||||
// FIXME: this produces a movl instead of xorps on x86
|
||||
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
|
||||
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
|
||||
|
|
@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
|
|||
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
|
||||
/// less than 0x80 are saturated to 0x80.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(packsswb))]
|
||||
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
packsswb(a, b)
|
||||
|
|
@ -42,17 +42,14 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
|
||||
/// less than 0x80 are saturated to 0x80.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx,+sse"]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(packssdw))]
|
||||
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
packssdw(a, b)
|
||||
}
|
||||
|
||||
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
|
||||
/// [8 x i8] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFF for true.
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtb))]
|
||||
|
|
@ -60,11 +57,8 @@ pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
|
|||
pcmpgtb(a, b)
|
||||
}
|
||||
|
||||
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
/// [4 x i16] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFFFF for true.
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtw))]
|
||||
|
|
@ -72,8 +66,17 @@ pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
pcmpgtw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
|
||||
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtd))]
|
||||
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
pcmpgtd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
|
||||
/// them into the result: `[a.2, b.2, a.3, b.3]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
|
||||
|
|
@ -81,8 +84,17 @@ pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
punpckhwd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
|
||||
/// and interleaves them into a 64-bit integer vector of [8 x i8].
|
||||
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
|
||||
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpckhbw))]
|
||||
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
punpckhbw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
|
||||
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpcklbw))]
|
||||
|
|
@ -90,8 +102,8 @@ pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
|
|||
punpcklbw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
|
||||
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
|
||||
/// them into the result: `[a.0 b.0 a.1 b.1]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpcklwd))]
|
||||
|
|
@ -99,6 +111,24 @@ pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
punpcklwd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
|
||||
/// into the result: `[a.1, b.1]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpckhdq))]
|
||||
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
punpckhdq(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
|
||||
/// into the result: `[a.0, b.0]`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpckldq))]
|
||||
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
punpckldq(a, b)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.mmx.packsswb"]
|
||||
|
|
@ -109,12 +139,20 @@ extern "C" {
|
|||
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
|
||||
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
|
||||
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhwd"]
|
||||
fn punpckhwd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpcklbw"]
|
||||
fn punpcklbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpcklwd"]
|
||||
fn punpcklwd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhbw"]
|
||||
fn punpckhbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpcklbw"]
|
||||
fn punpcklbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhdq"]
|
||||
fn punpckhdq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckldq"]
|
||||
fn punpckldq(a: __m64, b: __m64) -> __m64;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -123,13 +161,13 @@ mod tests {
|
|||
use x86::i686::mmx;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_setzero_si64() {
|
||||
let r: __m64 = ::std::mem::transmute(0_i64);
|
||||
assert_eq!(r, mmx::_mm_setzero_si64());
|
||||
}
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_packs_pi16() {
|
||||
let a = i16x4::new(-1, 2, -3, 4);
|
||||
let b = i16x4::new(-5, 6, -7, 8);
|
||||
|
|
@ -137,7 +175,7 @@ mod tests {
|
|||
assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"] // FIXME: should be mmx
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_packs_pi32() {
|
||||
let a = i32x2::new(-1, 2);
|
||||
let b = i32x2::new(-5, 6);
|
||||
|
|
@ -162,11 +200,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpackhi_pi16() {
|
||||
let a = i16x4::new(0, 1, 2, 3);
|
||||
let b = i16x4::new(4, 5, 6, 7);
|
||||
let r = i16x4::new(2, 6, 3, 7);
|
||||
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
|
||||
unsafe fn _mm_cmpgt_pi32() {
|
||||
let a = i32x2::new(0, 3);
|
||||
let b = i32x2::new(1, 2);
|
||||
let r0 = i32x2::new(0, -1);
|
||||
let r1 = i32x2::new(-1, 0);
|
||||
|
||||
assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
|
||||
assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpackhi_pi8() {
|
||||
let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
|
||||
let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
|
||||
let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
|
||||
|
||||
assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
|
|
@ -177,6 +227,14 @@ mod tests {
|
|||
assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpackhi_pi16() {
|
||||
let a = i16x4::new(0, 1, 2, 3);
|
||||
let b = i16x4::new(4, 5, 6, 7);
|
||||
let r = i16x4::new(2, 6, 3, 7);
|
||||
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpacklo_pi16() {
|
||||
let a = i16x4::new(0, 1, 2, 3);
|
||||
|
|
@ -184,4 +242,22 @@ mod tests {
|
|||
let r = i16x4::new(0, 4, 1, 5);
|
||||
assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpackhi_pi32() {
|
||||
let a = i32x2::new(0, 3);
|
||||
let b = i32x2::new(1, 2);
|
||||
let r = i32x2::new(3, 2);
|
||||
|
||||
assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
unsafe fn _mm_unpacklo_pi32() {
|
||||
let a = i32x2::new(0, 3);
|
||||
let b = i32x2::new(1, 2);
|
||||
let r = i32x2::new(0, 1);
|
||||
|
||||
assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -221,27 +221,34 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
|
|||
_mm_cvtpi32_ps(a, b)
|
||||
}
|
||||
|
||||
/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
|
||||
/// float].
|
||||
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a);
|
||||
let c = mmx::_mm_unpackhi_pi16(a, b);
|
||||
let r = i586::_mm_setzero_ps();
|
||||
let r = cvtpi2ps(r, mem::transmute(c));
|
||||
let r = i586::_mm_movelh_ps(r, r);
|
||||
let c = mmx::_mm_unpacklo_pi16(a, b);
|
||||
cvtpi2ps(r, mem::transmute(c))
|
||||
let b = mmx::_mm_cmpgt_pi8(b, a);
|
||||
let b = mmx::_mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
}
|
||||
|
||||
/// Converts a 64-bit vector of 16-bit unsigned integer values into a
|
||||
/// 128-bit vector of [4 x float].
|
||||
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
}
|
||||
|
||||
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_cmpgt_pi16(b, a);
|
||||
let c = mmx::_mm_unpackhi_pi16(a, b);
|
||||
let r = i586::_mm_setzero_ps();
|
||||
let r = cvtpi2ps(r, c);
|
||||
|
|
@ -250,25 +257,18 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
|
|||
cvtpi2ps(r, c)
|
||||
}
|
||||
|
||||
/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
|
||||
/// into a 128-bit vector of [4 x float].
|
||||
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_cmpgt_pi8(b, a);
|
||||
let b = mmx::_mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
}
|
||||
|
||||
/// Converts the lower four unsigned 8-bit integer values from a 64-bit
|
||||
/// vector of [8 x u8] into a 128-bit vector of [4 x float].
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
let c = mmx::_mm_unpackhi_pi16(a, b);
|
||||
let r = i586::_mm_setzero_ps();
|
||||
let r = cvtpi2ps(r, c);
|
||||
let r = i586::_mm_movelh_ps(r, r);
|
||||
let c = mmx::_mm_unpacklo_pi16(a, b);
|
||||
cvtpi2ps(r, c)
|
||||
}
|
||||
|
||||
/// Converts the two 32-bit signed integer values from each 64-bit vector
|
||||
|
|
@ -512,6 +512,13 @@ mod tests {
|
|||
assert_eq!(r, u16x4::splat(15));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _m_pmulhuw() {
|
||||
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
|
||||
let r = sse::_m_pmulhuw(a.into(), b.into());
|
||||
assert_eq!(r, u16x4::splat(15).into());
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_avg_pu8() {
|
||||
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
|
||||
|
|
@ -601,7 +608,11 @@ mod tests {
|
|||
let a = i8x8::splat(9);
|
||||
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
|
||||
let mut r = i8x8::splat(0);
|
||||
sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
|
||||
sse::_mm_maskmove_si64(
|
||||
a.into(),
|
||||
mask.into(),
|
||||
&mut r as *mut _ as *mut i8,
|
||||
);
|
||||
assert_eq!(r, i8x8::splat(0).replace(2, 9));
|
||||
|
||||
let mut r = i8x8::splat(0);
|
||||
|
|
|
|||
|
|
@ -76,7 +76,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
|
|||
/// integer.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong instr?
|
||||
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
|
||||
mem::transmute(a.extract(0))
|
||||
}
|
||||
|
|
@ -85,7 +86,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
|
|||
/// upper bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong instr?
|
||||
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
|
||||
i64x2::new(mem::transmute(a), 0)
|
||||
}
|
||||
|
|
@ -175,7 +177,8 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_set_epi64() {
|
||||
let r = sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
let r =
|
||||
sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, i64x2::new(2, 1));
|
||||
}
|
||||
|
||||
|
|
@ -187,7 +190,8 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_setr_epi64() {
|
||||
let r = sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
let r =
|
||||
sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, i64x2::new(1, 2));
|
||||
}
|
||||
|
||||
|
|
@ -199,7 +203,16 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_movpi64_epi64() {
|
||||
let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0)));
|
||||
let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(
|
||||
5,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)));
|
||||
assert_eq!(r, i64x2::new(5, 0));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
//! `i686`'s Streaming SIMD Extensions 4a (SSE4a)
|
||||
//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
|
||||
|
||||
use core::mem;
|
||||
use v128::*;
|
||||
|
|
@ -52,7 +52,7 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
|
|||
#[target_feature = "+sse4a"]
|
||||
#[cfg_attr(test, assert_instr(insertq))]
|
||||
pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
|
||||
insertq(x, mem::transmute(y))
|
||||
insertq(x, y)
|
||||
}
|
||||
|
||||
/// Non-temporal store of `a.0` into `p`.
|
||||
|
|
|
|||
|
|
@ -256,7 +256,11 @@ mod tests {
|
|||
unsafe fn _mm_alignr_pi8() {
|
||||
let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32);
|
||||
let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32);
|
||||
let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4);
|
||||
let r = ssse3::_mm_alignr_pi8(
|
||||
u8x8::from(a).into(),
|
||||
u8x8::from(b).into(),
|
||||
4,
|
||||
);
|
||||
assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
macro_rules! constify_imm8 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match $imm8 & 0b1111_1111 {
|
||||
match ($imm8) & 0b1111_1111 {
|
||||
0 => $expand!(0),
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
|
|
@ -267,7 +267,7 @@ macro_rules! constify_imm8 {
|
|||
macro_rules! constify_imm6 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match $imm8 & 0b1_1111 {
|
||||
match ($imm8) & 0b1_1111 {
|
||||
0 => $expand!(0),
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
|
|
@ -307,7 +307,7 @@ macro_rules! constify_imm6 {
|
|||
macro_rules! constify_imm4 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match $imm8 & 0b1111 {
|
||||
match ($imm8) & 0b1111 {
|
||||
0 => $expand!(0),
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
|
|
@ -331,7 +331,7 @@ macro_rules! constify_imm4 {
|
|||
macro_rules! constify_imm3 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match $imm8 & 0b111 {
|
||||
match ($imm8) & 0b111 {
|
||||
0 => $expand!(0),
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
|
|
@ -347,7 +347,7 @@ macro_rules! constify_imm3 {
|
|||
macro_rules! constify_imm2 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match $imm8 & 0b11 {
|
||||
match ($imm8) & 0b11 {
|
||||
0 => $expand!(0),
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||
|
||||
use v128::*;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -42,8 +42,7 @@ pub fn assert_instr(
|
|||
let assert_name = syn::Ident::from(
|
||||
&format!("assert_{}_{}", name.as_ref(), instr.as_ref())[..],
|
||||
);
|
||||
let shim_name =
|
||||
syn::Ident::from(format!("{}_shim", name.as_ref()));
|
||||
let shim_name = syn::Ident::from(format!("{}_shim", name.as_ref()));
|
||||
let (to_test, test_name) = if invoc.args.len() == 0 {
|
||||
(TokenStream::empty(), &func.ident)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -304,19 +304,20 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
|
|||
None => continue,
|
||||
};
|
||||
if !part.contains("call") {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
// On 32-bit x86 position independent code will call itself and be
|
||||
// immediately followed by a `pop` to learn about the current address.
|
||||
// Let's not take that into account when considering whether a function
|
||||
// failed inlining something.
|
||||
let followed_by_pop = function.instrs.get(i + 1)
|
||||
let followed_by_pop = function
|
||||
.instrs
|
||||
.get(i + 1)
|
||||
.and_then(|i| i.parts.get(0))
|
||||
.map(|s| s.contains("pop"))
|
||||
.unwrap_or(false);
|
||||
.map_or(false, |s| s.contains("pop"));
|
||||
if followed_by_pop && cfg!(target_arch = "x86") {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
inlining_failed = true;
|
||||
|
|
@ -324,16 +325,20 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
|
|||
}
|
||||
|
||||
let instruction_limit = match expected {
|
||||
// cpuid returns a pretty big aggregate structure so excempt it from the
|
||||
// slightly more restrictive 20 instructions below
|
||||
// cpuid returns a pretty big aggregate structure so excempt it from
|
||||
// the slightly more restrictive 20 instructions below
|
||||
"cpuid" => 30,
|
||||
|
||||
// Apparently on Windows LLVM generates a bunch of saves/restores of xmm
|
||||
// registers around these intstructions which blows the 20 limit
|
||||
// below. As it seems dictates by Windows's abi (I guess?) we probably
|
||||
// can't do much about it...
|
||||
// Apparently on Windows LLVM generates a bunch of saves/restores of
|
||||
// xmm registers around these intstructions which blows the 20
|
||||
// limit below. As it seems dictates by Windows's abi (I
|
||||
// guess?) we probably can't do much about it...
|
||||
"vzeroall" | "vzeroupper" if cfg!(windows) => 30,
|
||||
|
||||
// Intrinsics using `cvtpi2ps` are typically "composites" and in some
|
||||
// cases exceed the limit.
|
||||
"cvtpi2ps" => 25,
|
||||
|
||||
_ => 20,
|
||||
};
|
||||
let probably_only_one_instruction =
|
||||
|
|
@ -363,12 +368,17 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
|
|||
expected
|
||||
);
|
||||
} else if !probably_only_one_instruction {
|
||||
panic!("instruction found, but the disassembly contains too many \
|
||||
instructions: #instructions = {} >= {} (limit)",
|
||||
function.instrs.len(), instruction_limit);
|
||||
panic!(
|
||||
"instruction found, but the disassembly contains too many \
|
||||
instructions: #instructions = {} >= {} (limit)",
|
||||
function.instrs.len(),
|
||||
instruction_limit
|
||||
);
|
||||
} else if inlining_failed {
|
||||
panic!("instruction found, but the disassembly contains `call` \
|
||||
instructions, which hint that inlining failed");
|
||||
panic!(
|
||||
"instruction found, but the disassembly contains `call` \
|
||||
instructions, which hint that inlining failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,11 +12,11 @@ fn walk(root: &Path) {
|
|||
let file = file.unwrap();
|
||||
if file.file_type().unwrap().is_dir() {
|
||||
walk(&file.path());
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
let path = file.path();
|
||||
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
println!("cargo:rerun-if-changed={}", path.display());
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
#![feature(proc_macro)]
|
||||
|
||||
extern crate proc_macro;
|
||||
extern crate proc_macro2;
|
||||
extern crate syn;
|
||||
extern crate proc_macro;
|
||||
#[macro_use]
|
||||
extern crate quote;
|
||||
extern crate syn;
|
||||
|
||||
use std::path::Path;
|
||||
use std::fs::File;
|
||||
|
|
@ -42,21 +42,21 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
|
|||
_ => return false,
|
||||
}
|
||||
if f.unsafety.is_none() {
|
||||
return false
|
||||
return false;
|
||||
}
|
||||
f.attrs.iter()
|
||||
f.attrs
|
||||
.iter()
|
||||
.filter_map(|a| a.meta_item())
|
||||
.any(|a| {
|
||||
match a {
|
||||
syn::MetaItem::NameValue(i) => i.ident == "target_feature",
|
||||
_ => false,
|
||||
}
|
||||
.any(|a| match a {
|
||||
syn::MetaItem::NameValue(i) => i.ident == "target_feature",
|
||||
_ => false,
|
||||
})
|
||||
});
|
||||
|
||||
let input = proc_macro2::TokenStream::from(input);
|
||||
|
||||
let functions = functions.iter()
|
||||
let functions = functions
|
||||
.iter()
|
||||
.map(|f| {
|
||||
let name = f.ident;
|
||||
// println!("{}", name);
|
||||
|
|
@ -96,53 +96,51 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
|
|||
|
||||
fn to_type(t: &syn::Type) -> Tokens {
|
||||
match *t {
|
||||
syn::Type::Path(ref p) => {
|
||||
match extract_path_ident(&p.path).as_ref() {
|
||||
"__m128i" => my_quote! { &I8x16 },
|
||||
"__m256i" => my_quote! { &I8x32 },
|
||||
"__m64" => my_quote! { &I8x8 },
|
||||
"bool" => my_quote! { &BOOL },
|
||||
"f32" => my_quote! { &F32 },
|
||||
"f32x4" => my_quote! { &F32x4 },
|
||||
"f32x8" => my_quote! { &F32x8 },
|
||||
"f64" => my_quote! { &F64 },
|
||||
"f64x2" => my_quote! { &F64x2 },
|
||||
"f64x4" => my_quote! { &F64x4 },
|
||||
"i16" => my_quote! { &I16 },
|
||||
"i16x16" => my_quote! { &I16x16 },
|
||||
"i16x4" => my_quote! { &I16x4 },
|
||||
"i16x8" => my_quote! { &I16x8 },
|
||||
"i32" => my_quote! { &I32 },
|
||||
"i32x2" => my_quote! { &I32x2 },
|
||||
"i32x4" => my_quote! { &I32x4 },
|
||||
"i32x8" => my_quote! { &I32x8 },
|
||||
"i64" => my_quote! { &I64 },
|
||||
"i64x2" => my_quote! { &I64x2 },
|
||||
"i64x4" => my_quote! { &I64x4 },
|
||||
"i8" => my_quote! { &I8 },
|
||||
"i8x16" => my_quote! { &I8x16 },
|
||||
"i8x32" => my_quote! { &I8x32 },
|
||||
"i8x8" => my_quote! { &I8x8 },
|
||||
"u16x4" => my_quote! { &U16x4 },
|
||||
"u16x8" => my_quote! { &U16x8 },
|
||||
"u32" => my_quote! { &U32 },
|
||||
"u32x2" => my_quote! { &U32x2 },
|
||||
"u32x4" => my_quote! { &U32x4 },
|
||||
"u32x8" => my_quote! { &U32x8 },
|
||||
"u64" => my_quote! { &U64 },
|
||||
"u64x2" => my_quote! { &U64x2 },
|
||||
"u64x4" => my_quote! { &U64x4 },
|
||||
"u8" => my_quote! { &U8 },
|
||||
"u16" => my_quote! { &U16 },
|
||||
"u8x16" => my_quote! { &U8x16 },
|
||||
"u8x32" => my_quote! { &U8x32 },
|
||||
"u16x16" => my_quote! { &U16x16 },
|
||||
"u8x8" => my_quote! { &U8x8 },
|
||||
s => panic!("unspported type: {}", s),
|
||||
}
|
||||
}
|
||||
syn::Type::Ptr(syn::TypePtr { ref elem, .. }) |
|
||||
syn::Type::Reference(syn::TypeReference { ref elem, .. }) => {
|
||||
syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
|
||||
"__m128i" => my_quote! { &I8x16 },
|
||||
"__m256i" => my_quote! { &I8x32 },
|
||||
"__m64" => my_quote! { &I8x8 },
|
||||
"bool" => my_quote! { &BOOL },
|
||||
"f32" => my_quote! { &F32 },
|
||||
"f32x4" => my_quote! { &F32x4 },
|
||||
"f32x8" => my_quote! { &F32x8 },
|
||||
"f64" => my_quote! { &F64 },
|
||||
"f64x2" => my_quote! { &F64x2 },
|
||||
"f64x4" => my_quote! { &F64x4 },
|
||||
"i16" => my_quote! { &I16 },
|
||||
"i16x16" => my_quote! { &I16x16 },
|
||||
"i16x4" => my_quote! { &I16x4 },
|
||||
"i16x8" => my_quote! { &I16x8 },
|
||||
"i32" => my_quote! { &I32 },
|
||||
"i32x2" => my_quote! { &I32x2 },
|
||||
"i32x4" => my_quote! { &I32x4 },
|
||||
"i32x8" => my_quote! { &I32x8 },
|
||||
"i64" => my_quote! { &I64 },
|
||||
"i64x2" => my_quote! { &I64x2 },
|
||||
"i64x4" => my_quote! { &I64x4 },
|
||||
"i8" => my_quote! { &I8 },
|
||||
"i8x16" => my_quote! { &I8x16 },
|
||||
"i8x32" => my_quote! { &I8x32 },
|
||||
"i8x8" => my_quote! { &I8x8 },
|
||||
"u16x4" => my_quote! { &U16x4 },
|
||||
"u16x8" => my_quote! { &U16x8 },
|
||||
"u32" => my_quote! { &U32 },
|
||||
"u32x2" => my_quote! { &U32x2 },
|
||||
"u32x4" => my_quote! { &U32x4 },
|
||||
"u32x8" => my_quote! { &U32x8 },
|
||||
"u64" => my_quote! { &U64 },
|
||||
"u64x2" => my_quote! { &U64x2 },
|
||||
"u64x4" => my_quote! { &U64x4 },
|
||||
"u8" => my_quote! { &U8 },
|
||||
"u16" => my_quote! { &U16 },
|
||||
"u8x16" => my_quote! { &U8x16 },
|
||||
"u8x32" => my_quote! { &U8x32 },
|
||||
"u16x16" => my_quote! { &U16x16 },
|
||||
"u8x8" => my_quote! { &U8x8 },
|
||||
s => panic!("unspported type: {}", s),
|
||||
},
|
||||
syn::Type::Ptr(syn::TypePtr { ref elem, .. })
|
||||
| syn::Type::Reference(syn::TypeReference { ref elem, .. }) => {
|
||||
let tokens = to_type(&elem);
|
||||
my_quote! { &Type::Ptr(#tokens) }
|
||||
}
|
||||
|
|
@ -162,7 +160,7 @@ fn extract_path_ident(path: &syn::Path) -> syn::Ident {
|
|||
}
|
||||
match path.segments.first().unwrap().item().arguments {
|
||||
syn::PathArguments::None => {}
|
||||
_ => panic!("unsupported path that has path arguments")
|
||||
_ => panic!("unsupported path that has path arguments"),
|
||||
}
|
||||
path.segments.first().unwrap().item().ident
|
||||
}
|
||||
|
|
@ -172,71 +170,72 @@ fn walk(root: &Path, files: &mut Vec<syn::File>) {
|
|||
let file = file.unwrap();
|
||||
if file.file_type().unwrap().is_dir() {
|
||||
walk(&file.path(), files);
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
let path = file.path();
|
||||
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut contents = String::new();
|
||||
File::open(&path).unwrap().read_to_string(&mut contents).unwrap();
|
||||
File::open(&path)
|
||||
.unwrap()
|
||||
.read_to_string(&mut contents)
|
||||
.unwrap();
|
||||
|
||||
files.push(syn::parse_str::<syn::File>(&contents).expect("failed to parse"));
|
||||
files.push(
|
||||
syn::parse_str::<syn::File>(&contents).expect("failed to parse"),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn find_instrs(attrs: &[syn::Attribute]) -> Vec<syn::Ident> {
|
||||
attrs.iter()
|
||||
attrs
|
||||
.iter()
|
||||
.filter_map(|a| a.meta_item())
|
||||
.filter_map(|a| {
|
||||
match a {
|
||||
syn::MetaItem::List(i) => {
|
||||
if i.ident == "cfg_attr" {
|
||||
i.nested.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.filter_map(|a| match a {
|
||||
syn::MetaItem::List(i) => {
|
||||
if i.ident == "cfg_attr" {
|
||||
i.nested.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.filter_map(|nested| {
|
||||
match nested {
|
||||
syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => {
|
||||
if i.ident == "assert_instr" {
|
||||
i.nested.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.filter_map(|nested| match nested {
|
||||
syn::NestedMetaItem::MetaItem(syn::MetaItem::List(i)) => {
|
||||
if i.ident == "assert_instr" {
|
||||
i.nested.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.filter_map(|nested| {
|
||||
match nested {
|
||||
syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i),
|
||||
_ => None,
|
||||
}
|
||||
.filter_map(|nested| match nested {
|
||||
syn::NestedMetaItem::MetaItem(syn::MetaItem::Term(i)) => Some(i),
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn find_target_feature(name: syn::Ident, attrs: &[syn::Attribute]) -> syn::Lit {
|
||||
attrs.iter()
|
||||
fn find_target_feature(
|
||||
name: syn::Ident, attrs: &[syn::Attribute]
|
||||
) -> syn::Lit {
|
||||
attrs
|
||||
.iter()
|
||||
.filter_map(|a| a.meta_item())
|
||||
.filter_map(|a| {
|
||||
match a {
|
||||
syn::MetaItem::NameValue(i) => {
|
||||
if i.ident == "target_feature" {
|
||||
Some(i.lit)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.filter_map(|a| match a {
|
||||
syn::MetaItem::NameValue(i) => {
|
||||
if i.ident == "target_feature" {
|
||||
Some(i.lit)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.next()
|
||||
.expect(&format!("failed to find target_feature for {}",name))
|
||||
.expect(&format!("failed to find target_feature for {}", name))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
#![feature(proc_macro)]
|
||||
#![allow(bad_style)]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(shadow_reuse, cast_lossless, match_same_arms))]
|
||||
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
|
|
@ -42,7 +44,7 @@ static I8x32: Type = Type::Signed(8, 32);
|
|||
static I8x8: Type = Type::Signed(8, 8);
|
||||
static U16: Type = Type::PrimUnsigned(16);
|
||||
static U16x16: Type = Type::Unsigned(16, 16);
|
||||
static U16x4: Type = Type::Unsigned(16, 4);
|
||||
// static U16x4: Type = Type::Unsigned(16, 4);
|
||||
static U16x8: Type = Type::Unsigned(16, 8);
|
||||
static U32: Type = Type::PrimUnsigned(32);
|
||||
static U32x2: Type = Type::Unsigned(32, 2);
|
||||
|
|
@ -54,7 +56,7 @@ static U64x4: Type = Type::Unsigned(64, 4);
|
|||
static U8: Type = Type::PrimUnsigned(8);
|
||||
static U8x16: Type = Type::Unsigned(8, 16);
|
||||
static U8x32: Type = Type::Unsigned(8, 32);
|
||||
static U8x8: Type = Type::Unsigned(8, 8);
|
||||
// static U8x8: Type = Type::Unsigned(8, 8);
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Type {
|
||||
|
|
@ -72,8 +74,7 @@ x86_functions!(static FUNCTIONS);
|
|||
|
||||
#[derive(Deserialize)]
|
||||
struct Data {
|
||||
#[serde(rename = "intrinsic", default)]
|
||||
intrinsics: Vec<Intrinsic>,
|
||||
#[serde(rename = "intrinsic", default)] intrinsics: Vec<Intrinsic>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
|
@ -81,18 +82,14 @@ struct Intrinsic {
|
|||
rettype: String,
|
||||
name: String,
|
||||
tech: String,
|
||||
#[serde(rename = "CPUID", default)]
|
||||
cpuid: Vec<String>,
|
||||
#[serde(rename = "parameter", default)]
|
||||
parameters: Vec<Parameter>,
|
||||
#[serde(default)]
|
||||
instruction: Vec<Instruction>,
|
||||
#[serde(rename = "CPUID", default)] cpuid: Vec<String>,
|
||||
#[serde(rename = "parameter", default)] parameters: Vec<Parameter>,
|
||||
#[serde(default)] instruction: Vec<Instruction>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Parameter {
|
||||
#[serde(rename = "type")]
|
||||
type_: String,
|
||||
#[serde(rename = "type")] type_: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
|
@ -113,18 +110,20 @@ fn verify_all_signatures() {
|
|||
let xml = include_bytes!("../x86-intel.xml");
|
||||
|
||||
let xml = &xml[..];
|
||||
let data: Data = serde_xml_rs::deserialize(xml).expect("failed to deserialize xml");
|
||||
let data: Data =
|
||||
serde_xml_rs::deserialize(xml).expect("failed to deserialize xml");
|
||||
let mut map = HashMap::new();
|
||||
for intrinsic in data.intrinsics.iter() {
|
||||
// This intrinsic has multiple definitions in the XML, so just ignore it.
|
||||
for intrinsic in &data.intrinsics {
|
||||
// This intrinsic has multiple definitions in the XML, so just ignore
|
||||
// it.
|
||||
if intrinsic.name == "_mm_prefetch" {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
// These'll need to get added eventually, but right now they have some
|
||||
// duplicate names in the XML which we're not dealing with yet
|
||||
if intrinsic.tech == "AVX-512" {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(map.insert(&intrinsic.name[..], intrinsic).is_none());
|
||||
|
|
@ -133,13 +132,14 @@ fn verify_all_signatures() {
|
|||
for rust in FUNCTIONS {
|
||||
// This was ignored above, we ignore it here as well.
|
||||
if rust.name == "_mm_prefetch" {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
// these are all AMD-specific intrinsics
|
||||
if rust.target_feature.contains("sse4a") ||
|
||||
rust.target_feature.contains("tbm") {
|
||||
continue
|
||||
if rust.target_feature.contains("sse4a")
|
||||
|| rust.target_feature.contains("tbm")
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let intel = match map.get(rust.name) {
|
||||
|
|
@ -147,15 +147,15 @@ fn verify_all_signatures() {
|
|||
None => panic!("missing intel definition for {}", rust.name),
|
||||
};
|
||||
|
||||
// Verify that all `#[target_feature]` annotations are correct, ensuring
|
||||
// that we've actually enabled the right instruction set for this
|
||||
// intrinsic.
|
||||
assert!(intel.cpuid.len() > 0, "missing cpuid for {}", rust.name);
|
||||
for cpuid in intel.cpuid.iter() {
|
||||
// Verify that all `#[target_feature]` annotations are correct,
|
||||
// ensuring that we've actually enabled the right instruction
|
||||
// set for this intrinsic.
|
||||
assert!(!intel.cpuid.is_empty(), "missing cpuid for {}", rust.name);
|
||||
for cpuid in &intel.cpuid {
|
||||
// this is needed by _xsave and probably some related intrinsics,
|
||||
// but let's just skip it for now.
|
||||
if *cpuid == "XSS" {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
|
||||
let cpuid = cpuid
|
||||
|
|
@ -163,62 +163,78 @@ fn verify_all_signatures() {
|
|||
.flat_map(|c| c.to_lowercase())
|
||||
.collect::<String>();
|
||||
|
||||
// Normalize `bmi1` to `bmi` as apparently that's what we're calling
|
||||
// it.
|
||||
// Normalize `bmi1` to `bmi` as apparently that's what we're
|
||||
// calling it.
|
||||
let cpuid = if cpuid == "bmi1" {
|
||||
String::from("bmi")
|
||||
} else {
|
||||
cpuid
|
||||
};
|
||||
|
||||
assert!(rust.target_feature.contains(&cpuid),
|
||||
"intel cpuid `{}` not in `{}` for {}",
|
||||
cpuid,
|
||||
rust.target_feature,
|
||||
rust.name);
|
||||
assert!(
|
||||
rust.target_feature.contains(&cpuid),
|
||||
"intel cpuid `{}` not in `{}` for {}",
|
||||
cpuid,
|
||||
rust.target_feature,
|
||||
rust.name
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: we should test this, but it generates too many failures right
|
||||
// now
|
||||
if false {
|
||||
if rust.instrs.len() == 0 {
|
||||
assert_eq!(intel.instruction.len(), 0,
|
||||
"instruction not listed for {}", rust.name);
|
||||
if rust.instrs.is_empty() {
|
||||
assert_eq!(
|
||||
intel.instruction.len(),
|
||||
0,
|
||||
"instruction not listed for {}",
|
||||
rust.name
|
||||
);
|
||||
|
||||
// If intel doesn't list any instructions and we do then don't
|
||||
// bother trying to look for instructions in intel, we've just got
|
||||
// some extra assertions on our end.
|
||||
} else if intel.instruction.len() > 0 {
|
||||
for instr in rust.instrs.iter() {
|
||||
assert!(intel.instruction.iter().any(|a| a.name.starts_with(instr)),
|
||||
"intel failed to list `{}` as an instruction for `{}`",
|
||||
instr, rust.name);
|
||||
} else if !intel.instruction.is_empty() {
|
||||
for instr in rust.instrs {
|
||||
assert!(
|
||||
intel
|
||||
.instruction
|
||||
.iter()
|
||||
.any(|a| a.name.starts_with(instr)),
|
||||
"intel failed to list `{}` as an instruction for `{}`",
|
||||
instr,
|
||||
rust.name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we've got the right return type.
|
||||
match rust.ret {
|
||||
Some(t) => equate(t, &intel.rettype, &rust.name),
|
||||
None => {
|
||||
assert!(intel.rettype == "" || intel.rettype == "void",
|
||||
"{} returns `{}` with intel, void in rust",
|
||||
rust.name, intel.rettype);
|
||||
}
|
||||
if let Some(t) = rust.ret {
|
||||
equate(t, &intel.rettype, rust.name);
|
||||
} else {
|
||||
assert!(
|
||||
intel.rettype == "" || intel.rettype == "void",
|
||||
"{} returns `{}` with intel, void in rust",
|
||||
rust.name,
|
||||
intel.rettype
|
||||
);
|
||||
}
|
||||
|
||||
// If there's no arguments on Rust's side intel may list one "void"
|
||||
// argument, so handle that here.
|
||||
if rust.arguments.len() == 0 {
|
||||
if intel.parameters.len() == 1 {
|
||||
assert_eq!(intel.parameters[0].type_, "void");
|
||||
continue
|
||||
}
|
||||
if rust.arguments.is_empty() && intel.parameters.len() == 1 {
|
||||
assert_eq!(intel.parameters[0].type_, "void");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise we want all parameters to be exactly the same
|
||||
assert_eq!(rust.arguments.len(), intel.parameters.len(),
|
||||
"wrong number of arguments on {}", rust.name);
|
||||
assert_eq!(
|
||||
rust.arguments.len(),
|
||||
intel.parameters.len(),
|
||||
"wrong number of arguments on {}",
|
||||
rust.name
|
||||
);
|
||||
for (a, b) in intel.parameters.iter().zip(rust.arguments) {
|
||||
equate(b, &a.type_, &intel.name);
|
||||
}
|
||||
|
|
@ -255,20 +271,21 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
|
|||
(&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {}
|
||||
(&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {}
|
||||
|
||||
(&Type::Signed(a, b), "__m128i") |
|
||||
(&Type::Unsigned(a, b), "__m128i") |
|
||||
(&Type::Ptr(&Type::Signed(a, b)), "__m128i*") |
|
||||
(&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
|
||||
(&Type::Signed(a, b), "__m128i")
|
||||
| (&Type::Unsigned(a, b), "__m128i")
|
||||
| (&Type::Ptr(&Type::Signed(a, b)), "__m128i*")
|
||||
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
|
||||
|
||||
(&Type::Signed(a, b), "__m256i") |
|
||||
(&Type::Unsigned(a, b), "__m256i") |
|
||||
(&Type::Ptr(&Type::Signed(a, b)), "__m256i*") |
|
||||
(&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*") if (a as u32) * (b as u32) == 256 => {}
|
||||
(&Type::Signed(a, b), "__m256i")
|
||||
| (&Type::Unsigned(a, b), "__m256i")
|
||||
| (&Type::Ptr(&Type::Signed(a, b)), "__m256i*")
|
||||
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*")
|
||||
if (a as u32) * (b as u32) == 256 => {}
|
||||
|
||||
(&Type::Signed(a, b), "__m64") |
|
||||
(&Type::Unsigned(a, b), "__m64") |
|
||||
(&Type::Ptr(&Type::Signed(a, b)), "__m64*") |
|
||||
(&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
|
||||
(&Type::Signed(a, b), "__m64")
|
||||
| (&Type::Unsigned(a, b), "__m64")
|
||||
| (&Type::Ptr(&Type::Signed(a, b)), "__m64*")
|
||||
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
|
||||
|
||||
(&Type::Float(32, 4), "__m128") => {}
|
||||
(&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {}
|
||||
|
|
@ -291,20 +308,24 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
|
|||
// Intel says the argument is i32...
|
||||
(&Type::PrimSigned(8), "int") if intrinsic == "_mm_insert_epi8" => {}
|
||||
|
||||
// This is a macro (?) in C which seems to mutate its arguments, but that
|
||||
// means that we're taking pointers to arguments in rust as we're not
|
||||
// exposing it as a macro.
|
||||
(&Type::Ptr(&Type::Float(32, 4)), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {}
|
||||
// This is a macro (?) in C which seems to mutate its arguments, but
|
||||
// that means that we're taking pointers to arguments in rust
|
||||
// as we're not exposing it as a macro.
|
||||
(&Type::Ptr(&Type::Float(32, 4)), "__m128")
|
||||
if intrinsic == "_MM_TRANSPOSE4_PS" => {}
|
||||
|
||||
// These intrinsics return an `int` in C but they're always either the
|
||||
// bit 1 or 0 so we switch it to returning `bool` in rust
|
||||
(&Type::Bool, "int")
|
||||
if intrinsic.starts_with("_mm_comi") && intrinsic.ends_with("_sd")
|
||||
=> {}
|
||||
if intrinsic.starts_with("_mm_comi")
|
||||
&& intrinsic.ends_with("_sd") => {}
|
||||
(&Type::Bool, "int")
|
||||
if intrinsic.starts_with("_mm_ucomi") && intrinsic.ends_with("_sd")
|
||||
=> {}
|
||||
if intrinsic.starts_with("_mm_ucomi")
|
||||
&& intrinsic.ends_with("_sd") => {}
|
||||
|
||||
_ => panic!("failed to equate: `{}` and {:?} for {}", intel, t, intrinsic),
|
||||
_ => panic!(
|
||||
"failed to equate: `{}` and {:?} for {}",
|
||||
intel, t, intrinsic
|
||||
),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue