diff --git a/library/stdarch/src/macros.rs b/library/stdarch/src/macros.rs
index 87f1242619e4..b3cf8ecf4e85 100644
--- a/library/stdarch/src/macros.rs
+++ b/library/stdarch/src/macros.rs
@@ -36,6 +36,42 @@ macro_rules! define_impl {
                 assert!(idx < $nelems);
                 unsafe { simd_insert(self, idx, val) }
             }
+
+            /// Loads a vector from `slice`, starting at index `offset`.
+            ///
+            /// # Panics
+            ///
+            /// Panics if `offset` is past the end of `slice`, or if fewer
+            /// elements remain from `offset` onward than the vector holds.
+            #[inline(always)]
+            pub fn load(slice: &[$elemty], offset: usize) -> $name {
+                assert!(slice[offset..].len() >= $nelems);
+                unsafe { $name::load_unchecked(slice, offset) }
+            }
+
+            /// Loads a vector from `slice` at `offset` without bounds checks.
+            ///
+            /// # Safety
+            ///
+            /// The caller must guarantee that `offset` is in bounds and that as
+            /// many bytes as the vector type occupies are readable from there.
+            #[inline(always)]
+            pub unsafe fn load_unchecked(
+                slice: &[$elemty],
+                offset: usize,
+            ) -> $name {
+                use std::mem::size_of;
+                use std::ptr;
+
+                // `x` is only a placeholder; all of its bytes are overwritten by
+                // the raw copy below.
+                let mut x = $name::splat(0 as $elemty);
+                ptr::copy_nonoverlapping(
+                    slice.get_unchecked(offset) as *const $elemty as *const u8,
+                    &mut x as *mut $name as *mut u8,
+                    size_of::<$name>());
+                x
+            }
         }
     }
 }
diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs
index a6b4cbd803ba..8bc93f73ec70 100644
--- a/library/stdarch/src/x86/mod.rs
+++ b/library/stdarch/src/x86/mod.rs
@@ -8,3 +8,4 @@ pub type __m128i = ::v128::i8x16;
 // mod sse;
 mod sse2;
 mod ssse3;
+mod sse42;
diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs
new file mode 100644
index 000000000000..be1d97edf9e6
--- /dev/null
+++ b/library/stdarch/src/x86/sse42.rs
@@ -0,0 +1,70 @@
+// use v128::*;
+use x86::__m128i;
+
+// Flags for the `imm8` argument of `_mm_cmpestri`. Flags from different
+// groups below are combined with `|`.
+
+// Element format selectors (bits 0-1).
+pub const _SIDD_UBYTE_OPS: i8 = 0b00000000;
+pub const _SIDD_UWORD_OPS: i8 = 0b00000001;
+pub const _SIDD_SBYTE_OPS: i8 = 0b00000010;
+pub const _SIDD_SWORD_OPS: i8 = 0b00000011;
+
+// Comparison mode selectors (bits 2-3).
+pub const _SIDD_CMP_EQUAL_ANY: i8 = 0b00000000;
+pub const _SIDD_CMP_RANGES: i8 = 0b00000100;
+pub const _SIDD_CMP_EQUAL_EACH: i8 = 0b00001000;
+pub const _SIDD_CMP_EQUAL_ORDERED: i8 = 0b00001100;
+
+// Polarity selectors (bits 4-5).
+pub const _SIDD_POSITIVE_POLARITY: i8 = 0b00000000;
+pub const _SIDD_NEGATIVE_POLARITY: i8 = 0b00010000;
+pub const _SIDD_MASKED_NEGATIVE_POLARITY: i8 = 0b00110000;
+
+// Index significance selectors (bit 6).
+pub const _SIDD_LEAST_SIGNIFICANT: i8 = 0b00000000;
+pub const _SIDD_MOST_SIGNIFICANT: i8 = 0b01000000;
+
+/// Calls the `llvm.x86.sse42.pcmpestri128` intrinsic and returns its result.
+///
+/// `imm8` is assembled from the `_SIDD_*` constants; `la` and `lb` accompany
+/// `a` and `b` (presumably their explicit lengths -- confirm against the
+/// Intel intrinsics guide).
+#[inline(always)]
+#[target_feature = "+sse4.2"]
+pub fn _mm_cmpestri(
+    a: __m128i,
+    la: i32,
+    b: __m128i,
+    lb: i32,
+    imm8: i8,
+) -> i32 {
+    unsafe { pcmpestri128(a, la, b, lb, imm8) }
+}
+
+#[allow(improper_ctypes)]
+extern {
+    #[link_name = "llvm.x86.sse42.pcmpestri128"]
+    fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+}
+
+#[cfg(test)]
+mod tests {
+    use v128::*;
+    use x86::{__m128i, sse42};
+
+    #[test]
+    #[target_feature = "+sse4.2"]
+    fn _mm_cmpestri() {
+        // Both fixtures are padded to 16 bytes so `u8x16::load` has a full
+        // vector to read.
+        let a = &b"bar             "[..];
+        let b = &b"foobar          "[..];
+        let va = __m128i::from(u8x16::load(a, 0));
+        let vb = __m128i::from(u8x16::load(b, 0));
+        let i = sse42::_mm_cmpestri(
+            va, 3, vb, 6,
+            sse42::_SIDD_CMP_EQUAL_ORDERED | sse42::_SIDD_MOST_SIGNIFICANT);
+        assert_eq!(3, i);
+    }
+}
diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs
index 006069b1ec54..8f45c2ec4c2d 100644
--- a/library/stdarch/src/x86/ssse3.rs
+++ b/library/stdarch/src/x86/ssse3.rs
@@ -11,7 +11,7 @@ pub fn _mm_abs_epi8(a: i8x16) -> u8x16 {
 #[allow(improper_ctypes)]
 extern {
     #[link_name = "llvm.x86.ssse3.pabs.b.128"]
-    pub fn pabsb128(a: i8x16) -> u8x16;
+    fn pabsb128(a: i8x16) -> u8x16;
 }
 
 #[cfg(test)]