Implement missing intrinsics in SSE4a and TBM
Add `extracti`, `inserti` and `bextri` intrinsics. Refactor TBM into 2 modules
This commit is contained in:
parent
7378b35fd0
commit
1da646fcab
4 changed files with 310 additions and 258 deletions
|
|
@ -9,17 +9,18 @@ use stdarch_test::assert_instr;
|
|||
extern "C" {
|
||||
#[link_name = "llvm.x86.sse4a.extrq"]
|
||||
fn extrq(x: i64x2, y: i8x16) -> i64x2;
|
||||
#[link_name = "llvm.x86.sse4a.extrqi"]
|
||||
fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2;
|
||||
#[link_name = "llvm.x86.sse4a.insertq"]
|
||||
fn insertq(x: i64x2, y: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.sse4a.insertqi"]
|
||||
fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2;
|
||||
#[link_name = "llvm.x86.sse4a.movnt.sd"]
|
||||
fn movntsd(x: *mut f64, y: __m128d);
|
||||
#[link_name = "llvm.x86.sse4a.movnt.ss"]
|
||||
fn movntss(x: *mut f32, y: __m128);
|
||||
}
|
||||
|
||||
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
|
||||
// FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
|
||||
|
||||
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
|
||||
///
|
||||
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
|
||||
|
|
@ -39,6 +40,27 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
|
|||
transmute(extrq(x.as_i64x2(), y.as_i8x16()))
|
||||
}
|
||||
|
||||
/// Extracts `len` bits, starting at bit index `idx`, from the lower 64 bits of the 128-bit integer
|
||||
/// vector operand `x`.
|
||||
///
|
||||
/// `idx` specifies the index of the LSB of the range and `len` the number of bits to extract; both
|
||||
/// are passed as const generics (`IDX`, `LEN`). If both are zero, bits `[63:0]` of `x` are extracted.
|
||||
/// It is a compile-time error for `len + idx` to exceed 64 or for `len` to be zero while `idx` is non-zero.
|
||||
///
|
||||
/// Returns a 128-bit integer vector whose lower 64 bits contain the extracted bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))]
|
||||
#[rustc_legacy_const_generics(1, 2)]
|
||||
#[unstable(feature = "simd_x86_updates", issue = "126936")]
|
||||
pub unsafe fn _mm_extracti_si64<const LEN: i32, const IDX: i32>(x: __m128i) -> __m128i {
|
||||
// LLVM mentions that it is UB if these are not satisfied, so they are checked with static assertions
|
||||
static_assert_uimm_bits!(LEN, 6); // presumably rejects LEN outside 0..=63 (macro defined elsewhere)
|
||||
static_assert_uimm_bits!(IDX, 6); // presumably rejects IDX outside 0..=63 (macro defined elsewhere)
|
||||
static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); // either the "both zero" form, or a non-empty range ending at or before bit 64
|
||||
transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) // `as u8` cannot truncate if the 6-bit checks above hold
|
||||
}
|
||||
|
||||
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
|
||||
///
|
||||
/// The bits of `y`:
|
||||
|
|
@ -56,6 +78,25 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
|
|||
transmute(insertq(x.as_i64x2(), y.as_i64x2()))
|
||||
}
|
||||
|
||||
/// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into
|
||||
/// the lower 64 bits of the 128-bit integer vector operand `x`, starting at bit index `idx`.
|
||||
///
|
||||
/// `idx` specifies the index of the LSB of the destination range and `len` the number of bits to insert; both
|
||||
/// are passed as const generics (`IDX`, `LEN`). If both are zero, bits `[63:0]` of `x` are replaced with bits `[63:0]` of `y`.
|
||||
/// It is a compile-time error for `len + idx` to exceed 64 or for `len` to be zero while `idx` is non-zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))]
|
||||
#[rustc_legacy_const_generics(2, 3)]
|
||||
#[unstable(feature = "simd_x86_updates", issue = "126936")]
|
||||
pub unsafe fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i) -> __m128i {
|
||||
// LLVM mentions that it is UB if these are not satisfied, so they are checked with static assertions
|
||||
static_assert_uimm_bits!(LEN, 6); // presumably rejects LEN outside 0..=63 (macro defined elsewhere)
|
||||
static_assert_uimm_bits!(IDX, 6); // presumably rejects IDX outside 0..=63 (macro defined elsewhere)
|
||||
static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); // either the "both zero" form, or a non-empty range ending at or before bit 64
|
||||
transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) // `as u8` cannot truncate if the 6-bit checks above hold
|
||||
}
|
||||
|
||||
/// Non-temporal store of `a.0` into `p`.
|
||||
///
|
||||
/// Writes 64-bit data to a memory location without polluting the caches.
|
||||
|
|
@ -114,6 +155,14 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse4a")]
|
||||
unsafe fn test_mm_extracti_si64() {
|
||||
let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
|
||||
let r = _mm_extracti_si64::<8, 8>(a);
|
||||
let e = _mm_setr_epi64x(0xcd, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse4a")]
|
||||
unsafe fn test_mm_insert_si64() {
|
||||
let i = 0b0110_i64;
|
||||
|
|
@ -131,6 +180,15 @@ mod tests {
|
|||
assert_eq_m128i(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse4a")]
|
||||
unsafe fn test_mm_inserti_si64() {
|
||||
let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
|
||||
let b = _mm_setr_epi64x(0x0011223344556677, 0);
|
||||
let r = _mm_inserti_si64::<8, 8>(a, b);
|
||||
let e = _mm_setr_epi64x(0x0123456789ab77ef, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[repr(align(16))]
|
||||
struct MemoryF64 {
|
||||
data: [f64; 2],
|
||||
|
|
|
|||
|
|
@ -13,57 +13,28 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
// FIXME(blocked on #248)
|
||||
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select:
|
||||
// intrinsic %llvm.x86.tbm.bextri.u32
|
||||
/*
|
||||
#[allow(dead_code)]
|
||||
extern "C" {
|
||||
#[link_name="llvm.x86.tbm.bextri.u32"]
|
||||
fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32;
|
||||
#[link_name="llvm.x86.tbm.bextri.u64"]
|
||||
fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64;
|
||||
}
|
||||
|
||||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
||||
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
|
||||
}
|
||||
|
||||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
|
||||
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
|
||||
#[link_name = "llvm.x86.tbm.bextri.u32"]
|
||||
fn bextri_u32(a: u32, control: u32) -> u32;
|
||||
}
|
||||
|
||||
/// Extracts bits of `a` specified by `control` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
|
||||
/// be extracted, and bits `[15,8]` specify the length of the range.
|
||||
/// be extracted, and bits `[15,8]` specify the length of the range. For any bit
|
||||
/// position in the specified range that lies beyond the MSB of the source operand,
|
||||
/// zeroes will be written. If the range is empty, the result is zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
|
||||
unsafe { x86_tbm_bextri_u32(a, control) }
|
||||
#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "simd_x86_updates", issue = "126936")]
|
||||
pub unsafe fn _bextri_u32<const CONTROL: u32>(a: u32) -> u32 {
|
||||
static_assert_uimm_bits!(CONTROL, 16);
|
||||
unsafe { bextri_u32(a, CONTROL) }
|
||||
}
|
||||
|
||||
/// Extracts bits of `a` specified by `control` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
|
||||
/// be extracted, and bits `[15,8]` specify the length of the range.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
||||
unsafe { x86_tbm_bextri_u64(a, control) }
|
||||
}
|
||||
*/
|
||||
|
||||
/// Clears all bits below the least significant zero bit of `x`.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns zero.
|
||||
|
|
@ -75,18 +46,6 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 {
|
|||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Clears all bits below the least significant zero bit of `x`.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
|
||||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Sets all bits of `x` to 1 except for the least significant zero bit.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it sets all bits.
|
||||
|
|
@ -95,19 +54,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blci))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blci_u32(x: u32) -> u32 {
|
||||
x | !(x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Sets all bits of `x` to 1 except for the least significant zero bit.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it sets all bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blci_u64(x: u64) -> u64 {
|
||||
x | !(x.wrapping_add(1))
|
||||
x | !x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all other bits.
|
||||
|
|
@ -118,19 +65,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcic_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all other bits.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcic_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_add(1))
|
||||
!x & x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all bits above
|
||||
|
|
@ -142,20 +77,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all bits above
|
||||
/// that bit.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
x ^ x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x`.
|
||||
|
|
@ -166,18 +88,6 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcs_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x`.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns `x`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcs_u64(x: u64) -> u64 {
|
||||
x | x.wrapping_add(1)
|
||||
}
|
||||
|
||||
|
|
@ -189,19 +99,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsfill_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
||||
/// Sets all bits of `x` below the least significant one.
|
||||
///
|
||||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
|
||||
x | (x.wrapping_sub(1))
|
||||
x | x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
/// Clears least significant bit and sets all other bits.
|
||||
|
|
@ -212,19 +110,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsic_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
||||
/// Clears least significant bit and sets all other bits.
|
||||
///
|
||||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsic_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
!x | x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
/// Clears all bits below the least significant zero of `x` and sets all other
|
||||
|
|
@ -236,20 +122,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_add(1))
|
||||
}
|
||||
|
||||
/// Clears all bits below the least significant zero of `x` and sets all other
|
||||
/// bits.
|
||||
///
|
||||
/// If the least significant bit of `x` is `0`, it sets all bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_add(1))
|
||||
!x | x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets all bits below the least significant one of `x` and clears all other
|
||||
|
|
@ -261,20 +134,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
||||
/// Sets all bits below the least significant one of `x` and clears all other
|
||||
/// bits.
|
||||
///
|
||||
/// If the least significant bit of `x` is 1, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
!x & x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -283,31 +143,17 @@ mod tests {
|
|||
|
||||
use crate::core_arch::x86::*;
|
||||
|
||||
/*
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_bextr_u32() {
|
||||
assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
|
||||
unsafe fn test_bextri_u32() {
|
||||
assert_eq!(_bextri_u32::<0x0404>(0b0101_0000u32), 0b0000_0101u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_bextr_u64() {
|
||||
assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
|
||||
}
|
||||
*/
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcfill_u32() {
|
||||
assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
|
||||
assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
unsafe fn test_blcfill_u64() {
|
||||
assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
|
||||
assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blci_u32() {
|
||||
assert_eq!(
|
||||
|
|
@ -320,59 +166,24 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[rustfmt::skip]
|
||||
unsafe fn test_blci_u64() {
|
||||
assert_eq!(
|
||||
_blci_u64(0b0101_0000u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
|
||||
);
|
||||
assert_eq!(
|
||||
_blci_u64(0b1111_1111u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcic_u32() {
|
||||
assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
|
||||
assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
unsafe fn test_blcic_u64() {
|
||||
assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
|
||||
assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcmsk_u32() {
|
||||
assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
|
||||
assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
unsafe fn test_blcmsk_u64() {
|
||||
assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
|
||||
assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcs_u32() {
|
||||
assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
|
||||
assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
unsafe fn test_blcs_u64() {
|
||||
assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
|
||||
assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blsfill_u32() {
|
||||
assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
|
||||
|
|
@ -382,17 +193,6 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[rustfmt::skip]
|
||||
unsafe fn test_blsfill_u64() {
|
||||
assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
|
||||
assert_eq!(
|
||||
_blsfill_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blsic_u32() {
|
||||
assert_eq!(
|
||||
|
|
@ -405,20 +205,6 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[rustfmt::skip]
|
||||
unsafe fn test_blsic_u64() {
|
||||
assert_eq!(
|
||||
_blsic_u64(0b0101_0100u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
|
||||
);
|
||||
assert_eq!(
|
||||
_blsic_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_t1mskc_u32() {
|
||||
assert_eq!(
|
||||
|
|
@ -431,30 +217,9 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[rustfmt::skip]
|
||||
unsafe fn test_t1mksc_u64() {
|
||||
assert_eq!(
|
||||
_t1mskc_u64(0b0101_0111u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
|
||||
);
|
||||
assert_eq!(
|
||||
_t1mskc_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_tzmsk_u32() {
|
||||
assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
|
||||
assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
unsafe fn test_tzmsk_u64() {
|
||||
assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
|
||||
assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,6 +42,10 @@ mod bmi2;
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use self::bmi2::*;
|
||||
|
||||
mod tbm;
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use self::tbm::*;
|
||||
|
||||
mod avx512f;
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub use self::avx512f::*;
|
||||
|
|
|
|||
225
library/stdarch/crates/core_arch/src/x86_64/tbm.rs
Normal file
225
library/stdarch/crates/core_arch/src/x86_64/tbm.rs
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
//! Trailing Bit Manipulation (TBM) instruction set.
|
||||
//!
|
||||
//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3:
|
||||
//! General-Purpose and System Instructions][amd64_ref].
|
||||
//!
|
||||
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
|
||||
//! instructions.
|
||||
//!
|
||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
//! [wikipedia_bmi]:
|
||||
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_(Trailing_Bit_Manipulation)
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.tbm.bextri.u64"]
|
||||
fn bextri_u64(a: u64, control: u64) -> u64;
|
||||
}
|
||||
|
||||
/// Extracts bits of `a` specified by `control` (the `CONTROL` const generic) into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
|
||||
/// be extracted, and bits `[15,8]` specify the length of the range. For any bit
|
||||
/// position in the specified range that lies beyond the MSB of the source operand,
|
||||
/// a zero is written. If the range is empty, the result is zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "simd_x86_updates", issue = "126936")]
|
||||
pub unsafe fn _bextri_u64<const CONTROL: u64>(a: u64) -> u64 {
|
||||
static_assert_uimm_bits!(CONTROL, 16); // presumably rejects CONTROL values wider than 16 bits (macro defined elsewhere)
|
||||
unsafe { bextri_u64(a, CONTROL) }
|
||||
}
|
||||
|
||||
/// Clears all bits below the least significant zero bit of `x`.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
|
||||
x & x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets all bits of `x` to 1 except for the least significant zero bit.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it sets all bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blci_u64(x: u64) -> u64 {
|
||||
x | !x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all other bits.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcic_u64(x: u64) -> u64 {
|
||||
!x & x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x` and clears all bits above
|
||||
/// that bit.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
|
||||
x ^ x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets the least significant zero bit of `x`.
|
||||
///
|
||||
/// If there is no zero bit in `x`, it returns `x`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcs_u64(x: u64) -> u64 {
|
||||
x | x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets all bits of `x` below the least significant one.
|
||||
///
|
||||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
|
||||
x | x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
/// Clears least significant bit and sets all other bits.
|
||||
///
|
||||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsic_u64(x: u64) -> u64 {
|
||||
!x | x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
/// Clears all bits below the least significant zero of `x` and sets all other
|
||||
/// bits.
|
||||
///
|
||||
/// If the least significant bit of `x` is `0`, it sets all bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
|
||||
!x | x.wrapping_add(1)
|
||||
}
|
||||
|
||||
/// Sets all bits below the least significant one of `x` and clears all other
|
||||
/// bits.
|
||||
///
|
||||
/// If the least significant bit of `x` is 1, it returns zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
|
||||
!x & x.wrapping_sub(1)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::x86_64::*;
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_bextri_u64() {
|
||||
assert_eq!(_bextri_u64::<0x0404>(0b0101_0000u64), 0b0000_0101u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcfill_u64() {
|
||||
assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
|
||||
assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blci_u64() {
|
||||
assert_eq!(
|
||||
_blci_u64(0b0101_0000u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
|
||||
);
|
||||
assert_eq!(
|
||||
_blci_u64(0b1111_1111u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcic_u64() {
|
||||
assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
|
||||
assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcmsk_u64() {
|
||||
assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
|
||||
assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blcs_u64() {
|
||||
assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
|
||||
assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blsfill_u64() {
|
||||
assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
|
||||
assert_eq!(
|
||||
_blsfill_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_blsic_u64() {
|
||||
assert_eq!(
|
||||
_blsic_u64(0b0101_0100u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
|
||||
);
|
||||
assert_eq!(
|
||||
_blsic_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_t1mksc_u64() {
|
||||
assert_eq!(
|
||||
_t1mskc_u64(0b0101_0111u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
|
||||
);
|
||||
assert_eq!(
|
||||
_t1mskc_u64(0u64),
|
||||
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
|
||||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tbm")]
|
||||
unsafe fn test_tzmsk_u64() {
|
||||
assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
|
||||
assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue