Implement missing intrinsics in SSE4a and TBM

Add `extracti`, `inserti` and `bextri` intrinsics. Refactor TBM into 2 modules
This commit is contained in:
sayantn 2024-07-07 18:30:31 +05:30 committed by Amanieu d'Antras
parent 7378b35fd0
commit 1da646fcab
4 changed files with 310 additions and 258 deletions

View file

@ -9,17 +9,18 @@ use stdarch_test::assert_instr;
// Foreign declarations of the LLVM SSE4a intrinsics that the public wrappers in this
// module forward to.
extern "C" {
// Bit-field extract whose range is described by the vector operand `y`.
#[link_name = "llvm.x86.sse4a.extrq"]
fn extrq(x: i64x2, y: i8x16) -> i64x2;
// Bit-field extract with the length and index passed as separate `u8` arguments.
#[link_name = "llvm.x86.sse4a.extrqi"]
fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2;
// Bit-field insert of `y` into `x` without separate length/index arguments.
#[link_name = "llvm.x86.sse4a.insertq"]
fn insertq(x: i64x2, y: i64x2) -> i64x2;
// Bit-field insert with the length and index passed as separate `u8` arguments.
#[link_name = "llvm.x86.sse4a.insertqi"]
fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2;
// Store through an `f64` pointer; presumably the non-temporal scalar double store
// (NOTE(review): confirm against the wrapper that calls it).
#[link_name = "llvm.x86.sse4a.movnt.sd"]
fn movntsd(x: *mut f64, y: __m128d);
// Store through an `f32` pointer; presumably the non-temporal scalar single store
// (NOTE(review): confirm against the wrapper that calls it).
#[link_name = "llvm.x86.sse4a.movnt.ss"]
fn movntss(x: *mut f32, y: __m128);
}
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
// FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
///
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
@ -39,6 +40,27 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
transmute(extrq(x.as_i64x2(), y.as_i8x16()))
}
/// Extracts a bit field from the lower 64 bits of `x`, with the field described by the constant
/// parameters `LEN` and `IDX`.
///
/// `IDX` is the bit index of the field's least-significant bit and `LEN` is the number of bits
/// in the field. When both are zero, bits `[63:0]` of `x` are extracted. Compilation fails if
/// `LEN + IDX` is greater than 64, or if `LEN` is zero while `IDX` is non-zero; both parameters
/// are also checked as 6-bit unsigned immediates.
///
/// The result is a 128-bit integer vector whose lower 64 bits hold the extracted field.
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))]
#[rustc_legacy_const_generics(1, 2)]
#[unstable(feature = "simd_x86_updates", issue = "126936")]
pub unsafe fn _mm_extracti_si64<const LEN: i32, const IDX: i32>(x: __m128i) -> __m128i {
    // These checks rule out the immediate combinations LLVM documents as undefined behaviour.
    static_assert_uimm_bits!(LEN, 6);
    static_assert_uimm_bits!(IDX, 6);
    static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64));
    let extracted = extrqi(x.as_i64x2(), LEN as u8, IDX as u8);
    transmute(extracted)
}
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
///
/// The bits of `y`:
@ -56,6 +78,25 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
transmute(insertq(x.as_i64x2(), y.as_i64x2()))
}
/// Copies the `LEN` least-significant bits of the lower 64 bits of `y` into the lower 64 bits
/// of `x`, placing them at bit index `IDX`.
///
/// `IDX` is the bit index of the least-significant destination bit and `LEN` is the number of
/// bits to insert. When both are zero, bits `[63:0]` of `x` are replaced with bits `[63:0]` of
/// `y`. Compilation fails if `LEN + IDX` is greater than 64, or if `LEN` is zero while `IDX` is
/// non-zero; both parameters are also checked as 6-bit unsigned immediates.
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "simd_x86_updates", issue = "126936")]
pub unsafe fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i) -> __m128i {
    // These checks rule out the immediate combinations LLVM documents as undefined behaviour.
    static_assert_uimm_bits!(LEN, 6);
    static_assert_uimm_bits!(IDX, 6);
    static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64));
    let merged = insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8);
    transmute(merged)
}
/// Non-temporal store of `a.0` into `p`.
///
/// Writes 64-bit data to a memory location without polluting the caches.
@ -114,6 +155,14 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "sse4a")]
unsafe fn test_mm_extracti_si64() {
    // LEN = 8, IDX = 8 selects bits [15:8] of the low quadword, i.e. the byte 0xcd.
    let source = _mm_setr_epi64x(0x0123456789abcdef, 0);
    let expected = _mm_setr_epi64x(0xcd, 0);
    let actual = _mm_extracti_si64::<8, 8>(source);
    assert_eq_m128i(actual, expected);
}
#[simd_test(enable = "sse4a")]
unsafe fn test_mm_insert_si64() {
let i = 0b0110_i64;
@ -131,6 +180,15 @@ mod tests {
assert_eq_m128i(r, expected);
}
#[simd_test(enable = "sse4a")]
unsafe fn test_mm_inserti_si64() {
    // LEN = 8, IDX = 8 overwrites bits [15:8] of the destination (0xcd) with the low byte
    // of the source (0x77).
    let dest = _mm_setr_epi64x(0x0123456789abcdef, 0);
    let source = _mm_setr_epi64x(0x0011223344556677, 0);
    let expected = _mm_setr_epi64x(0x0123456789ab77ef, 0);
    let actual = _mm_inserti_si64::<8, 8>(dest, source);
    assert_eq_m128i(actual, expected);
}
#[repr(align(16))]
struct MemoryF64 {
data: [f64; 2],

View file

@ -13,57 +13,28 @@
#[cfg(test)]
use stdarch_test::assert_instr;
// FIXME(blocked on #248)
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select:
// intrinsic %llvm.x86.tbm.bextri.u32
/*
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.tbm.bextri.u32"]
fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32;
#[link_name="llvm.x86.tbm.bextri.u64"]
fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64;
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline]
#[target_feature(enable = "tbm")]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline]
#[target_feature(enable = "tbm")]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
#[link_name = "llvm.x86.tbm.bextri.u32"]
fn bextri_u32(a: u32, control: u32) -> u32;
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
/// be extracted, and bits `[15,8]` specify the length of the range.
/// be extracted, and bits `[15,8]` specify the length of the range. For any bit
/// position in the specified range that lie beyond the MSB of the source operand,
/// zeroes will be written. If the range is empty, the result is zero.
#[inline]
#[target_feature(enable = "tbm")]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
unsafe { x86_tbm_bextri_u32(a, control) }
#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "simd_x86_updates", issue = "126936")]
pub unsafe fn _bextri_u32<const CONTROL: u32>(a: u32) -> u32 {
static_assert_uimm_bits!(CONTROL, 16);
unsafe { bextri_u32(a, CONTROL) }
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
/// be extracted, and bits `[15,8]` specify the length of the range.
#[inline]
#[target_feature(enable = "tbm")]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
unsafe { x86_tbm_bextri_u64(a, control) }
}
*/
/// Clears all bits below the least significant zero bit of `x`.
///
/// If there is no zero bit in `x`, it returns zero.
@ -75,18 +46,6 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 {
x & (x.wrapping_add(1))
}
/// Clears all bits below the least significant zero bit of `x`.
///
/// If there is no zero bit in `x`, it returns zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
x & (x.wrapping_add(1))
}
/// Sets all bits of `x` to 1 except for the least significant zero bit.
///
/// If there is no zero bit in `x`, it sets all bits.
@ -95,19 +54,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blci))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blci_u32(x: u32) -> u32 {
x | !(x.wrapping_add(1))
}
/// Sets all bits of `x` to 1 except for the least significant zero bit.
///
/// If there is no zero bit in `x`, it sets all bits.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blci_u64(x: u64) -> u64 {
x | !(x.wrapping_add(1))
x | !x.wrapping_add(1)
}
/// Sets the least significant zero bit of `x` and clears all other bits.
@ -118,19 +65,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blcic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcic_u32(x: u32) -> u32 {
!x & (x.wrapping_add(1))
}
/// Sets the least significant zero bit of `x` and clears all other bits.
///
/// If there is no zero bit in `x`, it returns zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcic_u64(x: u64) -> u64 {
!x & (x.wrapping_add(1))
!x & x.wrapping_add(1)
}
/// Sets the least significant zero bit of `x` and clears all bits above
@ -142,20 +77,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blcmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_add(1))
}
/// Sets the least significant zero bit of `x` and clears all bits above
/// that bit.
///
/// If there is no zero bit in `x`, it sets all the bits.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_add(1))
x ^ x.wrapping_add(1)
}
/// Sets the least significant zero bit of `x`.
@ -166,18 +88,6 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blcs))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcs_u32(x: u32) -> u32 {
x | (x.wrapping_add(1))
}
/// Sets the least significant zero bit of `x`.
///
/// If there is no zero bit in `x`, it returns `x`.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcs_u64(x: u64) -> u64 {
x | x.wrapping_add(1)
}
@ -189,19 +99,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blsfill))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsfill_u32(x: u32) -> u32 {
x | (x.wrapping_sub(1))
}
/// Sets all bits of `x` below the least significant one.
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
x | (x.wrapping_sub(1))
x | x.wrapping_sub(1)
}
/// Clears least significant bit and sets all other bits.
@ -212,19 +110,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(blsic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsic_u32(x: u32) -> u32 {
!x | (x.wrapping_sub(1))
}
/// Clears least significant bit and sets all other bits.
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsic_u64(x: u64) -> u64 {
!x | (x.wrapping_sub(1))
!x | x.wrapping_sub(1)
}
/// Clears all bits below the least significant zero of `x` and sets all other
@ -236,20 +122,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(t1mskc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
!x | (x.wrapping_add(1))
}
/// Clears all bits below the least significant zero of `x` and sets all other
/// bits.
///
/// If the least significant bit of `x` is `0`, it sets all bits.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
!x | (x.wrapping_add(1))
!x | x.wrapping_add(1)
}
/// Sets all bits below the least significant one of `x` and clears all other
@ -261,20 +134,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
#[cfg_attr(test, assert_instr(tzmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
!x & (x.wrapping_sub(1))
}
/// Sets all bits below the least significant one of `x` and clears all other
/// bits.
///
/// If the least significant bit of `x` is 1, it returns zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
!x & (x.wrapping_sub(1))
!x & x.wrapping_sub(1)
}
#[cfg(test)]
@ -283,31 +143,17 @@ mod tests {
use crate::core_arch::x86::*;
/*
#[simd_test(enable = "tbm")]
unsafe fn test_bextr_u32() {
assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
unsafe fn test_bextri_u32() {
assert_eq!(_bextri_u32::<0x0404>(0b0101_0000u32), 0b0000_0101u32);
}
#[simd_test(enable = "tbm")]
unsafe fn test_bextr_u64() {
assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
}
*/
#[simd_test(enable = "tbm")]
unsafe fn test_blcfill_u32() {
assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
unsafe fn test_blcfill_u64() {
assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blci_u32() {
assert_eq!(
@ -320,59 +166,24 @@ mod tests {
);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
#[rustfmt::skip]
unsafe fn test_blci_u64() {
assert_eq!(
_blci_u64(0b0101_0000u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
);
assert_eq!(
_blci_u64(0b1111_1111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blcic_u32() {
assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
unsafe fn test_blcic_u64() {
assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blcmsk_u32() {
assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
unsafe fn test_blcmsk_u64() {
assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blcs_u32() {
assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
unsafe fn test_blcs_u64() {
assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blsfill_u32() {
assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
@ -382,17 +193,6 @@ mod tests {
);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
#[rustfmt::skip]
unsafe fn test_blsfill_u64() {
assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
assert_eq!(
_blsfill_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test(enable = "tbm")]
unsafe fn test_blsic_u32() {
assert_eq!(
@ -405,20 +205,6 @@ mod tests {
);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
#[rustfmt::skip]
unsafe fn test_blsic_u64() {
assert_eq!(
_blsic_u64(0b0101_0100u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
);
assert_eq!(
_blsic_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test(enable = "tbm")]
unsafe fn test_t1mskc_u32() {
assert_eq!(
@ -431,30 +217,9 @@ mod tests {
);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
#[rustfmt::skip]
unsafe fn test_t1mksc_u64() {
assert_eq!(
_t1mskc_u64(0b0101_0111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
);
assert_eq!(
_t1mskc_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test(enable = "tbm")]
unsafe fn test_tzmsk_u32() {
assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
}
#[simd_test(enable = "tbm")]
#[cfg(not(target_arch = "x86"))]
unsafe fn test_tzmsk_u64() {
assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
}
}

View file

@ -42,6 +42,10 @@ mod bmi2;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use self::bmi2::*;
mod tbm;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use self::tbm::*;
mod avx512f;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub use self::avx512f::*;

View file

@ -0,0 +1,225 @@
//! Trailing Bit Manipulation (TBM) instruction set.
//!
//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3:
//! General-Purpose and System Instructions][amd64_ref].
//!
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
//! instructions.
//!
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_(Trailing_Bit_Manipulation)
#[cfg(test)]
use stdarch_test::assert_instr;
// Foreign declaration of the LLVM intrinsic that `_bextri_u64` forwards to; `control` is
// the bit-field selector that the wrapper passes from its `CONTROL` const parameter.
extern "C" {
#[link_name = "llvm.x86.tbm.bextri.u64"]
fn bextri_u64(a: u64, control: u64) -> u64;
}
/// Extracts the bit field of `a` selected by `CONTROL` and returns it in the least
/// significant bits of the result.
///
/// Bits `[7,0]` of `CONTROL` give the index of the first bit of the field and bits `[15,8]`
/// give its length. Field positions that lie beyond the MSB of `a` are filled with zeroes,
/// and an empty field yields zero. `CONTROL` is checked at compile time as a 16-bit unsigned
/// immediate.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "simd_x86_updates", issue = "126936")]
pub unsafe fn _bextri_u64<const CONTROL: u64>(a: u64) -> u64 {
    // Only the start-index and length bytes are meaningful, so wider controls are rejected.
    static_assert_uimm_bits!(CONTROL, 16);

    unsafe { bextri_u64(a, CONTROL) }
}
/// Clears every bit of `x` that lies below its least significant zero bit.
///
/// When `x` has no zero bit, the result is zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
    // Adding one carries through the trailing run of ones, turning them into zeroes.
    let successor = x.wrapping_add(1);
    x & successor
}
/// Returns a value with every bit set except the one at the position of the least
/// significant zero bit of `x`.
///
/// When `x` has no zero bit, all bits of the result are set.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blci_u64(x: u64) -> u64 {
    // Wrapping keeps the all-ones input well defined: the successor is then zero.
    let successor = x.wrapping_add(1);
    x | !successor
}
/// Isolates the least significant zero bit of `x`: that bit is set in the result and every
/// other bit is cleared.
///
/// When `x` has no zero bit, the result is zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcic_u64(x: u64) -> u64 {
    // Wrapping keeps the all-ones input well defined: the successor is then zero.
    let successor = x.wrapping_add(1);
    !x & successor
}
/// Builds a mask that covers the least significant zero bit of `x` together with all bits
/// below it; every bit above that position is cleared.
///
/// When `x` has no zero bit, all bits of the result are set.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
    // `x` and its successor differ exactly in the lowest zero bit and the ones below it.
    let successor = x.wrapping_add(1);
    x ^ successor
}
/// Sets the least significant zero bit of `x`, leaving every other bit unchanged.
///
/// When `x` has no zero bit, `x` is returned as is.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcs_u64(x: u64) -> u64 {
    // Wrapping keeps the all-ones input well defined: the successor is then zero.
    let successor = x.wrapping_add(1);
    x | successor
}
/// Sets every bit of `x` that lies below its least significant set bit.
///
/// When `x` has no set bit, all bits of the result are set.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
    // Subtracting one borrows through the trailing zeroes, turning them into ones.
    let predecessor = x.wrapping_sub(1);
    x | predecessor
}
/// Returns a value with every bit set except the one at the position of the least
/// significant set bit of `x`.
///
/// When `x` has no set bit, all bits of the result are set.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsic_u64(x: u64) -> u64 {
    // Wrapping keeps the zero input well defined: the predecessor is then all ones.
    let predecessor = x.wrapping_sub(1);
    !x | predecessor
}
/// Clears every bit below the least significant zero bit of `x` and sets all remaining
/// bits.
///
/// When the least significant bit of `x` is `0`, all bits of the result are set.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
    // Adding one carries through the trailing run of ones, turning them into zeroes.
    let successor = x.wrapping_add(1);
    !x | successor
}
/// Sets every bit below the least significant set bit of `x` and clears all remaining
/// bits.
///
/// When the least significant bit of `x` is 1, the result is zero.
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
    // Subtracting one borrows through the trailing zeroes, turning them into ones.
    let predecessor = x.wrapping_sub(1);
    !x & predecessor
}
// Run-time checks for the 64-bit TBM intrinsics. Expected values that are "all ones except
// a few bits" are written as the complement of the cleared bits so the pattern is readable.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86_64::*;

    #[simd_test(enable = "tbm")]
    unsafe fn test_bextri_u64() {
        // CONTROL = 0x0404: start index 4 (low byte), length 4 (second byte).
        assert_eq!(_bextri_u64::<0x0404>(0b0101_0000u64), 0b0000_0101u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcfill_u64() {
        assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
        assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blci_u64() {
        // Only the position of the lowest zero bit of the input stays clear.
        assert_eq!(_blci_u64(0b0101_0000u64), !0b0000_0001u64);
        assert_eq!(_blci_u64(0b1111_1111u64), !0b1_0000_0000u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcic_u64() {
        assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
        assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcmsk_u64() {
        assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
        assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcs_u64() {
        assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
        assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blsfill_u64() {
        assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
        // No set bit in the input: every bit is filled.
        assert_eq!(_blsfill_u64(0u64), u64::MAX);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blsic_u64() {
        // Only the position of the lowest set bit of the input stays clear.
        assert_eq!(_blsic_u64(0b0101_0100u64), !0b0000_0100u64);
        assert_eq!(_blsic_u64(0u64), u64::MAX);
    }

    // Name corrected from `test_t1mksc_u64` so it matches the intrinsic under test.
    #[simd_test(enable = "tbm")]
    unsafe fn test_t1mskc_u64() {
        // The trailing run of ones in the input is cleared; everything else is set.
        assert_eq!(_t1mskc_u64(0b0101_0111u64), !0b0000_0111u64);
        assert_eq!(_t1mskc_u64(0u64), u64::MAX);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_tzmsk_u64() {
        assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
        assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
    }
}