Migrate existing tests to #[assert_instr]
Also add some documentation to the assert_instr infrastructure
This commit is contained in:
parent
5e8f0e72b5
commit
124f731ce2
49 changed files with 97 additions and 644 deletions
|
|
@ -14,6 +14,7 @@ example for `_mm_adds_epi16`:
|
|||
/// Add packed 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(paddsw))]
|
||||
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
unsafe { paddsw(a, b) }
|
||||
}
|
||||
|
|
@ -32,6 +33,10 @@ Let's break this down:
|
|||
support `sse2`, the compiler will still generate code for `_mm_adds_epi16`
|
||||
*as if* `sse2` support existed. Without this attribute, the compiler might
|
||||
not generate the intended CPU instruction.
|
||||
* The `#[cfg_attr(test, assert_instr(paddsw))]` attribute indicates that when
|
||||
we're testing the crate we'll assert that the `paddsw` instruction is
|
||||
generated inside this function, ensuring that the SIMD intrinsic truly is an
|
||||
intrinsic for the instruction!
|
||||
* The types of the vectors given to the intrinsic should generally match the
|
||||
types as provided in the vendor interface. We'll talk about this more below.
|
||||
* The implementation of the vendor intrinsic is generally very simple.
|
||||
|
|
@ -40,7 +45,7 @@ Let's break this down:
|
|||
compiler intrinsic (in this case, `paddsw`) when one is available. More on
|
||||
this below as well.
|
||||
|
||||
Once a function has been added, you should add at least one test for basic
|
||||
Once a function has been added, you should also add at least one test for basic
|
||||
functionality. Here's an example for `_mm_adds_epi16`:
|
||||
|
||||
```rust
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
_bzhi_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
bzhil %esi, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_bzhi_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
bzhiq %rsi, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bzhi_u32(x: u32, mask: u32) -> u32 {
|
||||
stdsimd::vendor::_bzhi_u32(x, mask)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bzhi_u64(x: u64, mask: u64) -> u64 {
|
||||
stdsimd::vendor::_bzhi_u64(x, mask)
|
||||
}
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
_umulx_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
movl %edi, %ecx
|
||||
movl %esi, %eax
|
||||
imulq %rcx, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
_umulx_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
mulxq %rsi, %rcx, %rax
|
||||
movq %rcx, (%rdi)
|
||||
movq %rax, 8(%rdi)
|
||||
movq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn umulx_u32(x: u32, y: u32) -> (u32, u32) {
|
||||
stdsimd::vendor::_mulx_u32(x, y)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn umulx_u64(x: u64, y: u64) -> (u64, u64) {
|
||||
stdsimd::vendor::_mulx_u64(x, y)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_pdep_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
pdepl %esi, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_pdep_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
pdepq %rsi, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn pdep_u32(x: u32, mask: u32) -> u32 {
|
||||
stdsimd::vendor::_pdep_u32(x, mask)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn pdep_u64(x: u64, mask: u64) -> u64 {
|
||||
stdsimd::vendor::_pdep_u64(x, mask)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_pext_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
pextl %esi, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_pext_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
pextq %rsi, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn pext_u32(x: u32, mask: u32) -> u32 {
|
||||
stdsimd::vendor::_pext_u32(x, mask)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn pext_u64(x: u64, mask: u64) -> u64 {
|
||||
stdsimd::vendor::_pext_u64(x, mask)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_andn_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
andnl %esi, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_andn_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
andnq %rsi, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn andn_u32(x: u32, y: u32) -> u32 {
|
||||
stdsimd::vendor::_andn_u32(x, y)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn andn_u64(x: u64, y: u64) -> u64 {
|
||||
stdsimd::vendor::_andn_u64(x, y)
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
_bextr_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
movzbl %sil, %eax
|
||||
shll $8, %edx
|
||||
movzwl %dx, %ecx
|
||||
orl %eax, %ecx
|
||||
bextrl %ecx, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_bextr_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
movzbl %sil, %eax
|
||||
shlq $8, %rdx
|
||||
movzwl %dx, %ecx
|
||||
orq %rax, %rcx
|
||||
bextrq %rcx, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
_bextr2_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
bextrl %esi, %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_bextr2_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
bextrq %rsi, %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bextr_u32(x: u32, y: u32, z: u32) -> u32 {
|
||||
stdsimd::vendor::_bextr_u32(x, y, z)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bextr_u64(x: u64, y: u64, z: u64) -> u64 {
|
||||
stdsimd::vendor::_bextr_u64(x, y, z)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bextr2_u32(x: u32, y: u32) -> u32 {
|
||||
stdsimd::vendor::_bextr2_u32(x, y)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn bextr2_u64(x: u64, y: u64) -> u64 {
|
||||
stdsimd::vendor::_bextr2_u64(x, y)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blsi_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsil %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blsi_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsiq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsi_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blsi_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsi_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blsi_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blsr_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsrl %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blsr_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsrq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsr_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blsr_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsr_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blsr_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_tzcnt_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
tzcntl %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_tzcnt_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
tzcntq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn tzcnt_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_tzcnt_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn tzcnt_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_tzcnt_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_lzcnt_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
lzcntl %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_lzcnt_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
lzcntq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn lzcnt_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_lzcnt_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn lzcnt_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_lzcnt_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_popcnt_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
popcntl %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_popcnt_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
popcntq %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn popcnt_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_popcnt32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn popcnt_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_popcnt64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blcfill_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcfill %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blcfill_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcfill %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcfill_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blcfill_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcfill_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blcfill_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blci_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blci %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blci_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blci %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blci_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blci_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blci_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blci_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blcic_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcic %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blcic_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcic %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcic_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blcic_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcic_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blcic_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blcmsk_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcmsk %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blcmsk_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcmsk %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcmsk_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blcmsk_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcmsk_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blcmsk_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blcs_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcs %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blcs_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blcs %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcs_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blcs_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blcs_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blcs_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blsfill_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsfill %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blsfill_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsfill %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsfill_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blsfill_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsfill_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blsfill_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_blsic_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsic %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_blsic_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
blsic %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsic_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_blsic_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn blsic_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_blsic_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_t1mskc_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
t1mskc %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_t1mskc_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
t1mskc %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn t1mskc_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_t1mskc_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn t1mskc_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_t1mskc_u64(x)
|
||||
}
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
_tzmsk_u32:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
tzmsk %edi, %eax
|
||||
popq %rbp
|
||||
retq
|
||||
_tzmsk_u64:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
tzmsk %rdi, %rax
|
||||
popq %rbp
|
||||
retq
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
extern crate stdsimd;
|
||||
|
||||
#[no_mangle]
|
||||
pub fn tzmsk_u32(x: u32) -> u32 {
|
||||
stdsimd::vendor::_tzmsk_u32(x)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn tzmsk_u64(x: u64) -> u64 {
|
||||
stdsimd::vendor::_tzmsk_u64(x)
|
||||
}
|
||||
|
|
@ -1,3 +1,13 @@
|
|||
//! Implementation of the `#[assert_instr]` macro
|
||||
//!
|
||||
//! This macro is used when testing the `stdsimd` crate: it generates test
|
||||
//! cases asserting that functions do indeed contain the instructions that
|
||||
//! we're expecting them to contain.
|
||||
//!
|
||||
//! The procedural macro here is relatively simple: it appends a
|
||||
//! `#[test]` function to the original token stream which asserts that the
|
||||
//! function itself contains the relevant instruction.
|
||||
|
||||
#![feature(proc_macro)]
|
||||
|
||||
extern crate proc_macro;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
//! Runtime support needed for the `#[assert_instr]` macro
|
||||
//!
|
||||
//! This disassembles the current executable, parses the output once
|
||||
//! globally, and then provides the `assert` function, which makes
|
||||
//! assertions about the disassembly of a function.
|
||||
|
||||
#![feature(proc_macro)]
|
||||
|
||||
extern crate assert_instr_macro;
|
||||
|
|
@ -211,21 +217,30 @@ fn normalize(symbol: &str) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
/// Main entry point for this crate, called by the `#[assert_instr]` macro.
|
||||
///
|
||||
/// This asserts that the function at `fnptr` contains the `expected`
|
||||
/// instruction.
|
||||
pub fn assert(fnptr: usize, expected: &str) {
|
||||
// Translate this function pointer to a symbolic name that we'd have found
|
||||
// in the disassembly.
|
||||
let mut sym = None;
|
||||
backtrace::resolve(fnptr as *mut _, |name| {
|
||||
sym = name.name().and_then(|s| s.as_str()).map(normalize);
|
||||
});
|
||||
|
||||
let sym = match sym {
|
||||
Some(s) => s,
|
||||
None => panic!("failed to get symbol of function pointer: {}", fnptr),
|
||||
};
|
||||
|
||||
// Find our function in the list of all disassembled functions
|
||||
let functions = &DISASSEMBLY.get(&sym)
|
||||
.expect(&format!("failed to find disassembly of {}", sym));
|
||||
assert_eq!(functions.len(), 1);
|
||||
let function = &functions[0];
|
||||
|
||||
// Look for `expected` as the first part of any instruction in this
|
||||
// function, returning if we do indeed find it.
|
||||
for instr in function.instrs.iter() {
|
||||
if let Some(part) = instr.parts.get(0) {
|
||||
if part == expected {
|
||||
|
|
@ -234,6 +249,8 @@ pub fn assert(fnptr: usize, expected: &str) {
|
|||
}
|
||||
}
|
||||
|
||||
// Help debug by printing out the found disassembly, and then panic as we
|
||||
// didn't find the instruction.
|
||||
println!("disassembly for {}: ", sym);
|
||||
for (i, instr) in function.instrs.iter().enumerate() {
|
||||
print!("\t{:2}: ", i);
|
||||
|
|
|
|||
|
|
@ -1,144 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# Script to check the assembly generated
|
||||
import os, sys
|
||||
import os.path
|
||||
from subprocess import Popen, PIPE
|
||||
import argparse
|
||||
|
||||
asm_dir = './asm'
|
||||
|
||||
files = set()
|
||||
verbose = False
|
||||
extern_crate = None
|
||||
|
||||
def arm_triplet(arch) :
|
||||
triples = { 'armv7' : 'armv7-unknown-linux-gnueabihf',
|
||||
'armv8' : 'aarch64-unknown-linux-gnu' }
|
||||
return triples[arch]
|
||||
|
||||
class File(object):
|
||||
def __init__(self, path_rs):
|
||||
self.path_rs = path_rs
|
||||
self.path_asm_should = os.path.join(os.path.splitext(path_rs)[0] + ".asm")
|
||||
self.path_asm_output = os.path.join(os.path.splitext(path_rs)[0] + "_output.asm")
|
||||
self.path_llvmir_output = os.path.join(os.path.splitext(path_rs)[0] + "_ir.ll")
|
||||
self.name = os.path.splitext(os.path.basename(path_rs))[0]
|
||||
self.feature = self.name.split("_")[1]
|
||||
self.arch = self.name.split("_")[0]
|
||||
|
||||
if self.feature == "none":
|
||||
self.feature = None
|
||||
|
||||
def __str__(self):
|
||||
return "name: " + self.name + ", path-rs: " + self.path_rs + ", path-asm: " + self.path_asm_should + ', arch: ' + self.arch + ", feature: " + str(self.feature)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.name)
|
||||
|
||||
def find_files():
|
||||
for dirpath, dirnames, filenames in os.walk(asm_dir):
|
||||
for filename in [f for f in filenames if f.endswith(".rs")]:
|
||||
files.add(File(os.path.join(dirpath, filename)))
|
||||
|
||||
def call(args):
|
||||
if verbose:
|
||||
print "command: " + str(args)
|
||||
p = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
|
||||
lines = p.stdout.readlines()
|
||||
if verbose and p.returncode != 0:
|
||||
error = p.stderr.readlines()
|
||||
print >>sys.stdout, lines
|
||||
print >>sys.stderr, "ERROR: %s" % error
|
||||
|
||||
def compile_file(file):
|
||||
if verbose:
|
||||
print "Checking: " + str(file) + "..."
|
||||
|
||||
cargo_args = 'cargo rustc --verbose --release -- -C panic=abort '
|
||||
if file.feature:
|
||||
cargo_args = cargo_args + '-C target-feature=+{}'.format(file.feature)
|
||||
if file.arch == 'armv7' or file.arch == 'armv8':
|
||||
cargo_args = cargo_args + '--target={}'.format(arm_triplet(file.arch))
|
||||
call(str(cargo_args))
|
||||
|
||||
rustc_args = 'rustc --verbose -C opt-level=3 -C panic="abort" --extern %s=target/release/lib%s.rlib --crate-type lib' % (extern_crate, extern_crate);
|
||||
if file.feature:
|
||||
rustc_args = rustc_args + ' -C target-feature=+{}'.format(file.feature)
|
||||
if file.arch == 'armv7' or file.arch == 'armv8':
|
||||
rustc_args = rustc_args + ' --target={}'.format(arm_triplet(file.arch))
|
||||
rustc_args_asm = rustc_args + ' --emit asm {} -o {}'.format(file.path_rs, file.path_asm_output)
|
||||
call(rustc_args_asm)
|
||||
rustc_args_ll = rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output)
|
||||
call(rustc_args_ll)
|
||||
|
||||
if verbose:
|
||||
print "...done!"
|
||||
|
||||
def diff_files(rustc_output, asm_snippet):
|
||||
with open(rustc_output, 'r') as rustc_output_file:
|
||||
rustc_output_lines = rustc_output_file.readlines()
|
||||
|
||||
with open(asm_snippet, 'r') as asm_snippet_file:
|
||||
asm_snippet_lines = asm_snippet_file.readlines()
|
||||
|
||||
# remove all empty lines and lines starting with "."
|
||||
rustc_output_lines = [l.strip() for l in rustc_output_lines]
|
||||
rustc_output_lines = [l for l in rustc_output_lines if not l.startswith(".") and not len(l) == 0]
|
||||
asm_snippet_lines = [l.strip() for l in asm_snippet_lines]
|
||||
asm_snippet_lines = [l for l in asm_snippet_lines if not l.startswith(".") and not len(l) == 0]
|
||||
|
||||
results_differ = False
|
||||
|
||||
if len(rustc_output_lines) != len(asm_snippet_lines):
|
||||
results_differ = True
|
||||
|
||||
for line_is, line_should in zip(rustc_output_lines, asm_snippet_lines):
|
||||
if line_is != line_should:
|
||||
results_differ = True
|
||||
|
||||
if results_differ:
|
||||
print "Error: results differ"
|
||||
print "Is:"
|
||||
print rustc_output_lines
|
||||
print "Should:"
|
||||
print asm_snippet_lines
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def check_file(file):
|
||||
compile_file(file)
|
||||
return diff_files(file.path_asm_output, file.path_asm_should)
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(description='Checks ASM code')
|
||||
parser.add_argument('-verbose', action="store_true", default=False)
|
||||
parser.add_argument('-extern-crate', dest='extern_crate', default='stdsimd')
|
||||
results = parser.parse_args()
|
||||
|
||||
global verbose
|
||||
if results.verbose:
|
||||
verbose = True
|
||||
|
||||
global extern_crate
|
||||
extern_crate = results.extern_crate
|
||||
|
||||
find_files()
|
||||
|
||||
if verbose:
|
||||
for f in files:
|
||||
print f
|
||||
error = False
|
||||
for f in files:
|
||||
result = check_file(f)
|
||||
if not result:
|
||||
error = True
|
||||
|
||||
if error == True:
|
||||
exit(1)
|
||||
else:
|
||||
exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
//! Advanced Bit Manipulation (ABM) instructions
|
||||
//!
|
||||
//!
|
||||
//! The POPCNT and LZCNT instructions each have their own CPUID bit to indicate support.
|
||||
//!
|
||||
//! The references are:
|
||||
|
|
@ -10,12 +10,15 @@
|
|||
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29)
|
||||
//! provides a quick overview of the instructions available.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
/// Counts the leading most significant zero bits.
|
||||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+lzcnt"]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
|
||||
|
||||
/// Counts the leading most significant zero bits.
|
||||
|
|
@ -23,16 +26,19 @@ pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
|
|||
/// When the operand is zero, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+lzcnt"]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
|
||||
|
||||
/// Counts the bits that are set.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+popcnt"]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
pub fn _popcnt32(x: u32) -> u32 { x.count_ones() }
|
||||
|
||||
/// Counts the bits that are set.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+popcnt"]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
|
||||
|
||||
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@
|
|||
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29)
|
||||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
#[allow(dead_code)]
|
||||
extern "C" {
|
||||
#[link_name="llvm.x86.bmi.bextr.32"]
|
||||
|
|
@ -19,6 +22,7 @@ extern "C" {
|
|||
/// the least significant bits of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
||||
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
|
||||
}
|
||||
|
|
@ -27,6 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
|||
/// the least significant bits of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
|
||||
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
|
||||
}
|
||||
|
|
@ -38,6 +43,7 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
|
|||
/// extracted, and bits [15,8] specify the length of the range.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
|
||||
unsafe { x86_bmi_bextr_32(a, control) }
|
||||
}
|
||||
|
|
@ -49,6 +55,7 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
|
|||
/// extracted, and bits [15,8] specify the length of the range.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
||||
unsafe { x86_bmi_bextr_64(a, control) }
|
||||
}
|
||||
|
|
@ -56,6 +63,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
|||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
pub fn _andn_u32(a: u32, b: u32) -> u32 {
|
||||
!a & b
|
||||
}
|
||||
|
|
@ -63,6 +71,7 @@ pub fn _andn_u32(a: u32, b: u32) -> u32 {
|
|||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
pub fn _andn_u64(a: u64, b: u64) -> u64 {
|
||||
!a & b
|
||||
}
|
||||
|
|
@ -70,6 +79,7 @@ pub fn _andn_u64(a: u64, b: u64) -> u64 {
|
|||
/// Extract lowest set isolated bit.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
pub fn _blsi_u32(x: u32) -> u32 {
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
|
|
@ -77,6 +87,7 @@ pub fn _blsi_u32(x: u32) -> u32 {
|
|||
/// Extract lowest set isolated bit.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
pub fn _blsi_u64(x: u64) -> u64 {
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
|
|
@ -84,6 +95,7 @@ pub fn _blsi_u64(x: u64) -> u64 {
|
|||
/// Get mask up to lowest set bit.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
pub fn _blsmsk_u32(x: u32) -> u32 {
|
||||
x ^ (x.wrapping_sub(1u32))
|
||||
}
|
||||
|
|
@ -91,6 +103,7 @@ pub fn _blsmsk_u32(x: u32) -> u32 {
|
|||
/// Get mask up to lowest set bit.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
pub fn _blsmsk_u64(x: u64) -> u64 {
|
||||
x ^ (x.wrapping_sub(1u64))
|
||||
}
|
||||
|
|
@ -100,6 +113,7 @@ pub fn _blsmsk_u64(x: u64) -> u64 {
|
|||
/// If `x` is zero, sets CF.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
pub fn _blsr_u32(x: u32) -> u32 {
|
||||
x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -109,6 +123,7 @@ pub fn _blsr_u32(x: u32) -> u32 {
|
|||
/// If `x` is zero, sets CF.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
pub fn _blsr_u64(x: u64) -> u64 {
|
||||
x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -118,6 +133,7 @@ pub fn _blsr_u64(x: u64) -> u64 {
|
|||
/// When the source operand is 0, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
pub fn _tzcnt_u16(x: u16) -> u16 {
|
||||
x.trailing_zeros() as u16
|
||||
}
|
||||
|
|
@ -127,6 +143,7 @@ pub fn _tzcnt_u16(x: u16) -> u16 {
|
|||
/// When the source operand is 0, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
pub fn _tzcnt_u32(x: u32) -> u32 {
|
||||
x.trailing_zeros()
|
||||
}
|
||||
|
|
@ -136,6 +153,7 @@ pub fn _tzcnt_u32(x: u32) -> u32 {
|
|||
/// When the source operand is 0, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
pub fn _tzcnt_u64(x: u64) -> u64 {
|
||||
x.trailing_zeros() as u64
|
||||
}
|
||||
|
|
@ -145,6 +163,7 @@ pub fn _tzcnt_u64(x: u64) -> u64 {
|
|||
/// When the source operand is 0, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
pub fn _mm_tzcnt_u32(x: u32) -> u32 {
|
||||
x.trailing_zeros()
|
||||
}
|
||||
|
|
@ -154,6 +173,7 @@ pub fn _mm_tzcnt_u32(x: u32) -> u32 {
|
|||
/// When the source operand is 0, it returns its size in bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi"]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
pub fn _mm_tzcnt_u64(x: u64) -> u64 {
|
||||
x.trailing_zeros() as u64
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@
|
|||
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29)
|
||||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
/// Unsigned multiply without affecting flags.
|
||||
///
|
||||
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
|
||||
|
|
@ -51,6 +54,7 @@ extern "C" {
|
|||
/// Zero the bits of `a` at positions >= `index`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
pub fn _bzhi_u32(a: u32, index: u32) -> u32 {
|
||||
unsafe { x86_bmi2_bzhi_32(a, index) }
|
||||
}
|
||||
|
|
@ -58,6 +62,7 @@ pub fn _bzhi_u32(a: u32, index: u32) -> u32 {
|
|||
/// Zero the bits of `a` at positions >= `index`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
pub fn _bzhi_u64(a: u64, index: u64) -> u64 {
|
||||
unsafe { x86_bmi2_bzhi_64(a, index) }
|
||||
}
|
||||
|
|
@ -67,6 +72,7 @@ pub fn _bzhi_u64(a: u64, index: u64) -> u64 {
|
|||
/// specified by the `mask`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
pub fn _pdep_u32(a: u32, mask: u32) -> u32 {
|
||||
unsafe { x86_bmi2_pdep_32(a, mask) }
|
||||
}
|
||||
|
|
@ -75,6 +81,7 @@ pub fn _pdep_u32(a: u32, mask: u32) -> u32 {
|
|||
/// specified by the `mask`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
|
||||
unsafe { x86_bmi2_pdep_64(a, mask) }
|
||||
}
|
||||
|
|
@ -83,6 +90,7 @@ pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
|
|||
/// order bit positions of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
pub fn _pext_u32(a: u32, mask: u32) -> u32 {
|
||||
unsafe { x86_bmi2_pext_32(a, mask) }
|
||||
}
|
||||
|
|
@ -91,6 +99,7 @@ pub fn _pext_u32(a: u32, mask: u32) -> u32 {
|
|||
/// order bit positions of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+bmi2"]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
pub fn _pext_u64(a: u64, mask: u64) -> u64 {
|
||||
unsafe { x86_bmi2_pext_64(a, mask) }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ use x86::__m128i;
|
|||
use v128::*;
|
||||
use v64::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
|
||||
///
|
||||
/// This can help improve the performance and power consumption of spin-wait
|
||||
|
|
@ -89,6 +92,7 @@ pub fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 {
|
|||
/// Add packed 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(paddsw))]
|
||||
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
|
||||
unsafe { paddsw(a, b) }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@
|
|||
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29)
|
||||
//! provides a quick overview of the available instructions.
|
||||
|
||||
#[cfg(test)]
|
||||
use assert_instr::assert_instr;
|
||||
|
||||
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
|
||||
/*
|
||||
#[allow(dead_code)]
|
||||
|
|
@ -20,7 +23,7 @@ extern "C" {
|
|||
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[target_feature = "+tbm"]
|
||||
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
||||
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
|
||||
}
|
||||
|
|
@ -28,7 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
|||
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[target_feature = "+tbm"]
|
||||
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
|
||||
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
|
||||
}
|
||||
|
|
@ -61,6 +64,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
|||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
pub fn _blcfill_u32(x: u32) -> u32 {
|
||||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -70,6 +74,7 @@ pub fn _blcfill_u32(x: u32) -> u32 {
|
|||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
pub fn _blcfill_u64(x: u64) -> u64 {
|
||||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -79,6 +84,7 @@ pub fn _blcfill_u64(x: u64) -> u64 {
|
|||
/// If there is no zero bit in `x`, it sets all bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
pub fn _blci_u32(x: u32) -> u32 {
|
||||
x | !(x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -88,6 +94,7 @@ pub fn _blci_u32(x: u32) -> u32 {
|
|||
/// If there is no zero bit in `x`, it sets all bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
pub fn _blci_u64(x: u64) -> u64 {
|
||||
x | !(x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -97,6 +104,7 @@ pub fn _blci_u64(x: u64) -> u64 {
|
|||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
pub fn _blcic_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -106,6 +114,7 @@ pub fn _blcic_u32(x: u32) -> u32 {
|
|||
/// If there is no zero bit in `x`, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
pub fn _blcic_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -115,6 +124,7 @@ pub fn _blcic_u64(x: u64) -> u64 {
|
|||
/// If there is no zero bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
pub fn _blcmsk_u32(x: u32) -> u32 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -124,6 +134,7 @@ pub fn _blcmsk_u32(x: u32) -> u32 {
|
|||
/// If there is no zero bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
pub fn _blcmsk_u64(x: u64) -> u64 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -133,6 +144,7 @@ pub fn _blcmsk_u64(x: u64) -> u64 {
|
|||
/// If there is no zero bit in `x`, it returns `x`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
pub fn _blcs_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -142,6 +154,7 @@ pub fn _blcs_u32(x: u32) -> u32 {
|
|||
/// If there is no zero bit in `x`, it returns `x`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
pub fn _blcs_u64(x: u64) -> u64 {
|
||||
x | x.wrapping_add(1)
|
||||
}
|
||||
|
|
@ -151,6 +164,7 @@ pub fn _blcs_u64(x: u64) -> u64 {
|
|||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
pub fn _blsfill_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -160,6 +174,7 @@ pub fn _blsfill_u32(x: u32) -> u32 {
|
|||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
pub fn _blsfill_u64(x: u64) -> u64 {
|
||||
x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -169,6 +184,7 @@ pub fn _blsfill_u64(x: u64) -> u64 {
|
|||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
pub fn _blsic_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -178,6 +194,7 @@ pub fn _blsic_u32(x: u32) -> u32 {
|
|||
/// If there is no set bit in `x`, it sets all the bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
pub fn _blsic_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -188,6 +205,7 @@ pub fn _blsic_u64(x: u64) -> u64 {
|
|||
/// If the least significant bit of `x` is 0, it sets all bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
pub fn _t1mskc_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -198,6 +216,7 @@ pub fn _t1mskc_u32(x: u32) -> u32 {
|
|||
/// If the least significant bit of `x` is 0, it sets all bits.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
pub fn _t1mskc_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -208,6 +227,7 @@ pub fn _t1mskc_u64(x: u64) -> u64 {
|
|||
/// If the least significant bit of `x` is 1, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
pub fn _tzmsk_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -218,6 +238,7 @@ pub fn _tzmsk_u32(x: u32) -> u32 {
|
|||
/// If the least significant bit of `x` is 1, it returns zero.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+tbm"]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
pub fn _tzmsk_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue