Migrate existing tests to #[assert_instr]

Also add some documentation to the assert_instr infrastructure
This commit is contained in:
Alex Crichton 2017-09-20 10:28:00 -07:00
parent 5e8f0e72b5
commit 124f731ce2
49 changed files with 97 additions and 644 deletions

View file

@ -14,6 +14,7 @@ example for `_mm_adds_epi16`:
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(paddsw))]
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
unsafe { paddsw(a, b) }
}
@ -32,6 +33,10 @@ Let's break this down:
support `sse2`, the compiler will still generate code for `_mm_adds_epi16`
*as if* `sse2` support existed. Without this attribute, the compiler might
not generate the intended CPU instruction.
* The `#[cfg_attr(test, assert_instr(paddsw))]` attribute indicates that when
we're testing the crate we'll assert that the `paddsw` instruction is
generated inside this function, ensuring that the SIMD intrinsic truly is an
intrinsic for the instruction!
* The types of the vectors given to the intrinsic should generally match the
types as provided in the vendor interface. We'll talk about this more below.
* The implementation of the vendor intrinsic is generally very simple.
@ -40,7 +45,7 @@ Let's break this down:
compiler intrinsic (in this case, `paddsw`) when one is available. More on
this below as well.
Once a function has been added, you should add at least one test for basic
Once a function has been added, you should also add at least one test for basic
functionality. Here's an example for `_mm_adds_epi16`:
```rust

View file

@ -1,12 +0,0 @@
_bzhi_u32:
pushq %rbp
movq %rsp, %rbp
bzhil %esi, %edi, %eax
popq %rbp
retq
_bzhi_u64:
pushq %rbp
movq %rsp, %rbp
bzhiq %rsi, %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `bzhi` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards both arguments to `stdsimd::vendor::_bzhi_u32`.
/// `#[no_mangle]` keeps the symbol name `bzhi_u32` in the compiled output.
#[no_mangle]
pub fn bzhi_u32(x: u32, mask: u32) -> u32 {
stdsimd::vendor::_bzhi_u32(x, mask)
}
/// Forwards both arguments to `stdsimd::vendor::_bzhi_u64`.
/// `#[no_mangle]` keeps the symbol name `bzhi_u64` in the compiled output.
#[no_mangle]
pub fn bzhi_u64(x: u64, mask: u64) -> u64 {
stdsimd::vendor::_bzhi_u64(x, mask)
}

View file

@ -1,17 +0,0 @@
_umulx_u32:
pushq %rbp
movq %rsp, %rbp
movl %edi, %ecx
movl %esi, %eax
imulq %rcx, %rax
popq %rbp
retq
_umulx_u64:
pushq %rbp
movq %rsp, %rbp
mulxq %rsi, %rcx, %rax
movq %rcx, (%rdi)
movq %rax, 8(%rdi)
movq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `mulx` intrinsics from `stdsimd::vendor` under
// unmangled symbol names. Note the wrappers are named `umulx_*` while the
// wrapped intrinsics are `_mulx_*`.
extern crate stdsimd;
/// Forwards both arguments to `stdsimd::vendor::_mulx_u32` and returns its
/// tuple result unchanged.
#[no_mangle]
pub fn umulx_u32(x: u32, y: u32) -> (u32, u32) {
stdsimd::vendor::_mulx_u32(x, y)
}
/// Forwards both arguments to `stdsimd::vendor::_mulx_u64` and returns its
/// tuple result unchanged.
#[no_mangle]
pub fn umulx_u64(x: u64, y: u64) -> (u64, u64) {
stdsimd::vendor::_mulx_u64(x, y)
}

View file

@ -1,12 +0,0 @@
_pdep_u32:
pushq %rbp
movq %rsp, %rbp
pdepl %esi, %edi, %eax
popq %rbp
retq
_pdep_u64:
pushq %rbp
movq %rsp, %rbp
pdepq %rsi, %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `pdep` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards both arguments to `stdsimd::vendor::_pdep_u32`.
/// `#[no_mangle]` keeps the symbol name `pdep_u32` in the compiled output.
#[no_mangle]
pub fn pdep_u32(x: u32, mask: u32) -> u32 {
stdsimd::vendor::_pdep_u32(x, mask)
}
/// Forwards both arguments to `stdsimd::vendor::_pdep_u64`.
/// `#[no_mangle]` keeps the symbol name `pdep_u64` in the compiled output.
#[no_mangle]
pub fn pdep_u64(x: u64, mask: u64) -> u64 {
stdsimd::vendor::_pdep_u64(x, mask)
}

View file

@ -1,12 +0,0 @@
_pext_u32:
pushq %rbp
movq %rsp, %rbp
pextl %esi, %edi, %eax
popq %rbp
retq
_pext_u64:
pushq %rbp
movq %rsp, %rbp
pextq %rsi, %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `pext` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards both arguments to `stdsimd::vendor::_pext_u32`.
/// `#[no_mangle]` keeps the symbol name `pext_u32` in the compiled output.
#[no_mangle]
pub fn pext_u32(x: u32, mask: u32) -> u32 {
stdsimd::vendor::_pext_u32(x, mask)
}
/// Forwards both arguments to `stdsimd::vendor::_pext_u64`.
/// `#[no_mangle]` keeps the symbol name `pext_u64` in the compiled output.
#[no_mangle]
pub fn pext_u64(x: u64, mask: u64) -> u64 {
stdsimd::vendor::_pext_u64(x, mask)
}

View file

@ -1,12 +0,0 @@
_andn_u32:
pushq %rbp
movq %rsp, %rbp
andnl %esi, %edi, %eax
popq %rbp
retq
_andn_u64:
pushq %rbp
movq %rsp, %rbp
andnq %rsi, %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `andn` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards both arguments to `stdsimd::vendor::_andn_u32`.
/// `#[no_mangle]` keeps the symbol name `andn_u32` in the compiled output.
#[no_mangle]
pub fn andn_u32(x: u32, y: u32) -> u32 {
stdsimd::vendor::_andn_u32(x, y)
}
/// Forwards both arguments to `stdsimd::vendor::_andn_u64`.
/// `#[no_mangle]` keeps the symbol name `andn_u64` in the compiled output.
#[no_mangle]
pub fn andn_u64(x: u64, y: u64) -> u64 {
stdsimd::vendor::_andn_u64(x, y)
}

View file

@ -1,32 +0,0 @@
_bextr_u32:
pushq %rbp
movq %rsp, %rbp
movzbl %sil, %eax
shll $8, %edx
movzwl %dx, %ecx
orl %eax, %ecx
bextrl %ecx, %edi, %eax
popq %rbp
retq
_bextr_u64:
pushq %rbp
movq %rsp, %rbp
movzbl %sil, %eax
shlq $8, %rdx
movzwl %dx, %ecx
orq %rax, %rcx
bextrq %rcx, %rdi, %rax
popq %rbp
retq
_bextr2_u32:
pushq %rbp
movq %rsp, %rbp
bextrl %esi, %edi, %eax
popq %rbp
retq
_bextr2_u64:
pushq %rbp
movq %rsp, %rbp
bextrq %rsi, %rdi, %rax
popq %rbp
retq

View file

@ -1,21 +0,0 @@
// Fixture exposing the `bextr` intrinsics from `stdsimd::vendor` under
// unmangled symbol names: the three-argument forms (`bextr_*`) and the
// two-argument control-word forms (`bextr2_*`).
extern crate stdsimd;
/// Forwards all three arguments to `stdsimd::vendor::_bextr_u32`.
#[no_mangle]
pub fn bextr_u32(x: u32, y: u32, z: u32) -> u32 {
stdsimd::vendor::_bextr_u32(x, y, z)
}
/// Forwards all three arguments to `stdsimd::vendor::_bextr_u64`.
#[no_mangle]
pub fn bextr_u64(x: u64, y: u64, z: u64) -> u64 {
stdsimd::vendor::_bextr_u64(x, y, z)
}
/// Forwards both arguments to `stdsimd::vendor::_bextr2_u32`.
#[no_mangle]
pub fn bextr2_u32(x: u32, y: u32) -> u32 {
stdsimd::vendor::_bextr2_u32(x, y)
}
/// Forwards both arguments to `stdsimd::vendor::_bextr2_u64`.
#[no_mangle]
pub fn bextr2_u64(x: u64, y: u64) -> u64 {
stdsimd::vendor::_bextr2_u64(x, y)
}

View file

@ -1,12 +0,0 @@
_blsi_u32:
pushq %rbp
movq %rsp, %rbp
blsil %edi, %eax
popq %rbp
retq
_blsi_u64:
pushq %rbp
movq %rsp, %rbp
blsiq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blsi` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blsi_u32`.
/// `#[no_mangle]` keeps the symbol name `blsi_u32` in the compiled output.
#[no_mangle]
pub fn blsi_u32(x: u32) -> u32 {
stdsimd::vendor::_blsi_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blsi_u64`.
/// `#[no_mangle]` keeps the symbol name `blsi_u64` in the compiled output.
#[no_mangle]
pub fn blsi_u64(x: u64) -> u64 {
stdsimd::vendor::_blsi_u64(x)
}

View file

@ -1,12 +0,0 @@
_blsr_u32:
pushq %rbp
movq %rsp, %rbp
blsrl %edi, %eax
popq %rbp
retq
_blsr_u64:
pushq %rbp
movq %rsp, %rbp
blsrq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blsr` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blsr_u32`.
/// `#[no_mangle]` keeps the symbol name `blsr_u32` in the compiled output.
#[no_mangle]
pub fn blsr_u32(x: u32) -> u32 {
stdsimd::vendor::_blsr_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blsr_u64`.
/// `#[no_mangle]` keeps the symbol name `blsr_u64` in the compiled output.
#[no_mangle]
pub fn blsr_u64(x: u64) -> u64 {
stdsimd::vendor::_blsr_u64(x)
}

View file

@ -1,12 +0,0 @@
_tzcnt_u32:
pushq %rbp
movq %rsp, %rbp
tzcntl %edi, %eax
popq %rbp
retq
_tzcnt_u64:
pushq %rbp
movq %rsp, %rbp
tzcntq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `tzcnt` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_tzcnt_u32`.
/// `#[no_mangle]` keeps the symbol name `tzcnt_u32` in the compiled output.
#[no_mangle]
pub fn tzcnt_u32(x: u32) -> u32 {
stdsimd::vendor::_tzcnt_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_tzcnt_u64`.
/// `#[no_mangle]` keeps the symbol name `tzcnt_u64` in the compiled output.
#[no_mangle]
pub fn tzcnt_u64(x: u64) -> u64 {
stdsimd::vendor::_tzcnt_u64(x)
}

View file

@ -1,12 +0,0 @@
_lzcnt_u32:
pushq %rbp
movq %rsp, %rbp
lzcntl %edi, %eax
popq %rbp
retq
_lzcnt_u64:
pushq %rbp
movq %rsp, %rbp
lzcntq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `lzcnt` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_lzcnt_u32`.
/// `#[no_mangle]` keeps the symbol name `lzcnt_u32` in the compiled output.
#[no_mangle]
pub fn lzcnt_u32(x: u32) -> u32 {
stdsimd::vendor::_lzcnt_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_lzcnt_u64`.
/// `#[no_mangle]` keeps the symbol name `lzcnt_u64` in the compiled output.
#[no_mangle]
pub fn lzcnt_u64(x: u64) -> u64 {
stdsimd::vendor::_lzcnt_u64(x)
}

View file

@ -1,12 +0,0 @@
_popcnt_u32:
pushq %rbp
movq %rsp, %rbp
popcntl %edi, %eax
popq %rbp
retq
_popcnt_u64:
pushq %rbp
movq %rsp, %rbp
popcntq %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `popcnt` intrinsics from `stdsimd::vendor` under
// unmangled symbol names. Note the wrappers are named `popcnt_u32`/`popcnt_u64`
// while the wrapped intrinsics are `_popcnt32`/`_popcnt64`.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_popcnt32`.
#[no_mangle]
pub fn popcnt_u32(x: u32) -> u32 {
stdsimd::vendor::_popcnt32(x)
}
/// Forwards its argument to `stdsimd::vendor::_popcnt64`.
#[no_mangle]
pub fn popcnt_u64(x: u64) -> u64 {
stdsimd::vendor::_popcnt64(x)
}

View file

@ -1,12 +0,0 @@
_blcfill_u32:
pushq %rbp
movq %rsp, %rbp
blcfill %edi, %eax
popq %rbp
retq
_blcfill_u64:
pushq %rbp
movq %rsp, %rbp
blcfill %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blcfill` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blcfill_u32`.
/// `#[no_mangle]` keeps the symbol name `blcfill_u32` in the compiled output.
#[no_mangle]
pub fn blcfill_u32(x: u32) -> u32 {
stdsimd::vendor::_blcfill_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blcfill_u64`.
/// `#[no_mangle]` keeps the symbol name `blcfill_u64` in the compiled output.
#[no_mangle]
pub fn blcfill_u64(x: u64) -> u64 {
stdsimd::vendor::_blcfill_u64(x)
}

View file

@ -1,12 +0,0 @@
_blci_u32:
pushq %rbp
movq %rsp, %rbp
blci %edi, %eax
popq %rbp
retq
_blci_u64:
pushq %rbp
movq %rsp, %rbp
blci %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blci` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blci_u32`.
/// `#[no_mangle]` keeps the symbol name `blci_u32` in the compiled output.
#[no_mangle]
pub fn blci_u32(x: u32) -> u32 {
stdsimd::vendor::_blci_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blci_u64`.
/// `#[no_mangle]` keeps the symbol name `blci_u64` in the compiled output.
#[no_mangle]
pub fn blci_u64(x: u64) -> u64 {
stdsimd::vendor::_blci_u64(x)
}

View file

@ -1,12 +0,0 @@
_blcic_u32:
pushq %rbp
movq %rsp, %rbp
blcic %edi, %eax
popq %rbp
retq
_blcic_u64:
pushq %rbp
movq %rsp, %rbp
blcic %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blcic` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blcic_u32`.
/// `#[no_mangle]` keeps the symbol name `blcic_u32` in the compiled output.
#[no_mangle]
pub fn blcic_u32(x: u32) -> u32 {
stdsimd::vendor::_blcic_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blcic_u64`.
/// `#[no_mangle]` keeps the symbol name `blcic_u64` in the compiled output.
#[no_mangle]
pub fn blcic_u64(x: u64) -> u64 {
stdsimd::vendor::_blcic_u64(x)
}

View file

@ -1,12 +0,0 @@
_blcmsk_u32:
pushq %rbp
movq %rsp, %rbp
blcmsk %edi, %eax
popq %rbp
retq
_blcmsk_u64:
pushq %rbp
movq %rsp, %rbp
blcmsk %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blcmsk` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blcmsk_u32`.
/// `#[no_mangle]` keeps the symbol name `blcmsk_u32` in the compiled output.
#[no_mangle]
pub fn blcmsk_u32(x: u32) -> u32 {
stdsimd::vendor::_blcmsk_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blcmsk_u64`.
/// `#[no_mangle]` keeps the symbol name `blcmsk_u64` in the compiled output.
#[no_mangle]
pub fn blcmsk_u64(x: u64) -> u64 {
stdsimd::vendor::_blcmsk_u64(x)
}

View file

@ -1,12 +0,0 @@
_blcs_u32:
pushq %rbp
movq %rsp, %rbp
blcs %edi, %eax
popq %rbp
retq
_blcs_u64:
pushq %rbp
movq %rsp, %rbp
blcs %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blcs` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blcs_u32`.
/// `#[no_mangle]` keeps the symbol name `blcs_u32` in the compiled output.
#[no_mangle]
pub fn blcs_u32(x: u32) -> u32 {
stdsimd::vendor::_blcs_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blcs_u64`.
/// `#[no_mangle]` keeps the symbol name `blcs_u64` in the compiled output.
#[no_mangle]
pub fn blcs_u64(x: u64) -> u64 {
stdsimd::vendor::_blcs_u64(x)
}

View file

@ -1,12 +0,0 @@
_blsfill_u32:
pushq %rbp
movq %rsp, %rbp
blsfill %edi, %eax
popq %rbp
retq
_blsfill_u64:
pushq %rbp
movq %rsp, %rbp
blsfill %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blsfill` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blsfill_u32`.
/// `#[no_mangle]` keeps the symbol name `blsfill_u32` in the compiled output.
#[no_mangle]
pub fn blsfill_u32(x: u32) -> u32 {
stdsimd::vendor::_blsfill_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blsfill_u64`.
/// `#[no_mangle]` keeps the symbol name `blsfill_u64` in the compiled output.
#[no_mangle]
pub fn blsfill_u64(x: u64) -> u64 {
stdsimd::vendor::_blsfill_u64(x)
}

View file

@ -1,12 +0,0 @@
_blsic_u32:
pushq %rbp
movq %rsp, %rbp
blsic %edi, %eax
popq %rbp
retq
_blsic_u64:
pushq %rbp
movq %rsp, %rbp
blsic %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `blsic` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_blsic_u32`.
/// `#[no_mangle]` keeps the symbol name `blsic_u32` in the compiled output.
#[no_mangle]
pub fn blsic_u32(x: u32) -> u32 {
stdsimd::vendor::_blsic_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_blsic_u64`.
/// `#[no_mangle]` keeps the symbol name `blsic_u64` in the compiled output.
#[no_mangle]
pub fn blsic_u64(x: u64) -> u64 {
stdsimd::vendor::_blsic_u64(x)
}

View file

@ -1,12 +0,0 @@
_t1mskc_u32:
pushq %rbp
movq %rsp, %rbp
t1mskc %edi, %eax
popq %rbp
retq
_t1mskc_u64:
pushq %rbp
movq %rsp, %rbp
t1mskc %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `t1mskc` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_t1mskc_u32`.
/// `#[no_mangle]` keeps the symbol name `t1mskc_u32` in the compiled output.
#[no_mangle]
pub fn t1mskc_u32(x: u32) -> u32 {
stdsimd::vendor::_t1mskc_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_t1mskc_u64`.
/// `#[no_mangle]` keeps the symbol name `t1mskc_u64` in the compiled output.
#[no_mangle]
pub fn t1mskc_u64(x: u64) -> u64 {
stdsimd::vendor::_t1mskc_u64(x)
}

View file

@ -1,12 +0,0 @@
_tzmsk_u32:
pushq %rbp
movq %rsp, %rbp
tzmsk %edi, %eax
popq %rbp
retq
_tzmsk_u64:
pushq %rbp
movq %rsp, %rbp
tzmsk %rdi, %rax
popq %rbp
retq

View file

@ -1,11 +0,0 @@
// Fixture exposing the `tzmsk` intrinsics from `stdsimd::vendor` under
// unmangled symbol names.
extern crate stdsimd;
/// Forwards its argument to `stdsimd::vendor::_tzmsk_u32`.
/// `#[no_mangle]` keeps the symbol name `tzmsk_u32` in the compiled output.
#[no_mangle]
pub fn tzmsk_u32(x: u32) -> u32 {
stdsimd::vendor::_tzmsk_u32(x)
}
/// Forwards its argument to `stdsimd::vendor::_tzmsk_u64`.
/// `#[no_mangle]` keeps the symbol name `tzmsk_u64` in the compiled output.
#[no_mangle]
pub fn tzmsk_u64(x: u64) -> u64 {
stdsimd::vendor::_tzmsk_u64(x)
}

View file

@ -1,3 +1,13 @@
//! Implementation of the `#[assert_instr]` macro
//!
//! This macro is used when testing the `stdsimd` crate and is used to generate
//! test cases to assert that functions do indeed contain the instructions that
//! we're expecting them to contain.
//!
//! The procedural macro here is relatively simple: it appends a `#[test]`
//! function to the original token stream, and that test asserts that the
//! function itself contains the relevant instruction.
#![feature(proc_macro)]
extern crate proc_macro;

View file

@ -1,3 +1,9 @@
//! Runtime support needed for the `#[assert_instr]` macro
//!
//! This disassembles the current executable, parses the output once into a
//! global table, and provides the `assert` function, which makes assertions
//! about the disassembly of a function.
#![feature(proc_macro)]
extern crate assert_instr_macro;
@ -211,21 +217,30 @@ fn normalize(symbol: &str) -> String {
}
}
/// Main entry point for this crate, called by the `#[assert_instr]` macro.
///
/// This asserts that the function at `fnptr` contains the instruction
/// `expected` provided.
pub fn assert(fnptr: usize, expected: &str) {
// Translate this function pointer to a symbolic name that we'd have found
// in the disassembly.
let mut sym = None;
backtrace::resolve(fnptr as *mut _, |name| {
sym = name.name().and_then(|s| s.as_str()).map(normalize);
});
let sym = match sym {
Some(s) => s,
None => panic!("failed to get symbol of function pointer: {}", fnptr),
};
// Find our function in the list of all disassembled functions
let functions = &DISASSEMBLY.get(&sym)
.expect(&format!("failed to find disassembly of {}", sym));
assert_eq!(functions.len(), 1);
let function = &functions[0];
// Look for `expected` as the first part of any instruction in this
// function, returning if we do indeed find it.
for instr in function.instrs.iter() {
if let Some(part) = instr.parts.get(0) {
if part == expected {
@ -234,6 +249,8 @@ pub fn assert(fnptr: usize, expected: &str) {
}
}
// Help debug by printing out the found disassembly, and then panic as we
// didn't find the instruction.
println!("disassembly for {}: ", sym);
for (i, instr) in function.instrs.iter().enumerate() {
print!("\t{:2}: ", i);

View file

@ -1,144 +0,0 @@
#!/usr/bin/env python
# Script to check the assembly generated
import os, sys
import os.path
from subprocess import Popen, PIPE
import argparse
asm_dir = './asm'
files = set()
verbose = False
extern_crate = None
def arm_triplet(arch):
    """Return the Rust target triple used for the given ARM architecture name.

    Only 'armv7' and 'armv8' are known; any other value raises KeyError.
    """
    if arch == 'armv7':
        return 'armv7-unknown-linux-gnueabihf'
    if arch == 'armv8':
        return 'aarch64-unknown-linux-gnu'
    raise KeyError(arch)
class File(object):
    """One Rust test input together with the paths of its related artifacts.

    The base name of `path_rs` (without extension) must look like
    ``<arch>_<feature>[_<rest>]``. A feature of ``none`` means "no extra
    target feature" and is stored as ``None``.

    Attributes:
        path_rs: path of the Rust source file, as given.
        path_asm_should: sibling ``.asm`` file holding the expected assembly.
        path_asm_output: sibling ``_output.asm`` file for generated assembly.
        path_llvmir_output: sibling ``_ir.ll`` file for generated LLVM IR.
        name: base name of `path_rs` without its extension.
        arch: first ``_``-separated component of `name`.
        feature: second ``_``-separated component of `name`, or None.

    Raises:
        ValueError: if the base name has no ``_`` separator.
    """

    def __init__(self, path_rs):
        stem = os.path.splitext(path_rs)[0]
        self.path_rs = path_rs
        self.path_asm_should = stem + ".asm"
        self.path_asm_output = stem + "_output.asm"
        self.path_llvmir_output = stem + "_ir.ll"
        self.name = os.path.splitext(os.path.basename(path_rs))[0]

        parts = self.name.split("_")
        if len(parts) < 2:
            raise ValueError(
                "file name %r does not follow <arch>_<feature>_..." % self.name)
        self.arch = parts[0]
        self.feature = None if parts[1] == "none" else parts[1]

    def __str__(self):
        return ("name: " + self.name + ", path-rs: " + self.path_rs
                + ", path-asm: " + self.path_asm_should + ', arch: ' + self.arch
                + ", feature: " + str(self.feature))

    # NOTE(review): only __hash__ is customised; equality is still identity,
    # so two File objects with the same name remain distinct set members.
    def __hash__(self):
        return hash(self.name)
def find_files():
    """Walk `asm_dir` and register every ``.rs`` file found in the global `files` set."""
    for root, _subdirs, names in os.walk(asm_dir):
        for name in names:
            if not name.endswith(".rs"):
                continue
            files.add(File(os.path.join(root, name)))
def call(args):
    """Run the command string `args` through the shell and wait for it to exit.

    In verbose mode the command is echoed first; if it exits with a non-zero
    status, its captured stdout lines are printed and its stderr lines are
    reported on stderr.

    Returns:
        The exit status of the command.
    """
    if verbose:
        print("command: " + str(args))
    p = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
    # communicate() drains both pipes and waits for the child. Without the
    # wait, `returncode` stays None (so the failure branch fired on every
    # verbose call), and reading only stdout could block if stderr filled up.
    out, err = p.communicate()
    if verbose and p.returncode != 0:
        print(out.splitlines(True))
        sys.stderr.write("ERROR: %s\n" % err.splitlines(True))
    return p.returncode
def compile_file(file):
    """Build the crate, then emit assembly and LLVM IR for one test file.

    Three commands are run via `call`: a release `cargo rustc` build of the
    crate, then two `rustc` invocations on `file.path_rs` that write
    `file.path_asm_output` (``--emit asm``) and `file.path_llvmir_output`
    (``--emit llvm-ir``). The file's feature and ARM target, when present,
    are passed to all three.
    """
    if verbose:
        print("Checking: " + str(file) + "...")

    # Flags derived from the file name, shared by cargo and rustc.
    target_flags = []
    if file.feature:
        target_flags.append('-C target-feature=+{}'.format(file.feature))
    if file.arch == 'armv7' or file.arch == 'armv8':
        target_flags.append('--target={}'.format(arm_triplet(file.arch)))

    # Joining with spaces fixes the original concatenation, which produced
    # "-C target-feature=+X--target=..." when both flags were present.
    cargo_args = ' '.join(
        ['cargo rustc --verbose --release -- -C panic=abort'] + target_flags)
    call(cargo_args)

    # NOTE(review): the rlib path is always target/release/, even when a
    # --target triple is passed — confirm this matches cargo's output layout.
    rustc_base = 'rustc --verbose -C opt-level=3 -C panic="abort" --extern %s=target/release/lib%s.rlib --crate-type lib' % (extern_crate, extern_crate)
    rustc_args = ' '.join([rustc_base] + target_flags)

    call(rustc_args + ' --emit asm {} -o {}'.format(file.path_rs, file.path_asm_output))
    call(rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output))

    if verbose:
        print("...done!")
def _significant_lines(path):
    """Return the stripped lines of `path`, dropping blank lines and lines starting with "."."""
    with open(path, 'r') as handle:
        stripped = [line.strip() for line in handle.readlines()]
    return [line for line in stripped if line and not line.startswith(".")]


def diff_files(rustc_output, asm_snippet):
    """Compare generated assembly against the expected snippet.

    Both files are reduced to their significant lines (whitespace-stripped,
    with blank lines and lines starting with "." removed) and compared
    line by line.

    Args:
        rustc_output: path of the assembly file produced by rustc.
        asm_snippet: path of the file holding the expected assembly.

    Returns:
        True when the significant lines are identical. Otherwise both line
        lists are printed and False is returned.
    """
    rustc_output_lines = _significant_lines(rustc_output)
    asm_snippet_lines = _significant_lines(asm_snippet)

    # List inequality covers both a length mismatch and any differing line.
    if rustc_output_lines != asm_snippet_lines:
        print("Error: results differ")
        print("Is:")
        print(rustc_output_lines)
        print("Should:")
        print(asm_snippet_lines)
        return False

    return True
def check_file(file):
    """Compile `file`, then report whether its generated assembly matches the expected one."""
    compile_file(file)
    matches = diff_files(file.path_asm_output, file.path_asm_should)
    return matches
def main():
    """Parse the command line, check every discovered file, and exit.

    Options:
        -verbose        echo commands and progress.
        -extern-crate   name of the crate under test (default: stdsimd).

    Exits with status 1 if any file's assembly differs from its expected
    snippet, and with status 0 otherwise.
    """
    global verbose, extern_crate

    parser = argparse.ArgumentParser(description='Checks ASM code')
    parser.add_argument('-verbose', action="store_true", default=False)
    parser.add_argument('-extern-crate', dest='extern_crate', default='stdsimd')
    results = parser.parse_args()

    if results.verbose:
        verbose = True
    extern_crate = results.extern_crate

    find_files()

    if verbose:
        for f in files:
            print(f)

    # Check every file rather than stopping at the first mismatch, so that
    # all differences are reported in one run.
    failed = [f for f in files if not check_file(f)]

    # sys.exit is always available; the bare exit() helper is only installed
    # by the `site` module.
    sys.exit(1 if failed else 0)
if __name__ == "__main__":
main()

View file

@ -1,5 +1,5 @@
//! Advanced Bit Manipulation (ABM) instructions
//!
//!
//! The POPCNT and LZCNT have their own CPUID bits to indicate support.
//!
//! The references are:
@ -10,12 +10,15 @@
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29)
//! provides a quick overview of the instructions available.
#[cfg(test)]
use assert_instr::assert_instr;
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, it returns its size in bits.
///
/// Implemented as `x.leading_zeros()`.
#[inline(always)]
#[target_feature = "+lzcnt"]
// Test builds assert that this function's disassembly contains `lzcnt`.
#[cfg_attr(test, assert_instr(lzcnt))]
pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
/// Counts the leading most significant zero bits.
@ -23,16 +26,19 @@ pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
/// When the operand is zero, it returns its size in bits.
///
/// Implemented as `x.leading_zeros()`, with the `u32` count widened to `u64`
/// to match the return type.
#[inline(always)]
#[target_feature = "+lzcnt"]
// Test builds assert that this function's disassembly contains `lzcnt`.
#[cfg_attr(test, assert_instr(lzcnt))]
pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
/// Counts the bits that are set.
///
/// Implemented as `x.count_ones()`.
#[inline(always)]
#[target_feature = "+popcnt"]
// Test builds assert that this function's disassembly contains `popcnt`.
#[cfg_attr(test, assert_instr(popcnt))]
pub fn _popcnt32(x: u32) -> u32 { x.count_ones() }
/// Counts the bits that are set.
///
/// Implemented as `x.count_ones()`, with the `u32` count widened to `u64` to
/// match the return type.
#[inline(always)]
#[target_feature = "+popcnt"]
// Test builds assert that this function's disassembly contains `popcnt`.
#[cfg_attr(test, assert_instr(popcnt))]
pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]

View file

@ -7,6 +7,9 @@
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.bmi.bextr.32"]
@ -19,6 +22,7 @@ extern "C" {
/// the least significant bits of the result.
///
/// Only the low 8 bits of `start` and of `len` are used.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `bextr`.
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
// Pack the control word for `_bextr2_u32`: `start` in bits [7,0] and `len`
// in bits [15,8].
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
@ -27,6 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
/// the least significant bits of the result.
///
/// Only the low 8 bits of `start` and of `len` are used.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `bextr`.
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
// Pack the control word for `_bextr2_u64`: `start` in bits [7,0] and `len`
// in bits [15,8].
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
}
@ -38,6 +43,7 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `bextr`.
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
// `x86_bmi_bextr_32` is declared outside this view — presumably in this
// file's `extern "C"` block as the `llvm.x86.bmi.bextr.32` binding
// (TODO confirm).
unsafe { x86_bmi_bextr_32(a, control) }
}
@ -49,6 +55,7 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `bextr`.
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
// `x86_bmi_bextr_64` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi_bextr_64(a, control) }
}
@ -56,6 +63,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
/// Bitwise logical `AND` of inverted `a` with `b`.
///
/// Computed as `!a & b`.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `andn`.
#[cfg_attr(test, assert_instr(andn))]
pub fn _andn_u32(a: u32, b: u32) -> u32 {
!a & b
}
@ -63,6 +71,7 @@ pub fn _andn_u32(a: u32, b: u32) -> u32 {
/// Bitwise logical `AND` of inverted `a` with `b`.
///
/// Computed as `!a & b`.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `andn`.
#[cfg_attr(test, assert_instr(andn))]
pub fn _andn_u64(a: u64, b: u64) -> u64 {
!a & b
}
@ -70,6 +79,7 @@ pub fn _andn_u64(a: u64, b: u64) -> u64 {
/// Extract lowest set isolated bit.
///
/// Computed as `x & x.wrapping_neg()`; the result is 0 when `x` is 0.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsi`.
#[cfg_attr(test, assert_instr(blsi))]
pub fn _blsi_u32(x: u32) -> u32 {
x & x.wrapping_neg()
}
@ -77,6 +87,7 @@ pub fn _blsi_u32(x: u32) -> u32 {
/// Extract lowest set isolated bit.
///
/// Computed as `x & x.wrapping_neg()`; the result is 0 when `x` is 0.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsi`.
#[cfg_attr(test, assert_instr(blsi))]
pub fn _blsi_u64(x: u64) -> u64 {
x & x.wrapping_neg()
}
@ -84,6 +95,7 @@ pub fn _blsi_u64(x: u64) -> u64 {
/// Get mask up to lowest set bit.
///
/// Computed as `x ^ x.wrapping_sub(1)`; when `x` is 0 the subtraction wraps
/// and every bit of the result is set.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsmsk`.
#[cfg_attr(test, assert_instr(blsmsk))]
pub fn _blsmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_sub(1u32))
}
@ -91,6 +103,7 @@ pub fn _blsmsk_u32(x: u32) -> u32 {
/// Get mask up to lowest set bit.
///
/// Computed as `x ^ x.wrapping_sub(1)`; when `x` is 0 the subtraction wraps
/// and every bit of the result is set.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsmsk`.
#[cfg_attr(test, assert_instr(blsmsk))]
pub fn _blsmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_sub(1u64))
}
@ -100,6 +113,7 @@ pub fn _blsmsk_u64(x: u64) -> u64 {
/// If `x` is zero, the `blsr` instruction sets CF.
///
/// Computed as `x & x.wrapping_sub(1)`; the result is 0 when `x` is 0.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsr`.
#[cfg_attr(test, assert_instr(blsr))]
pub fn _blsr_u32(x: u32) -> u32 {
x & (x.wrapping_sub(1))
}
@ -109,6 +123,7 @@ pub fn _blsr_u32(x: u32) -> u32 {
/// If `x` is zero, the `blsr` instruction sets CF.
///
/// Computed as `x & x.wrapping_sub(1)`; the result is 0 when `x` is 0.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `blsr`.
#[cfg_attr(test, assert_instr(blsr))]
pub fn _blsr_u64(x: u64) -> u64 {
x & (x.wrapping_sub(1))
}
@ -118,6 +133,7 @@ pub fn _blsr_u64(x: u64) -> u64 {
/// When the source operand is 0, it returns its size in bits.
///
/// Implemented as `x.trailing_zeros()`, with the `u32` count (at most 16)
/// narrowed to `u16` to match the return type.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `tzcnt`.
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u16(x: u16) -> u16 {
x.trailing_zeros() as u16
}
@ -127,6 +143,7 @@ pub fn _tzcnt_u16(x: u16) -> u16 {
/// When the source operand is 0, it returns its size in bits.
///
/// Implemented as `x.trailing_zeros()`.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `tzcnt`.
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
}
@ -136,6 +153,7 @@ pub fn _tzcnt_u32(x: u32) -> u32 {
/// When the source operand is 0, it returns its size in bits.
///
/// Implemented as `x.trailing_zeros()`, with the `u32` count widened to `u64`
/// to match the return type.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `tzcnt`.
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
}
@ -145,6 +163,7 @@ pub fn _tzcnt_u64(x: u64) -> u64 {
/// When the source operand is 0, it returns its size in bits.
///
/// Implemented as `x.trailing_zeros()`, the same body as `_tzcnt_u32`.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `tzcnt`.
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _mm_tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
}
@ -154,6 +173,7 @@ pub fn _mm_tzcnt_u32(x: u32) -> u32 {
/// When the source operand is 0, it returns its size in bits.
///
/// Implemented as `x.trailing_zeros() as u64`, the same body as `_tzcnt_u64`.
#[inline(always)]
#[target_feature = "+bmi"]
// Test builds assert that this function's disassembly contains `tzcnt`.
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _mm_tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
}

View file

@ -7,6 +7,9 @@
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
/// Unsigned multiply without affecting flags.
///
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
@ -51,6 +54,7 @@ extern "C" {
/// Zero higher bits of `a` >= `index`.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `bzhi`.
#[cfg_attr(test, assert_instr(bzhi))]
pub fn _bzhi_u32(a: u32, index: u32) -> u32 {
// `x86_bmi2_bzhi_32` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_bzhi_32(a, index) }
}
@ -58,6 +62,7 @@ pub fn _bzhi_u32(a: u32, index: u32) -> u32 {
/// Zero higher bits of `a` >= `index`.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `bzhi`.
#[cfg_attr(test, assert_instr(bzhi))]
pub fn _bzhi_u64(a: u64, index: u64) -> u64 {
// `x86_bmi2_bzhi_64` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_bzhi_64(a, index) }
}
@ -67,6 +72,7 @@ pub fn _bzhi_u64(a: u64, index: u64) -> u64 {
/// specified by the `mask`.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `pdep`.
#[cfg_attr(test, assert_instr(pdep))]
pub fn _pdep_u32(a: u32, mask: u32) -> u32 {
// `x86_bmi2_pdep_32` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_pdep_32(a, mask) }
}
@ -75,6 +81,7 @@ pub fn _pdep_u32(a: u32, mask: u32) -> u32 {
/// specified by the `mask`.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `pdep`.
#[cfg_attr(test, assert_instr(pdep))]
pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
// `x86_bmi2_pdep_64` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_pdep_64(a, mask) }
}
@ -83,6 +90,7 @@ pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
/// order bit positions of the result.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `pext`.
#[cfg_attr(test, assert_instr(pext))]
pub fn _pext_u32(a: u32, mask: u32) -> u32 {
// `x86_bmi2_pext_32` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_pext_32(a, mask) }
}
@ -91,6 +99,7 @@ pub fn _pext_u32(a: u32, mask: u32) -> u32 {
/// order bit positions of the result.
#[inline(always)]
#[target_feature = "+bmi2"]
// Test builds assert that this function's disassembly contains `pext`.
#[cfg_attr(test, assert_instr(pext))]
pub fn _pext_u64(a: u64, mask: u64) -> u64 {
// `x86_bmi2_pext_64` is declared outside this view — presumably an LLVM
// intrinsic binding in this file's `extern "C"` block (TODO confirm).
unsafe { x86_bmi2_pext_64(a, mask) }
}

View file

@ -9,6 +9,9 @@ use x86::__m128i;
use v128::*;
use v64::*;
#[cfg(test)]
use assert_instr::assert_instr;
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
@ -89,6 +92,7 @@ pub fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 {
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+sse2"]
// Test builds assert that this function's disassembly contains `paddsw`.
#[cfg_attr(test, assert_instr(paddsw))]
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
// `paddsw` here is the compiler intrinsic, declared outside this view.
unsafe { paddsw(a, b) }
}

View file

@ -7,6 +7,9 @@
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
/*
#[allow(dead_code)]
@ -20,7 +23,7 @@ extern "C" {
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+tbm"]
#[target_feature = "+tbm"]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
@ -28,7 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+tbm"]
#[target_feature = "+tbm"]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
}
@ -61,6 +64,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
/// If there is no zero bit in `x`, it returns zero.
///
/// Computed as `x & x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcfill`.
#[cfg_attr(test, assert_instr(blcfill))]
pub fn _blcfill_u32(x: u32) -> u32 {
x & (x.wrapping_add(1))
}
@ -70,6 +74,7 @@ pub fn _blcfill_u32(x: u32) -> u32 {
/// If there is no zero bit in `x`, it returns zero.
///
/// Computed as `x & x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcfill`.
#[cfg_attr(test, assert_instr(blcfill))]
pub fn _blcfill_u64(x: u64) -> u64 {
x & (x.wrapping_add(1))
}
@ -79,6 +84,7 @@ pub fn _blcfill_u64(x: u64) -> u64 {
/// If there is no zero bit in `x`, it sets all bits.
///
/// Computed as `x | !x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blci`.
#[cfg_attr(test, assert_instr(blci))]
pub fn _blci_u32(x: u32) -> u32 {
x | !(x.wrapping_add(1))
}
@ -88,6 +94,7 @@ pub fn _blci_u32(x: u32) -> u32 {
/// If there is no zero bit in `x`, it sets all bits.
///
/// Computed as `x | !x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blci`.
#[cfg_attr(test, assert_instr(blci))]
pub fn _blci_u64(x: u64) -> u64 {
x | !(x.wrapping_add(1))
}
@ -97,6 +104,7 @@ pub fn _blci_u64(x: u64) -> u64 {
/// If there is no zero bit in `x`, it returns zero.
///
/// Computed as `!x & x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcic`.
#[cfg_attr(test, assert_instr(blcic))]
pub fn _blcic_u32(x: u32) -> u32 {
!x & (x.wrapping_add(1))
}
@ -106,6 +114,7 @@ pub fn _blcic_u32(x: u32) -> u32 {
/// If there is no zero bit in `x`, it returns zero.
///
/// Computed as `!x & x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcic`.
#[cfg_attr(test, assert_instr(blcic))]
pub fn _blcic_u64(x: u64) -> u64 {
!x & (x.wrapping_add(1))
}
@ -115,6 +124,7 @@ pub fn _blcic_u64(x: u64) -> u64 {
/// If there is no zero bit in `x`, it sets all the bits.
///
/// Computed as `x ^ x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcmsk`.
#[cfg_attr(test, assert_instr(blcmsk))]
pub fn _blcmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_add(1))
}
@ -124,6 +134,7 @@ pub fn _blcmsk_u32(x: u32) -> u32 {
/// If there is no zero bit in `x`, it sets all the bits.
///
/// Computed as `x ^ x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcmsk`.
#[cfg_attr(test, assert_instr(blcmsk))]
pub fn _blcmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_add(1))
}
@ -133,6 +144,7 @@ pub fn _blcmsk_u64(x: u64) -> u64 {
/// If there is no zero bit in `x`, it returns `x`.
///
/// Computed as `x | x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcs`.
#[cfg_attr(test, assert_instr(blcs))]
pub fn _blcs_u32(x: u32) -> u32 {
x | (x.wrapping_add(1))
}
@ -142,6 +154,7 @@ pub fn _blcs_u32(x: u32) -> u32 {
/// If there is no zero bit in `x`, it returns `x`.
///
/// Computed as `x | x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blcs`.
#[cfg_attr(test, assert_instr(blcs))]
pub fn _blcs_u64(x: u64) -> u64 {
x | x.wrapping_add(1)
}
@ -151,6 +164,7 @@ pub fn _blcs_u64(x: u64) -> u64 {
/// If there is no set bit in `x`, it sets all the bits.
///
/// Computed as `x | x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blsfill`.
#[cfg_attr(test, assert_instr(blsfill))]
pub fn _blsfill_u32(x: u32) -> u32 {
x | (x.wrapping_sub(1))
}
@ -160,6 +174,7 @@ pub fn _blsfill_u32(x: u32) -> u32 {
/// If there is no set bit in `x`, it sets all the bits.
///
/// Computed as `x | x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blsfill`.
#[cfg_attr(test, assert_instr(blsfill))]
pub fn _blsfill_u64(x: u64) -> u64 {
x | (x.wrapping_sub(1))
}
@ -169,6 +184,7 @@ pub fn _blsfill_u64(x: u64) -> u64 {
/// If there is no set bit in `x`, it sets all the bits.
///
/// Computed as `!x | x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blsic`.
#[cfg_attr(test, assert_instr(blsic))]
pub fn _blsic_u32(x: u32) -> u32 {
!x | (x.wrapping_sub(1))
}
@ -178,6 +194,7 @@ pub fn _blsic_u32(x: u32) -> u32 {
/// If there is no set bit in `x`, it sets all the bits.
///
/// Computed as `!x | x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `blsic`.
#[cfg_attr(test, assert_instr(blsic))]
pub fn _blsic_u64(x: u64) -> u64 {
!x | (x.wrapping_sub(1))
}
@ -188,6 +205,7 @@ pub fn _blsic_u64(x: u64) -> u64 {
/// If the least significant bit of `x` is 0, it sets all bits.
///
/// Computed as `!x | x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `t1mskc`.
#[cfg_attr(test, assert_instr(t1mskc))]
pub fn _t1mskc_u32(x: u32) -> u32 {
!x | (x.wrapping_add(1))
}
@ -198,6 +216,7 @@ pub fn _t1mskc_u32(x: u32) -> u32 {
/// If the least significant bit of `x` is 0, it sets all bits.
///
/// Computed as `!x | x.wrapping_add(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `t1mskc`.
#[cfg_attr(test, assert_instr(t1mskc))]
pub fn _t1mskc_u64(x: u64) -> u64 {
!x | (x.wrapping_add(1))
}
@ -208,6 +227,7 @@ pub fn _t1mskc_u64(x: u64) -> u64 {
/// If the least significant bit of `x` is 1, it returns zero.
///
/// Computed as `!x & x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `tzmsk`.
#[cfg_attr(test, assert_instr(tzmsk))]
pub fn _tzmsk_u32(x: u32) -> u32 {
!x & (x.wrapping_sub(1))
}
@ -218,6 +238,7 @@ pub fn _tzmsk_u32(x: u32) -> u32 {
/// If the least significant bit of `x` is 1, it returns zero.
///
/// Computed as `!x & x.wrapping_sub(1)`.
#[inline(always)]
#[target_feature = "+tbm"]
// Test builds assert that this function's disassembly contains `tzmsk`.
#[cfg_attr(test, assert_instr(tzmsk))]
pub fn _tzmsk_u64(x: u64) -> u64 {
!x & (x.wrapping_sub(1))
}