Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Dávid Kocsis 2017-09-22 21:00:21 +02:00
commit ca55004659
28 changed files with 1582 additions and 14 deletions

View file

@ -0,0 +1,24 @@
environment:
# We don't want to do identical comdat folding as it messes up the ability to
# generate lossless backtraces in some cases. This is enabled by rustc by
# default so pass a flag to disable it to ensure our tests work ok.
RUSTFLAGS: -Clink-args=/OPT:NOICF
matrix:
- TARGET: x86_64-pc-windows-msvc
install:
# Install rust, x86_64-pc-windows-msvc host
- appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
- if NOT "%TARGET%" == "x86_64-pc-windows-msvc" rustup target add %TARGET%
- rustc -vV
- cargo -vV
build: false
test_script:
- cargo test --target %TARGET%
- set RUST_BACKTRACE=1
- cargo test --target %TARGET% --release

View file

@ -0,0 +1,16 @@
language: rust
sudo: false
matrix:
include:
- rust: nightly
- rust: nightly
os: osx
script:
- cargo test
- cargo test --release
notifications:
email:
on_success: never

0
library/stdarch/.vscode/temp.sql vendored Normal file
View file

View file

@ -14,6 +14,7 @@ example for `_mm_adds_epi16`:
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(paddsw))]
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
unsafe { paddsw(a, b) }
}
@ -32,6 +33,10 @@ Let's break this down:
support `sse2`, the compiler will still generate code for `_mm_adds_epi16`
*as if* `sse2` support existed. Without this attribute, the compiler might
not generate the intended CPU instruction.
* The `#[cfg_attr(test, assert_instr(paddsw))]` attribute indicates that when
we're testing the crate we'll assert that the `paddsw` instruction is
generated inside this function, ensuring that the SIMD intrinsic truly is an
intrinsic for the instruction!
* The types of the vectors given to the intrinsic should generally match the
types as provided in the vendor interface. We'll talk about this more below.
* The implementation of the vendor intrinsic is generally very simple.
@ -40,7 +45,7 @@ Let's break this down:
compiler intrinsic (in this case, `paddsw`) when one is available. More on
this below as well.
Once a function has been added, you should add at least one test for basic
Once a function has been added, you should also add at least one test for basic
functionality. Here's an example for `_mm_adds_epi16`:
```rust

View file

@ -13,3 +13,10 @@ license = "MIT"
[profile.release]
debug = true
opt-level = 3
[profile.bench]
debug = true
opt-level = 3
[dev-dependencies]
assert-instr = { path = "assert-instr" }

View file

@ -155,7 +155,7 @@ sse
* [ ] `_mm_storer_ps`
* [ ] `_mm_move_ss`
* [ ] `_mm_shuffle_ps`
* [ ] `_mm_unpackhi_ps`
* [x] `_mm_unpackhi_ps`
* [ ] `_mm_unpacklo_ps`
* [ ] `_mm_movehl_ps`
* [ ] `_mm_movelh_ps`

View file

@ -0,0 +1,11 @@
[package]
name = "assert-instr"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[dependencies]
assert-instr-macro = { path = "assert-instr-macro" }
backtrace = "0.3"
cc = "1.0"
lazy_static = "0.2"
rustc-demangle = "0.1"

View file

@ -0,0 +1,7 @@
[package]
name = "assert-instr-macro"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[lib]
proc-macro = true

View file

@ -0,0 +1,10 @@
use std::env;
fn main() {
println!("cargo:rerun-if-changed=build.rs");
let opt_level = env::var("OPT_LEVEL").ok().and_then(|s| s.parse().ok()).unwrap_or(0);
let profile = env::var("PROFILE").unwrap_or(String::new());
if profile == "release" || opt_level >= 2 {
println!("cargo:rustc-cfg=optimized");
}
}

View file

@ -0,0 +1,71 @@
//! Implementation of the `#[assert_instr]` macro
//!
//! This macro is used when testing the `stdsimd` crate and is used to generate
//! test cases to assert that functions do indeed contain the instructions that
//! we're expecting them to contain.
//!
//! The procedural macro here is relatively simple, it simply appends a
//! `#[test]` function to the original token stream which asserts that the
//! function itself contains the relevant instruction.
#![feature(proc_macro)]
extern crate proc_macro;
use proc_macro::{TokenStream, Term, TokenNode, Delimiter};
#[proc_macro_attribute]
pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream {
// Pull the annotated function's name out of the item's token stream.
let name = find_name(item.clone());
// The attribute payload must be exactly one parenthesized group, i.e.
// `#[assert_instr(foo)]` — anything else is a usage error.
let tokens = attr.into_iter().collect::<Vec<_>>();
if tokens.len() != 1 {
panic!("expected #[assert_instr(foo)]");
}
let tokens = match tokens[0].kind {
TokenNode::Group(Delimiter::Parenthesis, ref rest) => rest.clone(),
_ => panic!("expected #[assert_instr(foo)]"),
};
// ... and that group must contain exactly one bare identifier: the
// instruction mnemonic expected to show up in the disassembly.
let tokens = tokens.into_iter().collect::<Vec<_>>();
if tokens.len() != 1 {
panic!("expected #[assert_instr(foo)]");
}
let instr = match tokens[0].kind {
TokenNode::Term(term) => term,
_ => panic!("expected #[assert_instr(foo)]"),
};
// The disassembly check is only meaningful on optimized builds (the
// `optimized` cfg is emitted by build.rs for release / opt-level >= 2);
// otherwise the generated test is marked `#[ignore]`.
let ignore = if cfg!(optimized) {
""
} else {
"#[ignore]"
};
// Append a `#[test]` function that asks the assert-instr runtime to
// verify the expected instruction appears in this function's disassembly.
let test = format!("
#[test]
#[allow(non_snake_case)]
{ignore}
fn assert_instr_{name}() {{
::assert_instr::assert({name} as usize,
\"{name}\",
\"{instr}\");
}}
", name = name.as_str(), instr = instr.as_str(), ignore = ignore);
// Emit the original item followed by the generated test.
let test: TokenStream = test.parse().unwrap();
item.into_iter().chain(test.into_iter()).collect()
}
/// Scans `item` for the `fn` keyword and returns the identifier that
/// immediately follows it — i.e. the name of the annotated function.
///
/// Panics if no `fn` keyword is found or it is not followed by an
/// identifier.
fn find_name(item: TokenStream) -> Term {
    let mut iter = item.into_iter();
    // Consume tokens up to and including the `fn` keyword itself.
    while let Some(token) = iter.next() {
        match token.kind {
            TokenNode::Term(t) if t.as_str() == "fn" => break,
            _ => {}
        }
    }
    // The very next token after `fn` must be the function's name.
    if let Some(TokenNode::Term(name)) = iter.next().map(|t| t.kind) {
        name
    } else {
        panic!("failed to find function name")
    }
}

View file

@ -0,0 +1,273 @@
//! Runtime support needed for the `#![assert_instr]` macro
//!
//! This basically just disassembles the current executable and then parses the
//! output once globally and then provides the `assert` function which makes
//! assertions about the disassembly of a function.
#![feature(proc_macro)]
extern crate assert_instr_macro;
extern crate backtrace;
extern crate cc;
extern crate rustc_demangle;
#[macro_use]
extern crate lazy_static;
use std::collections::HashMap;
use std::env;
use std::process::Command;
use std::str;
pub use assert_instr_macro::*;
lazy_static! {
static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
}
struct Function {
instrs: Vec<Instruction>,
}
struct Instruction {
parts: Vec<String>,
}
// Disassembles the currently-running test executable with a platform-
// appropriate tool and parses the result into a symbol -> functions map:
//   - x86_64 MSVC Windows: `dumpbin /DISASM` (located via the VS registry)
//   - other Windows targets: not implemented
//   - macOS: `otool -vt`
//   - everything else: `objdump --disassemble`
fn disassemble_myself() -> HashMap<String, Vec<Function>> {
let me = env::current_exe().expect("failed to get current exe");
if cfg!(target_arch = "x86_64") &&
cfg!(target_os = "windows") &&
cfg!(target_env = "msvc") {
let mut cmd = cc::windows_registry::find("x86_64-pc-windows-msvc", "dumpbin.exe")
.expect("failed to find `dumpbin` tool");
let output = cmd.arg("/DISASM").arg(&me).output()
.expect("failed to execute dumpbin");
// Echo status/stderr so CI logs show what went wrong before the assert.
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
assert!(output.status.success());
parse_dumpbin(&String::from_utf8_lossy(&output.stdout))
} else if cfg!(target_os = "windows") {
panic!("disassembly unimplemented")
} else if cfg!(target_os = "macos") {
let output = Command::new("otool")
.arg("-vt")
.arg(&me)
.output()
.expect("failed to execute otool");
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
assert!(output.status.success());
parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
} else {
let output = Command::new("objdump")
.arg("--disassemble")
.arg(&me)
.output()
.expect("failed to execute objdump");
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
assert!(output.status.success());
parse_objdump(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
}
}
/// Parses `objdump --disassemble` output into a map from normalized symbol
/// name to every function found under that name.
///
/// Expected shape of the input:
///
/// ```text
/// $hex_addr <$symbol>:
///   $rel_offset: ab cd ef 00  $instruction ...
///                                             <- blank line ends a function
/// ```
fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
    let mut lines = output.lines();
    // Echo a prefix of the disassembly so CI failures are debuggable.
    for line in output.lines().take(100) {
        println!("{}", line);
    }
    let mut ret = HashMap::new();
    while let Some(header) = lines.next() {
        // Function headers look like `$hex_addr <$name>:`.
        if !header.ends_with(">:") {
            continue
        }
        let start = header.find("<").unwrap();
        let symbol = &header[start + 1..header.len() - 2];
        let mut instructions = Vec::new();
        while let Some(instruction) = lines.next() {
            // A blank line terminates the current function's listing.
            if instruction.is_empty() {
                break
            }
            // Each line of instructions should look like:
            //
            // $rel_offset: ab cd ef 00 $instruction...
            //
            // so drop the offset, then every two-digit hex byte, keeping only
            // the mnemonic and its operands.
            let parts = instruction.split_whitespace()
                .skip(1)
                .skip_while(|s| {
                    s.len() == 2 && usize::from_str_radix(s, 16).is_ok()
                })
                .map(|s| s.to_string())
                .collect::<Vec<String>>();
            instructions.push(Instruction { parts });
        }
        // A normalized symbol may occur more than once (e.g. multiple
        // monomorphizations), so accumulate all of them.
        ret.entry(normalize(symbol))
            .or_insert_with(Vec::new)
            .push(Function { instrs: instructions });
    }
    ret
}
// Parses `otool -vt` output into a map from normalized symbol name to the
// functions found under that name. `otool` prints one `$symbol:` header per
// function followed by `$addr $instruction...` lines; the next header is
// only discovered while scanning instruction lines, so it is stashed in
// `cached_header` and re-used on the following loop iteration.
fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
let mut lines = output.lines();
// Echo a prefix of the disassembly so CI failures are debuggable.
for line in output.lines().take(100) {
println!("{}", line);
}
let mut ret = HashMap::new();
let mut cached_header = None;
loop {
// Prefer a header spotted by the inner loop; otherwise advance.
let header = match cached_header.take().or_else(|| lines.next()) {
Some(header) => header,
None => break,
};
// symbols should start with `$symbol:`
if !header.ends_with(":") {
continue
}
// strip the leading underscore and the trailing colon
let symbol = &header[1..header.len() - 1];
let mut instructions = Vec::new();
while let Some(instruction) = lines.next() {
// A line ending in `:` is the next function's header — stash it
// and stop collecting instructions for the current one.
if instruction.ends_with(":") {
cached_header = Some(instruction);
break
}
// Each line of instructions should look like:
//
// $addr $instruction...
let parts = instruction.split_whitespace()
.skip(1)
.map(|s| s.to_string())
.collect::<Vec<String>>();
instructions.push(Instruction { parts });
}
// The same normalized symbol can appear multiple times; keep them all.
ret.entry(normalize(symbol))
.or_insert(Vec::new())
.push(Function { instrs: instructions });
}
return ret
}
// Parses `dumpbin /DISASM` output into a map from normalized symbol name to
// the functions found under that name. Format mirrors parse_otool: a
// `$symbol:` header line, then indented instruction lines; the next header
// is discovered mid-scan and stashed in `cached_header`.
fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
let mut lines = output.lines();
// Echo a prefix of the disassembly so CI failures are debuggable.
for line in output.lines().take(100) {
println!("{}", line);
}
let mut ret = HashMap::new();
let mut cached_header = None;
loop {
let header = match cached_header.take().or_else(|| lines.next()) {
Some(header) => header,
None => break,
};
// symbols should start with `$symbol:`
if !header.ends_with(":") {
continue
}
// strip the trailing colon
let symbol = &header[..header.len() - 1];
let mut instructions = Vec::new();
// NOTE(review): the two `starts_with(" ")` literals below only make
// sense if they have *different* widths (a shallow indent marks an
// instruction line, a deeper indent marks a byte-continuation line to
// skip). As rendered here both are a single space, which would make
// the `continue` below unconditionally skip every instruction —
// presumably whitespace was collapsed in transit; confirm against the
// original source.
while let Some(instruction) = lines.next() {
if !instruction.starts_with(" ") {
cached_header = Some(instruction);
break
}
// Each line looks like:
//
// > $addr: ab cd ef $instr..
// > 00 12 # this line is optional
if instruction.starts_with(" ") {
continue
}
// Drop the address, then every two-digit hex byte, keeping only the
// mnemonic and operands.
let parts = instruction.split_whitespace()
.skip(1)
.skip_while(|s| {
s.len() == 2 && usize::from_str_radix(s, 16).is_ok()
})
.map(|s| s.to_string())
.collect::<Vec<String>>();
instructions.push(Instruction { parts });
}
// The same normalized symbol can appear multiple times; keep them all.
ret.entry(normalize(symbol))
.or_insert(Vec::new())
.push(Function { instrs: instructions });
}
return ret
}
/// Demangles `symbol` and strips the trailing `::h<hash>` disambiguator
/// that rustc appends, yielding a stable, human-readable symbol name.
fn normalize(symbol: &str) -> String {
    let demangled = rustc_demangle::demangle(symbol).to_string();
    if let Some(idx) = demangled.rfind("::h") {
        demangled[..idx].to_string()
    } else {
        demangled
    }
}
/// Main entry point for this crate, called by the `#[assert_instr]` macro.
///
/// This asserts that the function at `fnptr` contains the instruction
/// `expected` provided.
pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
// Translate this function pointer to a symbolic name that we'd have found
// in the disassembly.
let mut sym = None;
backtrace::resolve(fnptr as *mut _, |name| {
sym = name.name().and_then(|s| s.as_str()).map(normalize);
});
// Look the resolved symbol up in the globally-parsed disassembly; on a
// miss, print whatever we know (resolved name, near-miss keys) to make
// the failure diagnosable, then panic.
let functions = match sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) {
Some(s) => s,
None => {
if let Some(sym) = sym {
println!("assumed symbol name: `{}`", sym);
}
println!("maybe related functions");
for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) {
println!("\t- {}", f);
}
panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
}
};
// Exactly one copy of the function is expected under this symbol.
assert_eq!(functions.len(), 1);
let function = &functions[0];
// Look for `expected` as the first part of any instruction in this
// function, returning if we do indeed find it.
for instr in function.instrs.iter() {
// Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax
if let Some(part) = instr.parts.get(0) {
// Truncates the instruction with the length of the expected
// instruction: tzcntl => tzcnt and compares that.
if part.starts_with(expected) {
return
}
}
}
// Help debug by printing out the found disassembly, and then panic as we
// didn't find the instruction.
println!("disassembly for {}: ", sym.as_ref().unwrap());
for (i, instr) in function.instrs.iter().enumerate() {
print!("\t{:2}: ", i);
for part in instr.parts.iter() {
print!("{} ", part);
}
println!("");
}
panic!("failed to find instruction `{}` in the disassembly", expected);
}

View file

@ -0,0 +1,10 @@
//! ARM intrinsics.
pub use self::v6::*;
pub use self::v7::*;
#[cfg(target_arch = "aarch64")]
pub use self::v8::*;
mod v6;
mod v7;
#[cfg(target_arch = "aarch64")]
mod v8;

View file

@ -0,0 +1,25 @@
//! ARMv6 intrinsics.
//!
//! The reference is [ARMv6-M Architecture Reference
//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html).
/// Reverse the order of the bytes.
///
/// For a single-byte value this is the identity; it is provided for API
/// symmetry with the wider `_rev_*` intrinsics.
#[inline(always)]
#[cfg_attr(test, assert_instr(rev))]
pub fn _rev_u8(x: u8) -> u8 {
    // `swap_bytes` already returns `u8`; the original `as u8` cast was
    // redundant.
    x.swap_bytes()
}
/// Reverse the order of the bytes.
#[inline(always)]
#[cfg_attr(test, assert_instr(rev))]
pub fn _rev_u16(x: u16) -> u16 {
    // `swap_bytes` already returns `u16`; the original `as u16` cast was
    // redundant.
    x.swap_bytes()
}
/// Reverse the order of the bytes.
#[inline(always)]
#[cfg_attr(test, assert_instr(rev))]
pub fn _rev_u32(x: u32) -> u32 {
    // `swap_bytes` already returns `u32`; the original `as u32` cast was
    // redundant.
    x.swap_bytes()
}

View file

@ -0,0 +1,40 @@
//! ARMv7 intrinsics.
//!
//! The reference is [ARMv7-M Architecture Reference Manual (Issue
//! E.b)](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html).
pub use super::v6::*;
/// Count Leading Zeros.
#[inline(always)]
#[cfg_attr(test, assert_instr(clz))]
pub fn _clz_u8(x: u8) -> u8 {
x.leading_zeros() as u8
}
/// Count Leading Zeros.
#[inline(always)]
#[cfg_attr(test, assert_instr(clz))]
pub fn _clz_u16(x: u16) -> u16 {
x.leading_zeros() as u16
}
/// Count Leading Zeros.
#[inline(always)]
#[cfg_attr(test, assert_instr(clz))]
pub fn _clz_u32(x: u32) -> u32 {
x.leading_zeros() as u32
}
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.bitreverse.i32"]
fn rbit_u32(i: i32) -> i32;
}
/// Reverse the bit order.
#[inline(always)]
#[cfg_attr(test, assert_instr(rbit))]
pub fn _rbit_u32(x: u32) -> u32 {
unsafe { rbit_u32(x as i32) as u32 }
}

View file

@ -0,0 +1,54 @@
//! ARMv8 intrinsics.
//!
//! The reference is [ARMv8-A Reference Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html).
pub use super::v7::*;
/// Reverse the order of the bytes.
#[inline(always)]
#[cfg_attr(test, assert_instr(rev))]
pub fn _rev_u64(x: u64) -> u64 {
x.swap_bytes() as u64
}
/// Count Leading Zeros.
#[inline(always)]
#[cfg_attr(test, assert_instr(clz))]
pub fn _clz_u64(x: u64) -> u64 {
x.leading_zeros() as u64
}
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.bitreverse.i64"]
fn rbit_u64(i: i64) -> i64;
}
/// Reverse the bit order.
#[inline(always)]
#[cfg_attr(test, assert_instr(rbit))]
pub fn _rbit_u64(x: u64) -> u64 {
unsafe { rbit_u64(x as i64) as u64 }
}
/// Counts the leading most significant bits set.
///
/// When all bits of the operand are set it returns the size of the operand in
/// bits.
#[inline(always)]
// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802
#[cfg_attr(test, assert_instr(clz))]
pub fn _cls_u32(x: u32) -> u32 {
u32::leading_zeros(!x) as u32
}
/// Counts the leading most significant bits set.
///
/// When all bits of the operand are set it returns the size of the operand in
/// bits.
#[inline(always)]
// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802
#[cfg_attr(test, assert_instr(clz))]
pub fn _cls_u64(x: u64) -> u64 {
u64::leading_zeros(!x) as u64
}

View file

@ -1,8 +1,12 @@
#![allow(dead_code)]
#![feature(
const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
target_feature,
target_feature, cfg_target_feature, i128_type
)]
#![cfg_attr(test, feature(proc_macro))]
#[cfg(test)]
extern crate assert_instr;
/// Platform independent SIMD vector types and operations.
pub mod simd {
@ -16,6 +20,9 @@ pub mod simd {
pub mod vendor {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use x86::*;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub use arm::*;
}
#[macro_use]
@ -27,3 +34,6 @@ mod v512;
mod v64;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod arm;

View file

@ -0,0 +1,71 @@
//! Advanced Bit Manipulation (ABM) instructions
//!
//! The POPCNT and LZCNT have their own CPUID bits to indicate support.
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29)
//! provides a quick overview of the instructions available.
#[cfg(test)]
use assert_instr::assert_instr;
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, it returns its size in bits.
#[inline(always)]
#[target_feature = "+lzcnt"]
#[cfg_attr(test, assert_instr(lzcnt))]
pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, it returns its size in bits.
#[inline(always)]
#[target_feature = "+lzcnt"]
#[cfg_attr(test, assert_instr(lzcnt))]
pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
/// Counts the bits that are set.
#[inline(always)]
#[target_feature = "+popcnt"]
#[cfg_attr(test, assert_instr(popcnt))]
pub fn _popcnt32(x: u32) -> u32 { x.count_ones() }
/// Counts the bits that are set.
#[inline(always)]
#[target_feature = "+popcnt"]
#[cfg_attr(test, assert_instr(popcnt))]
pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
// NOTE(review): these tests exercise the ABM intrinsics (lzcnt/popcnt) but
// the module is gated on `target_feature = "bmi"` — presumably this should
// be gated on lzcnt/popcnt support instead; confirm against CI configuration.
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use x86::abm;
#[test]
#[target_feature = "+lzcnt"]
fn _lzcnt_u32() {
assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32);
}
#[test]
#[target_feature = "+lzcnt"]
fn _lzcnt_u64() {
assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64);
}
#[test]
#[target_feature = "+popcnt"]
fn _popcnt32() {
assert_eq!(abm::_popcnt32(0b0101_1010u32), 4);
}
#[test]
#[target_feature = "+popcnt"]
fn _popcnt64() {
assert_eq!(abm::_popcnt64(0b0101_1010u64), 4);
}
}

View file

@ -31,7 +31,7 @@ extern "C" {
}
#[cfg(test)]
#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v256::*;
use x86::avx;
@ -65,7 +65,4 @@ mod tests {
let e = f64x4::new(-4.0,8.0,-4.0,12.0);
assert_eq!(r, e);
}
}
}

View file

@ -1044,7 +1044,7 @@ extern "C" {
}
#[cfg(test)]
#[cfg(all(test, target_feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v256::*;
use v128::*;

View file

@ -0,0 +1,288 @@
//! Bit Manipulation Instruction (BMI) Set 1.0.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference,
//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.bmi.bextr.32"]
fn x86_bmi_bextr_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.bextr.64"]
fn x86_bmi_bextr_64(x: u64, y: u64) -> u64;
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
unsafe { x86_bmi_bextr_32(a, control) }
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
unsafe { x86_bmi_bextr_64(a, control) }
}
/// Bitwise logical `AND` of inverted `a` with `b`.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(andn))]
pub fn _andn_u32(a: u32, b: u32) -> u32 {
!a & b
}
/// Bitwise logical `AND` of inverted `a` with `b`.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(andn))]
pub fn _andn_u64(a: u64, b: u64) -> u64 {
!a & b
}
/// Extract lowest set isolated bit.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsi))]
pub fn _blsi_u32(x: u32) -> u32 {
x & x.wrapping_neg()
}
/// Extract lowest set isolated bit.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsi))]
pub fn _blsi_u64(x: u64) -> u64 {
x & x.wrapping_neg()
}
/// Get mask up to lowest set bit.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsmsk))]
pub fn _blsmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_sub(1u32))
}
/// Get mask up to lowest set bit.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsmsk))]
pub fn _blsmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_sub(1u64))
}
/// Resets the lowest set bit of `x`.
///
/// If `x` is `0`, the carry flag (CF) is set (per the BLSR instruction's
/// flag semantics).
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsr))]
pub fn _blsr_u32(x: u32) -> u32 {
x & (x.wrapping_sub(1))
}
/// Resets the lowest set bit of `x`.
///
/// If `x` is `0`, the carry flag (CF) is set (per the BLSR instruction's
/// flag semantics).
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(blsr))]
pub fn _blsr_u64(x: u64) -> u64 {
x & (x.wrapping_sub(1))
}
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u16(x: u16) -> u16 {
x.trailing_zeros() as u16
}
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
}
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
}
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _mm_tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
}
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(tzcnt))]
pub fn _mm_tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
}
#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use x86::bmi;
#[test]
#[target_feature = "+bmi"]
fn _bextr_u32() {
assert_eq!(bmi::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
}
#[test]
#[target_feature = "+bmi"]
fn _bextr_u64() {
assert_eq!(bmi::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
}
#[test]
#[target_feature = "+bmi"]
fn _andn_u32() {
assert_eq!(bmi::_andn_u32(0, 0), 0);
assert_eq!(bmi::_andn_u32(0, 1), 1);
assert_eq!(bmi::_andn_u32(1, 0), 0);
assert_eq!(bmi::_andn_u32(1, 1), 0);
assert_eq!(bmi::_andn_u32(0b0000_0000u32, 0b0000_0000u32), 0b0000_0000u32);
assert_eq!(bmi::_andn_u32(0b0000_0000u32, 0b1111_1111u32), 0b1111_1111u32);
assert_eq!(bmi::_andn_u32(0b1111_1111u32, 0b0000_0000u32), 0b0000_0000u32);
assert_eq!(bmi::_andn_u32(0b1111_1111u32, 0b1111_1111u32), 0b0000_0000u32);
assert_eq!(bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32), 0b0001_1101u32);
}
#[test]
#[target_feature = "+bmi"]
fn _andn_u64() {
assert_eq!(bmi::_andn_u64(0, 0), 0);
assert_eq!(bmi::_andn_u64(0, 1), 1);
assert_eq!(bmi::_andn_u64(1, 0), 0);
assert_eq!(bmi::_andn_u64(1, 1), 0);
assert_eq!(bmi::_andn_u64(0b0000_0000u64, 0b0000_0000u64), 0b0000_0000u64);
assert_eq!(bmi::_andn_u64(0b0000_0000u64, 0b1111_1111u64), 0b1111_1111u64);
assert_eq!(bmi::_andn_u64(0b1111_1111u64, 0b0000_0000u64), 0b0000_0000u64);
assert_eq!(bmi::_andn_u64(0b1111_1111u64, 0b1111_1111u64), 0b0000_0000u64);
assert_eq!(bmi::_andn_u64(0b0100_0000u64, 0b0101_1101u64), 0b0001_1101u64);
}
#[test]
#[target_feature = "+bmi"]
fn _blsi_u32() {
assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32);
}
#[test]
#[target_feature = "+bmi"]
fn _blsi_u64() {
assert_eq!(bmi::_blsi_u64(0b1101_0000u64), 0b0001_0000u64);
}
#[test]
#[target_feature = "+bmi"]
fn _blsmsk_u32() {
assert_eq!(bmi::_blsmsk_u32(0b0011_0000u32), 0b0001_1111u32);
}
#[test]
#[target_feature = "+bmi"]
fn _blsmsk_u64() {
assert_eq!(bmi::_blsmsk_u64(0b0011_0000u64), 0b0001_1111u64);
}
#[test]
#[target_feature = "+bmi"]
fn _blsr_u32() {
    // TODO: test the behavior when the input is 0
    // (was a `///` doc comment, which is invalid on an expression
    // statement — doc comments must document an item.)
    assert_eq!(bmi::_blsr_u32(0b0011_0000u32), 0b0010_0000u32);
}
#[test]
#[target_feature = "+bmi"]
fn _blsr_u64() {
    // TODO: test the behavior when the input is 0
    // (was a `///` doc comment, which is invalid on an expression
    // statement — doc comments must document an item.)
    assert_eq!(bmi::_blsr_u64(0b0011_0000u64), 0b0010_0000u64);
}
#[test]
#[target_feature = "+bmi"]
fn _tzcnt_u16() {
assert_eq!(bmi::_tzcnt_u16(0b0000_0001u16), 0u16);
assert_eq!(bmi::_tzcnt_u16(0b0000_0000u16), 16u16);
assert_eq!(bmi::_tzcnt_u16(0b1001_0000u16), 4u16);
}
#[test]
#[target_feature = "+bmi"]
fn _tzcnt_u32() {
assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32);
assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32);
assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32);
}
#[test]
#[target_feature = "+bmi"]
fn _tzcnt_u64() {
assert_eq!(bmi::_tzcnt_u64(0b0000_0001u64), 0u64);
assert_eq!(bmi::_tzcnt_u64(0b0000_0000u64), 64u64);
assert_eq!(bmi::_tzcnt_u64(0b1001_0000u64), 4u64);
}
}

View file

@ -0,0 +1,215 @@
//! Bit Manipulation Instruction (BMI) Set 2.0.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference,
//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
/// Unsigned multiply without affecting flags.
///
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
/// the low half and the high half of the result.
#[inline(always)]
// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
#[cfg_attr(test, assert_instr(imul))]
#[target_feature = "+bmi2"]
pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) {
let result: u64 = (a as u64) * (b as u64);
let hi = (result >> 32) as u32;
(result as u32, hi)
}
/// Unsigned multiply without affecting flags.
///
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
/// the low half and the high half of the result.
#[inline(always)]
#[cfg_attr(test, assert_instr(mulx))]
#[target_feature = "+bmi2"]
pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) {
let result: u128 = (a as u128) * (b as u128);
let hi = (result >> 64) as u64;
(result as u64, hi)
}
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.bmi.bzhi.32"]
fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.bzhi.64"]
fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
#[link_name="llvm.x86.bmi.pdep.32"]
fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.pdep.64"]
fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
#[link_name="llvm.x86.bmi.pext.32"]
fn x86_bmi2_pext_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.pext.64"]
fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
}
/// Zero higher bits of `a` >= `index`.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(bzhi))]
pub fn _bzhi_u32(a: u32, index: u32) -> u32 {
unsafe { x86_bmi2_bzhi_32(a, index) }
}
/// Zero higher bits of `a` >= `index`.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(bzhi))]
pub fn _bzhi_u64(a: u64, index: u64) -> u64 {
unsafe { x86_bmi2_bzhi_64(a, index) }
}
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(pdep))]
pub fn _pdep_u32(a: u32, mask: u32) -> u32 {
unsafe { x86_bmi2_pdep_32(a, mask) }
}
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(pdep))]
pub fn _pdep_u64(a: u64, mask: u64) -> u64 {
unsafe { x86_bmi2_pdep_64(a, mask) }
}
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
/// order bit positions of the result.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(pext))]
pub fn _pext_u32(a: u32, mask: u32) -> u32 {
unsafe { x86_bmi2_pext_32(a, mask) }
}
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
/// order bit positions of the result.
#[inline(always)]
#[target_feature = "+bmi2"]
#[cfg_attr(test, assert_instr(pext))]
pub fn _pext_u64(a: u64, mask: u64) -> u64 {
unsafe { x86_bmi2_pext_64(a, mask) }
}
// Exercises the BMI2 wrappers against hand-computed bit patterns; only built
// when the host toolchain enables `bmi2` on x86/x86_64.
#[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use x86::bmi2;
#[test]
#[target_feature = "+bmi2"]
fn _pext_u32() {
// Extracts the bits of `n` selected by each mask, packed toward bit 0.
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
let s0 = 0b0000_0000_0011_0101u32;
let m1 = 0b1110_1011_1110_1111u32;
let s1 = 0b0001_0111_0100_0011u32;
assert_eq!(bmi2::_pext_u32(n, m0), s0);
assert_eq!(bmi2::_pext_u32(n, m1), s1);
}
#[test]
#[target_feature = "+bmi2"]
fn _pext_u64() {
// Same vectors as the u32 case, widened to 64 bits.
let n = 0b1011_1110_1001_0011u64;
let m0 = 0b0110_0011_1000_0101u64;
let s0 = 0b0000_0000_0011_0101u64;
let m1 = 0b1110_1011_1110_1111u64;
let s1 = 0b0001_0111_0100_0011u64;
assert_eq!(bmi2::_pext_u64(n, m0), s0);
assert_eq!(bmi2::_pext_u64(n, m1), s1);
}
#[test]
#[target_feature = "+bmi2"]
fn _pdep_u32() {
// Scatters the low bits of `n` to the set positions of each mask.
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
let s0 = 0b0000_0010_0000_0101u32;
let m1 = 0b1110_1011_1110_1111u32;
let s1 = 0b1110_1001_0010_0011u32;
assert_eq!(bmi2::_pdep_u32(n, m0), s0);
assert_eq!(bmi2::_pdep_u32(n, m1), s1);
}
#[test]
#[target_feature = "+bmi2"]
fn _pdep_u64() {
// Same vectors as the u32 case, widened to 64 bits.
let n = 0b1011_1110_1001_0011u64;
let m0 = 0b0110_0011_1000_0101u64;
let s0 = 0b0000_0010_0000_0101u64;
let m1 = 0b1110_1011_1110_1111u64;
let s1 = 0b1110_1001_0010_0011u64;
assert_eq!(bmi2::_pdep_u64(n, m0), s0);
assert_eq!(bmi2::_pdep_u64(n, m1), s1);
}
#[test]
#[target_feature = "+bmi2"]
fn _bzhi_u32() {
// Zeroes all bits of `n` at index 5 and above.
let n = 0b1111_0010u32;
let s = 0b0001_0010u32;
assert_eq!(bmi2::_bzhi_u32(n, 5), s);
}
#[test]
#[target_feature = "+bmi2"]
fn _bzhi_u64() {
let n = 0b1111_0010u64;
let s = 0b0001_0010u64;
assert_eq!(bmi2::_bzhi_u64(n, 5), s);
}
#[test]
#[target_feature = "+bmi2"]
fn _mulx_u32() {
// Widening multiply: returns the (low, high) halves of the 64-bit product.
let a: u32 = 4_294_967_200;
let b: u32 = 2;
let (lo, hi): (u32, u32) = bmi2::_mulx_u32(a, b);
// result = 8589934400
// = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64
//     ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32);
assert_eq!(hi, 0b0001u32);
}
#[test]
#[target_feature = "+bmi2"]
fn _mulx_u64() {
// Widening multiply: returns the (low, high) halves of the 128-bit product.
let a: u64 = 9_223_372_036_854_775_800;
let b: u64 = 100;
let (lo, hi): (u64, u64) = bmi2::_mulx_u64(a, b);
// result = 922337203685477580000
// = 0b00110001_11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u128
//     ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64);
assert_eq!(hi, 0b00110001u64);
}
}

View file

@ -6,6 +6,11 @@ pub use self::sse42::*;
pub use self::avx::*;
pub use self::avx2::*;
pub use self::abm::*;
pub use self::bmi::*;
pub use self::bmi2::*;
pub use self::tbm::*;
#[allow(non_camel_case_types)]
pub type __m128i = ::v128::i8x16;
#[allow(non_camel_case_types)]
@ -20,3 +25,8 @@ mod sse41;
mod sse42;
mod avx;
mod avx2;
mod abm;
mod bmi;
mod bmi2;
mod tbm;

View file

@ -1,9 +1,14 @@
use simd_llvm::simd_shuffle4;
use v128::*;
#[cfg(test)]
use assert_instr::assert_instr;
/// Return the square root of packed single-precision (32-bit) floating-point
/// elements in `a`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(sqrtps))]
pub fn _mm_sqrt_ps(a: f32x4) -> f32x4 {
// Delegates to the `sqrtps` extern binding; `assert_instr` verifies codegen
// under test builds.
unsafe { sqrtps(a) }
}
@ -12,6 +17,7 @@ pub fn _mm_sqrt_ps(a: f32x4) -> f32x4 {
/// Return the approximate reciprocal of packed single-precision (32-bit)
/// floating-point elements in `a`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(rcpps))]
pub fn _mm_rcp_ps(a: f32x4) -> f32x4 {
// NOTE(review): `rcpps` yields an approximation, not an exact reciprocal —
// confirm the error bound against the Intel SDM before documenting it.
unsafe { rcpps(a) }
}
@ -20,6 +26,7 @@ pub fn _mm_rcp_ps(a: f32x4) -> f32x4 {
/// Return the approximate reciprocal square root of packed single-precision
/// (32-bit) floating-point elements in `a`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(rsqrtps))]
pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
// NOTE(review): `rsqrtps` is an approximation — confirm the error bound
// against the Intel SDM before documenting it.
unsafe { rsqrtps(a) }
}
@ -28,6 +35,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
/// `b`, and return the corresponding minimum values.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(minps))]
pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
// Delegates to the `minps` extern binding.
unsafe { minps(a, b) }
}
@ -36,16 +44,26 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
/// `b`, and return the corresponding maximum values.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(maxps))]
pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
// Delegates to the `maxps` extern binding.
unsafe { maxps(a, b) }
}
/// Unpack and interleave single-precision (32-bit) floating-point elements
/// from the high half of `a` and `b`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(unpckhps))]
pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
    // Shuffle indices 0-3 select lanes of `a`, 4-7 select lanes of `b`, so
    // [2, 6, 3, 7] produces (a2, b2, a3, b3) — the UNPCKHPS lane order.
    unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) }
}
/// Return a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 4 least significant bits of the return value.
/// All other bits are set to `0`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(movmskps))]
pub fn _mm_movemask_ps(a: f32x4) -> i32 {
// Delegates to the `movmskps` extern binding declared in the extern block
// below.
unsafe { movmskps(a) }
}
@ -66,7 +84,7 @@ extern {
fn movmskps(a: f32x4) -> i32;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v128::*;
use x86::sse;
@ -116,6 +134,15 @@ mod tests {
assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0));
}
#[test]
#[target_feature = "+sse"]
fn _mm_unpackhi_ps() {
// High halves interleaved: expect lanes (a2, b2, a3, b3).
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
let r = sse::_mm_unpackhi_ps(a, b);
assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0));
}
#[test]
#[target_feature = "+sse"]
fn _mm_movemask_ps() {

View file

@ -9,6 +9,9 @@ use x86::__m128i;
use v128::*;
use v64::*;
#[cfg(test)]
use assert_instr::assert_instr;
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
@ -89,6 +92,7 @@ pub fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 {
/// Add packed 16-bit integers in `a` and `b` using saturation.
///
/// NOTE(review): saturation presumably clamps each lane to the i16 range
/// (PADDSW semantics) — confirm against the Intel SDM.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(paddsw))]
pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
// Delegates to the `paddsw` extern binding.
unsafe { paddsw(a, b) }
}
@ -1716,7 +1720,7 @@ extern {
fn movmskpd(a: f64x2) -> i32;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "sse2", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use std::os::raw::c_void;

View file

@ -57,7 +57,7 @@ extern {
fn dpps(a: f32x4, b: f32x4, imm8: u8) -> f32x4;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "sse4.1", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v128::*;
use x86::sse41;

View file

@ -40,7 +40,7 @@ extern {
fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v128::*;
use x86::{__m128i, sse42};

View file

@ -50,7 +50,7 @@ extern {
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "ssse3", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use v128::*;
use x86::ssse3 as ssse3;

View file

@ -0,0 +1,393 @@
//! Trailing Bit Manipulation (TBM) instruction set.
//!
//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3:
//! General-Purpose and System
//! Instructions](http://support.amd.com/TechDocs/24594.pdf).
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29)
//! provides a quick overview of the available instructions.
#[cfg(test)]
use assert_instr::assert_instr;
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
/*
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.tbm.bextri.u32"]
fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32;
#[link_name="llvm.x86.tbm.bextri.u64"]
fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64;
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
unsafe { x86_tbm_bextri_u32(a, control) }
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
unsafe { x86_tbm_bextri_u64(a, control) }
}
*/
/// Clears the trailing one-bits of `x`, i.e. every bit below its least
/// significant zero bit.
///
/// When `x` has no zero bit (all ones), the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcfill))]
pub fn _blcfill_u32(x: u32) -> u32 {
    // Adding 1 carries through the trailing ones; ANDing drops them.
    let carried = x.wrapping_add(1);
    carried & x
}
/// Clears the trailing one-bits of `x`, i.e. every bit below its least
/// significant zero bit.
///
/// When `x` has no zero bit (all ones), the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcfill))]
pub fn _blcfill_u64(x: u64) -> u64 {
    // Adding 1 carries through the trailing ones; ANDing drops them.
    let carried = x.wrapping_add(1);
    carried & x
}
/// Returns a value with every bit set except the least significant zero bit
/// of `x`, which stays clear.
///
/// When `x` has no zero bit, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blci))]
pub fn _blci_u32(x: u32) -> u32 {
    // `!(x + 1)` is zero exactly at the lowest clear bit of `x`.
    let succ = x.wrapping_add(1);
    !succ | x
}
/// Returns a value with every bit set except the least significant zero bit
/// of `x`, which stays clear.
///
/// When `x` has no zero bit, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blci))]
pub fn _blci_u64(x: u64) -> u64 {
    // `!(x + 1)` is zero exactly at the lowest clear bit of `x`.
    let succ = x.wrapping_add(1);
    !succ | x
}
/// Isolates the least significant zero bit of `x`: that position is set in
/// the result and every other bit is clear.
///
/// When `x` has no zero bit, the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcic))]
pub fn _blcic_u32(x: u32) -> u32 {
    // `x + 1` sets the lowest clear bit; `!x` keeps only positions clear in `x`.
    let succ = x.wrapping_add(1);
    succ & !x
}
/// Isolates the least significant zero bit of `x`: that position is set in
/// the result and every other bit is clear.
///
/// When `x` has no zero bit, the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcic))]
pub fn _blcic_u64(x: u64) -> u64 {
    // `x + 1` sets the lowest clear bit; `!x` keeps only positions clear in `x`.
    let succ = x.wrapping_add(1);
    succ & !x
}
/// Builds a mask of contiguous ones from bit 0 up to and including the least
/// significant zero bit of `x`; all higher bits are cleared.
///
/// When `x` has no zero bit, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcmsk))]
pub fn _blcmsk_u32(x: u32) -> u32 {
    // XOR with `x + 1` leaves exactly the bits flipped by the carry chain.
    let succ = x.wrapping_add(1);
    succ ^ x
}
/// Builds a mask of contiguous ones from bit 0 up to and including the least
/// significant zero bit of `x`; all higher bits are cleared.
///
/// When `x` has no zero bit, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcmsk))]
pub fn _blcmsk_u64(x: u64) -> u64 {
    // XOR with `x + 1` leaves exactly the bits flipped by the carry chain.
    let succ = x.wrapping_add(1);
    succ ^ x
}
/// Returns `x` with its least significant zero bit turned on.
///
/// When `x` has no zero bit, `x` is returned unchanged.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcs))]
pub fn _blcs_u32(x: u32) -> u32 {
    // `x + 1` flips the lowest clear bit on; OR merges it back into `x`.
    let succ = x.wrapping_add(1);
    succ | x
}
/// Returns `x` with its least significant zero bit turned on.
///
/// When `x` has no zero bit, `x` is returned unchanged.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blcs))]
pub fn _blcs_u64(x: u64) -> u64 {
    // `x + 1` flips the lowest clear bit on; OR merges it back into `x`.
    let succ = x.wrapping_add(1);
    succ | x
}
/// Returns `x` with every bit below its least significant set bit also set.
///
/// When `x` is zero (no set bit), every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blsfill))]
pub fn _blsfill_u32(x: u32) -> u32 {
    // `x - 1` turns the trailing zeros into ones; OR fills them in.
    let pred = x.wrapping_sub(1);
    pred | x
}
/// Returns `x` with every bit below its least significant set bit also set.
///
/// When `x` is zero (no set bit), every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blsfill))]
pub fn _blsfill_u64(x: u64) -> u64 {
    // `x - 1` turns the trailing zeros into ones; OR fills them in.
    let pred = x.wrapping_sub(1);
    pred | x
}
/// Clears the least significant set bit of `x` and sets every other bit of
/// the result.
///
/// When `x` is zero (no set bit), every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blsic))]
pub fn _blsic_u32(x: u32) -> u32 {
    // `x - 1` is zero only at the lowest set bit; OR with `!x` fills the rest.
    let pred = x.wrapping_sub(1);
    pred | !x
}
/// Clears the least significant set bit of `x` and sets every other bit of
/// the result.
///
/// When `x` is zero (no set bit), every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(blsic))]
pub fn _blsic_u64(x: u64) -> u64 {
    // `x - 1` is zero only at the lowest set bit; OR with `!x` fills the rest.
    let pred = x.wrapping_sub(1);
    pred | !x
}
/// Clears the trailing one-bits of `x` (everything below its least
/// significant zero) and sets every other bit of the result.
///
/// When bit 0 of `x` is already clear, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(t1mskc))]
pub fn _t1mskc_u32(x: u32) -> u32 {
    // The carry in `x + 1` zeroes exactly the trailing-ones positions of `!x`.
    let succ = x.wrapping_add(1);
    succ | !x
}
/// Clears the trailing one-bits of `x` (everything below its least
/// significant zero) and sets every other bit of the result.
///
/// When bit 0 of `x` is already clear, every bit of the result is set.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(t1mskc))]
pub fn _t1mskc_u64(x: u64) -> u64 {
    // The carry in `x + 1` zeroes exactly the trailing-ones positions of `!x`.
    let succ = x.wrapping_add(1);
    succ | !x
}
/// Builds a mask of the trailing zeros of `x`: every bit below its least
/// significant set bit is set, all other bits are clear.
///
/// When bit 0 of `x` is set, the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(tzmsk))]
pub fn _tzmsk_u32(x: u32) -> u32 {
    // `x - 1` sets the trailing-zero positions; `!x` restricts to clear bits.
    let pred = x.wrapping_sub(1);
    pred & !x
}
/// Builds a mask of the trailing zeros of `x`: every bit below its least
/// significant set bit is set, all other bits are clear.
///
/// When bit 0 of `x` is set, the result is zero.
#[inline(always)]
#[target_feature = "+tbm"]
#[cfg_attr(test, assert_instr(tzmsk))]
pub fn _tzmsk_u64(x: u64) -> u64 {
    // `x - 1` sets the trailing-zero positions; `!x` restricts to clear bits.
    let pred = x.wrapping_sub(1);
    pred & !x
}
// Exercises each safe TBM helper against hand-computed bit patterns; only
// built when the host toolchain enables `tbm` on x86/x86_64.
#[cfg(all(test, target_feature = "tbm", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use x86::tbm;
/*
#[test]
#[target_feature = "+tbm"]
fn _bextr_u32() {
assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
}
#[test]
#[target_feature = "+tbm"]
fn _bextr_u64() {
assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
}
*/
#[test]
#[target_feature = "+tbm"]
fn _blcfill_u32() {
assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blcfill_u64() {
assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blci_u32() {
assert_eq!(tbm::_blci_u32(0b0101_0000u32),
0b1111_1111_1111_1111_1111_1111_1111_1110u32);
assert_eq!(tbm::_blci_u32(0b1111_1111u32),
0b1111_1111_1111_1111_1111_1110_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blci_u64() {
assert_eq!(tbm::_blci_u64(0b0101_0000u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64);
assert_eq!(tbm::_blci_u64(0b1111_1111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blcic_u32() {
assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blcic_u64() {
assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blcmsk_u32() {
assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blcmsk_u64() {
assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blcs_u32() {
assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blcs_u64() {
assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blsfill_u32() {
assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
assert_eq!(tbm::_blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blsfill_u64() {
assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
assert_eq!(tbm::_blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _blsic_u32() {
assert_eq!(tbm::_blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32);
assert_eq!(tbm::_blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
fn _blsic_u64() {
assert_eq!(tbm::_blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64);
assert_eq!(tbm::_blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _t1mskc_u32() {
assert_eq!(tbm::_t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32);
assert_eq!(tbm::_t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32);
}
#[test]
#[target_feature = "+tbm"]
// Renamed from `_t1mksc_u64` — the original test name transposed the letters
// of the `t1mskc` mnemonic it exercises.
fn _t1mskc_u64() {
assert_eq!(tbm::_t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64);
assert_eq!(tbm::_t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
}
#[test]
#[target_feature = "+tbm"]
fn _tzmsk_u32() {
assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
}
#[test]
#[target_feature = "+tbm"]
fn _tzmsk_u64() {
assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
}
}