diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 8ecf0850f680..e8c9c518e8c6 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] #![feature( const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, - target_feature, cfg_target_feature + target_feature, cfg_target_feature, i128_type )] /// Platform independent SIMD vector types and operations. diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs new file mode 100644 index 000000000000..81e6f7c6b168 --- /dev/null +++ b/library/stdarch/src/x86/bmi2.rs @@ -0,0 +1,193 @@ +/// Unsigned multiply without affecting flags. +/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { + let result: u64 = (a as u64) * (b as u64); + let hi = (result >> 32) as u32; + (result as u32, hi) +} + +/// Unsigned multiply without affecting flags. +/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) { + let result: u128 = (a as u128) * (b as u128); + let hi = (result >> 64) as u64; + (result as u64, hi) +} + +#[allow(dead_code)] +extern "platform-intrinsic" { + fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; + fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; + fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; + fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; + fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; + fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; +} + + +/// Zero higher bits of `a` >= `index`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _bzhi_u32(a: u32, index: u32) -> u32 { + unsafe { x86_bmi2_bzhi_32(a, index) } +} + +/// Zero higher bits of `a` >= `index`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _bzhi_u64(a: u64, index: u64) -> u64 { + unsafe { x86_bmi2_bzhi_64(a, index) } +} + + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pdep_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pdep_32(a, mask) } +} + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pdep_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pdep_64(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pext_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pext_32(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pext_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pext_64(a, mask) } +} + + +#[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))] +mod tests { + use x86::bmi2; + + #[test] + #[target_feature = "+bmi2"] + fn _pext_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0000_0011_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b0001_0111_0100_0011u32; + + assert_eq!(bmi2::_pext_u32(n, m0), s0); + assert_eq!(bmi2::_pext_u32(n, m1), s1); + } + + #[test] + #[target_feature = "+bmi2"] + fn _pext_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0000_0011_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b0001_0111_0100_0011u64; + + assert_eq!(bmi2::_pext_u64(n, m0), s0); + assert_eq!(bmi2::_pext_u64(n, m1), s1); + } + + #[test] + #[target_feature = "+bmi2"] + fn _pdep_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0010_0000_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b1110_1001_0010_0011u32; + + assert_eq!(bmi2::_pdep_u32(n, m0), s0); + assert_eq!(bmi2::_pdep_u32(n, m1), s1); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _pdep_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0010_0000_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b1110_1001_0010_0011u64; + + assert_eq!(bmi2::_pdep_u64(n, m0), s0); + assert_eq!(bmi2::_pdep_u64(n, m1), s1); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _bzhi_u32() { + let n = 0b1111_0010u32; + let s = 0b0001_0010u32; + assert_eq!(bmi2::_bzhi_u32(n, 5), s); + } + + #[test] + #[target_feature = "+bmi2"] + fn _bzhi_u64() { + let n = 0b1111_0010u64; + let s = 0b0001_0010u64; + assert_eq!(bmi2::_bzhi_u64(n, 5), s); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _mulx_u32() { + let a: u32 = 4_294_967_200; + let b: u32 = 2; + let (lo, hi): (u32, u32) = bmi2::_mulx_u32(a, b); + // result = 8589934400 + // = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64 + // ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32); + assert_eq!(hi, 0b0001u32); + } + + #[test] + #[target_feature = "+bmi2"] + fn _mulx_u64() { + let a: u64 = 9_223_372_036_854_775_800; + let b: u64 = 100; + let (lo, hi): (u64, u64) = bmi2::_mulx_u64(a, b); + // result = 922337203685477580000 + // = 0b00110001_11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u128 + // ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64); + assert_eq!(hi, 0b00110001u64); + } + +} diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index fd1eb95e544a..f9a10f18a3b2 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -21,3 +21,4 @@ mod avx; mod avx2; mod bmi; +mod bmi2;