From ad5ff722de4220adb1677026d4d68989f73fba7d Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 16:30:59 +0200 Subject: [PATCH 01/25] run tests only in architectures that support them --- library/stdarch/src/lib.rs | 2 +- library/stdarch/src/x86/avx.rs | 7 ++----- library/stdarch/src/x86/avx2.rs | 2 +- library/stdarch/src/x86/sse.rs | 2 +- library/stdarch/src/x86/sse2.rs | 2 +- library/stdarch/src/x86/sse41.rs | 4 ++-- library/stdarch/src/x86/sse42.rs | 2 +- library/stdarch/src/x86/ssse3.rs | 2 +- 8 files changed, 10 insertions(+), 13 deletions(-) diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 1aa713742844..8ecf0850f680 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] #![feature( const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, - target_feature, + target_feature, cfg_target_feature )] /// Platform independent SIMD vector types and operations. diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs index 6ec764c3776b..7b23d1e6cde5 100644 --- a/library/stdarch/src/x86/avx.rs +++ b/library/stdarch/src/x86/avx.rs @@ -31,7 +31,7 @@ extern "C" { } -#[cfg(test)] +#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v256::*; use x86::avx; @@ -65,7 +65,4 @@ mod tests { let e = f64x4::new(-4.0,8.0,-4.0,12.0); assert_eq!(r, e); } - - - -} \ No newline at end of file +} diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs index e07f26a67dec..ac81ccb9dea1 100644 --- a/library/stdarch/src/x86/avx2.rs +++ b/library/stdarch/src/x86/avx2.rs @@ -1044,7 +1044,7 @@ extern "C" { } -#[cfg(test)] +#[cfg(all(test, target_feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v256::*; use v128::*; diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index c03735624c75..d1af0e1a9b61 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -66,7 +66,7 @@ extern { fn movmskps(a: f32x4) -> i32; } -#[cfg(test)] +#[cfg(all(test, target_feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v128::*; use x86::sse; diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index b564677ea786..882259fa5502 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -1716,7 +1716,7 @@ extern { fn movmskpd(a: f64x2) -> i32; } -#[cfg(test)] +#[cfg(all(test, target_feature = "sse2", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use std::os::raw::c_void; diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index 61217f26f596..b7dc45263e39 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -16,13 +16,13 @@ extern { fn pblendvb(a: __m128i, b: __m128i, mask: __m128i) -> __m128i; } -#[cfg(test)] +#[cfg(all(test, target_feature = "sse4.1", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v128::*; use x86::sse41; #[test] - #[target_feature = "+sse4.2"] + #[target_feature = "+sse4.1"] fn _mm_blendv_epi8() { let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs index 4789fd32d537..242906119f7c 100644 --- a/library/stdarch/src/x86/sse42.rs +++ b/library/stdarch/src/x86/sse42.rs @@ -304,7 +304,7 @@ extern { fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32; } -#[cfg(test)] +#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v128::*; use x86::{__m128i, sse42}; diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs index b4b9ce21d421..1d1497f5d0d1 100644 --- a/library/stdarch/src/x86/ssse3.rs +++ b/library/stdarch/src/x86/ssse3.rs @@ -50,7 +50,7 @@ extern { fn pshufb128(a: u8x16, b: u8x16) -> u8x16; } -#[cfg(test)] +#[cfg(all(test, target_feature = "ssse3", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use v128::*; use x86::ssse3 as ssse3; From 14144a3b38c7b900bf3b9b9fb2a7bbc06717800f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 17:29:15 +0200 Subject: [PATCH 02/25] [bmi] implement all bmi1 intrinsics --- library/stdarch/src/x86/bmi.rs | 257 +++++++++++++++++++++++++++++++++ library/stdarch/src/x86/mod.rs | 3 + 2 files changed, 260 insertions(+) create mode 100644 library/stdarch/src/x86/bmi.rs diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs new file mode 100644 index 000000000000..6e2eba8e504d --- /dev/null +++ b/library/stdarch/src/x86/bmi.rs @@ -0,0 +1,257 @@ +#[allow(dead_code)] +extern "platform-intrinsic" { + fn x86_bmi_bextr_32(x: u32, y: u32) -> u32; + fn x86_bmi_bextr_64(x: u64, y: u64) -> u64; +} + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { + _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) +} + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { + _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits [7,0] of `control` specify the index to the first bit in the range to be +/// extracted, and bits [15,8] specify the length of the range. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _bextr2_u32(a: u32, control: u32) -> u32 { + unsafe { x86_bmi_bextr_32(a, control) } +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits [7,0] of `control` specify the index to the first bit in the range to be +/// extracted, and bits [15,8] specify the length of the range. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _bextr2_u64(a: u64, control: u64) -> u64 { + unsafe { x86_bmi_bextr_64(a, control) } +} + +/// Bitwise logical `AND` of inverted `a` with `b`. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _andn_u32(a: u32, b: u32) -> u32 { + !a & b +} + +/// Bitwise logical `AND` of inverted `a` with `b`. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _andn_u64(a: u64, b: u64) -> u64 { + !a & b +} + +/// Extract lowest set isolated bit. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsi_u32(x: u32) -> u32 { + x & x.wrapping_neg() +} + +/// Extract lowest set isolated bit. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsi_u64(x: u64) -> u64 { + x & x.wrapping_neg() +} + +/// Get mask up to lowest set bit. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsmsk_u32(x: u32) -> u32 { + x ^ (x.wrapping_sub(1u32)) +} + +/// Get mask up to lowest set bit. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsmsk_u64(x: u64) -> u64 { + x ^ (x.wrapping_sub(1u64)) +} + +/// Resets the lowest set bit of `x`. +/// +/// If `x` is sets CF. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsr_u32(x: u32) -> u32 { + x & (x.wrapping_sub(1)) +} + +/// Resets the lowest set bit of `x`. +/// +/// If `x` is sets CF. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _blsr_u64(x: u64) -> u64 { + x & (x.wrapping_sub(1)) +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is 0, it returns its size in bits. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _tzcnt_u16(x: u16) -> u16 { + x.trailing_zeros() as u16 +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is 0, it returns its size in bits. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _tzcnt_u32(x: u32) -> u32 { + x.trailing_zeros() +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is 0, it returns its size in bits. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _tzcnt_u64(x: u64) -> u64 { + x.trailing_zeros() as u64 +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is 0, it returns its size in bits. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _mm_tzcnt_u32(x: u32) -> u32 { + x.trailing_zeros() +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is 0, it returns its size in bits. +#[inline(always)] +#[target_feature = "+bmi"] +pub fn _mm_tzcnt_u64(x: u64) -> u64 { + x.trailing_zeros() as u64 +} + +#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] +mod tests { + use x86::bmi; + + #[test] + #[target_feature = "+bmi"] + fn _bextr_u32() { + assert_eq!(bmi::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _bextr_u64() { + assert_eq!(bmi::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); + } + + #[test] + #[target_feature = "+bmi"] + fn _andn_u32() { + assert_eq!(bmi::_andn_u32(0, 0), 0); + assert_eq!(bmi::_andn_u32(0, 1), 1); + assert_eq!(bmi::_andn_u32(1, 0), 0); + assert_eq!(bmi::_andn_u32(1, 1), 0); + + assert_eq!(bmi::_andn_u32(0b0000_0000u32, 0b0000_0000u32), 0b0000_0000u32); + assert_eq!(bmi::_andn_u32(0b0000_0000u32, 0b1111_1111u32), 0b1111_1111u32); + assert_eq!(bmi::_andn_u32(0b1111_1111u32, 0b0000_0000u32), 0b0000_0000u32); + assert_eq!(bmi::_andn_u32(0b1111_1111u32, 0b1111_1111u32), 0b0000_0000u32); + assert_eq!(bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32), 0b0001_1101u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _andn_u64() { + assert_eq!(bmi::_andn_u64(0, 0), 0); + assert_eq!(bmi::_andn_u64(0, 1), 1); + assert_eq!(bmi::_andn_u64(1, 0), 0); + assert_eq!(bmi::_andn_u64(1, 1), 0); + + assert_eq!(bmi::_andn_u64(0b0000_0000u64, 0b0000_0000u64), 0b0000_0000u64); + assert_eq!(bmi::_andn_u64(0b0000_0000u64, 0b1111_1111u64), 0b1111_1111u64); + assert_eq!(bmi::_andn_u64(0b1111_1111u64, 0b0000_0000u64), 0b0000_0000u64); + assert_eq!(bmi::_andn_u64(0b1111_1111u64, 0b1111_1111u64), 0b0000_0000u64); + assert_eq!(bmi::_andn_u64(0b0100_0000u64, 0b0101_1101u64), 0b0001_1101u64); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsi_u32() { + assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsi_u64() { + assert_eq!(bmi::_blsi_u64(0b1101_0000u64), 0b0001_0000u64); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsmsk_u32() { + assert_eq!(bmi::_blsmsk_u32(0b0011_0000u32), 0b0001_1111u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsmsk_u64() { + assert_eq!(bmi::_blsmsk_u64(0b0011_0000u64), 0b0001_1111u64); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsr_u32() { + /// TODO: test the behavior when the input is 0 + assert_eq!(bmi::_blsr_u32(0b0011_0000u32), 0b0010_0000u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _blsr_u64() { + /// TODO: test the behavior when the input is 0 + assert_eq!(bmi::_blsr_u64(0b0011_0000u64), 0b0010_0000u64); + } + + #[test] + #[target_feature = "+bmi"] + fn _tzcnt_u16() { + assert_eq!(bmi::_tzcnt_u16(0b0000_0001u16), 0u16); + assert_eq!(bmi::_tzcnt_u16(0b0000_0000u16), 16u16); + assert_eq!(bmi::_tzcnt_u16(0b1001_0000u16), 4u16); + } + + #[test] + #[target_feature = "+bmi"] + fn _tzcnt_u32() { + assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32); + assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32); + assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32); + } + + #[test] + #[target_feature = "+bmi"] + fn _tzcnt_u64() { + assert_eq!(bmi::_tzcnt_u64(0b0000_0001u64), 0u64); + assert_eq!(bmi::_tzcnt_u64(0b0000_0000u64), 64u64); + assert_eq!(bmi::_tzcnt_u64(0b1001_0000u64), 4u64); + } +} diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index 15d335b172b0..fd1eb95e544a 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -5,6 +5,7 @@ pub use self::sse41::*; pub use self::sse42::*; pub use self::avx::*; pub use self::avx2::*; +pub use self::bmi::*; #[allow(non_camel_case_types)] pub type __m128i = ::v128::i8x16; @@ -18,3 +19,5 @@ mod sse41; mod sse42; mod avx; mod avx2; + +mod bmi; From bf68b58e6bdde8dc1298c5972d97f9a301ed003d Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 18:02:14 +0200 Subject: [PATCH 03/25] [bmi2] implement all bmi2 instructions --- library/stdarch/src/lib.rs | 2 +- library/stdarch/src/x86/bmi2.rs | 193 ++++++++++++++++++++++++++++++++ library/stdarch/src/x86/mod.rs | 1 + 3 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 library/stdarch/src/x86/bmi2.rs diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 8ecf0850f680..e8c9c518e8c6 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] #![feature( const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, - target_feature, cfg_target_feature + target_feature, cfg_target_feature, i128_type )] /// Platform independent SIMD vector types and operations. diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs new file mode 100644 index 000000000000..81e6f7c6b168 --- /dev/null +++ b/library/stdarch/src/x86/bmi2.rs @@ -0,0 +1,193 @@ +/// Unsigned multiply without affecting flags. +/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { + let result: u64 = (a as u64) * (b as u64); + let hi = (result >> 32) as u32; + (result as u32, hi) +} + +/// Unsigned multiply without affecting flags. +/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) { + let result: u128 = (a as u128) * (b as u128); + let hi = (result >> 64) as u64; + (result as u64, hi) +} + +#[allow(dead_code)] +extern "platform-intrinsic" { + fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; + fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; + fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; + fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; + fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; + fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; +} + + +/// Zero higher bits of `a` >= `index`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _bzhi_u32(a: u32, index: u32) -> u32 { + unsafe { x86_bmi2_bzhi_32(a, index) } +} + +/// Zero higher bits of `a` >= `index`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _bzhi_u64(a: u64, index: u64) -> u64 { + unsafe { x86_bmi2_bzhi_64(a, index) } +} + + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pdep_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pdep_32(a, mask) } +} + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pdep_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pdep_64(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pext_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pext_32(a, mask) } +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. +#[inline(always)] +#[target_feature = "+bmi2"] +pub fn _pext_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pext_64(a, mask) } +} + + +#[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))] +mod tests { + use x86::bmi2; + + #[test] + #[target_feature = "+bmi2"] + fn _pext_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0000_0011_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b0001_0111_0100_0011u32; + + assert_eq!(bmi2::_pext_u32(n, m0), s0); + assert_eq!(bmi2::_pext_u32(n, m1), s1); + } + + #[test] + #[target_feature = "+bmi2"] + fn _pext_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0000_0011_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b0001_0111_0100_0011u64; + + assert_eq!(bmi2::_pext_u64(n, m0), s0); + assert_eq!(bmi2::_pext_u64(n, m1), s1); + } + + #[test] + #[target_feature = "+bmi2"] + fn _pdep_u32() { + let n = 0b1011_1110_1001_0011u32; + + let m0 = 0b0110_0011_1000_0101u32; + let s0 = 0b0000_0010_0000_0101u32; + + let m1 = 0b1110_1011_1110_1111u32; + let s1 = 0b1110_1001_0010_0011u32; + + assert_eq!(bmi2::_pdep_u32(n, m0), s0); + assert_eq!(bmi2::_pdep_u32(n, m1), s1); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _pdep_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0010_0000_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b1110_1001_0010_0011u64; + + assert_eq!(bmi2::_pdep_u64(n, m0), s0); + assert_eq!(bmi2::_pdep_u64(n, m1), s1); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _bzhi_u32() { + let n = 0b1111_0010u32; + let s = 0b0001_0010u32; + assert_eq!(bmi2::_bzhi_u32(n, 5), s); + } + + #[test] + #[target_feature = "+bmi2"] + fn _bzhi_u64() { + let n = 0b1111_0010u64; + let s = 0b0001_0010u64; + assert_eq!(bmi2::_bzhi_u64(n, 5), s); + } + + + #[test] + #[target_feature = "+bmi2"] + fn _mulx_u32() { + let a: u32 = 4_294_967_200; + let b: u32 = 2; + let (lo, hi): (u32, u32) = bmi2::_mulx_u32(a, b); + // result = 8589934400 + // = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64 + // ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32); + assert_eq!(hi, 0b0001u32); + } + + #[test] + #[target_feature = "+bmi2"] + fn _mulx_u64() { + let a: u64 = 9_223_372_036_854_775_800; + let b: u64 = 100; + let (lo, hi): (u64, u64) = bmi2::_mulx_u64(a, b); + // result = 922337203685477580000 + // = 0b00110001_11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u128 + // ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64); + assert_eq!(hi, 0b00110001u64); + } + +} diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index fd1eb95e544a..f9a10f18a3b2 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -21,3 +21,4 @@ mod avx; mod avx2; mod bmi; +mod bmi2; From 778d55a0bcd390e4fde8c813a03f9d4c15484dbf Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 18:25:58 +0200 Subject: [PATCH 04/25] [abm] support popcnt and lzcnt --- library/stdarch/src/x86/abm.rs | 71 ++++++++++++++++++++++++++++++++++ library/stdarch/src/x86/mod.rs | 1 + 2 files changed, 72 insertions(+) create mode 100644 library/stdarch/src/x86/abm.rs diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs new file mode 100644 index 000000000000..0ea94e75a9c4 --- /dev/null +++ b/library/stdarch/src/x86/abm.rs @@ -0,0 +1,71 @@ +//! Advanced Bit Manipulation (ABM) instructions +//! +//! That is, POPCNT and LZCNT. These instructions have their own CPUID bits to +//! indicate support. +//! +//! TODO: it is unclear which target feature to use here. SSE4.2 should be good +//! enough but we might need to use BMI for LZCNT if there are any problems. +//! +//! The references are: +//! +//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). +//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf). +//! +//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29) +//! provides a quick overview of the instructions available. + + +/// Counts the leading most significant zero bits. +/// +/// When the operand is zero, it returns its size in bits. +#[inline(always)] +#[target_feature = "+sse4.2"] +pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } + +/// Counts the leading most significant zero bits. +/// +/// When the operand is zero, it returns its size in bits. +#[inline(always)] +#[target_feature = "+sse4.2"] +pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } + +/// Counts the bits that are set. +#[inline(always)] +#[target_feature = "+sse4.2"] +pub fn _popcnt32(x: u32) -> u32 { x.count_ones() } + +/// Counts the bits that are set. +#[inline(always)] +#[target_feature = "+sse4.2"] +pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 } + +#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))] +mod tests { + use x86::abm; + + #[test] + #[target_feature = "+sse4.2"] + fn _lzcnt_u32() { + assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32); + } + + #[test] + #[target_feature = "+sse4.2"] + fn _lzcnt_u64() { + assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64); + } + + #[test] + #[target_feature = "+sse4.2"] + fn _popcnt32() { + assert_eq!(abm::_popcnt32(0b0101_1010u32), 4); + } + + #[test] + #[target_feature = "+sse4.2"] + fn _popcnt64() { + assert_eq!(abm::_popcnt64(0b0101_1010u64), 4); + } +} + + diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index f9a10f18a3b2..0c9b0ab61e33 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -20,5 +20,6 @@ mod sse42; mod avx; mod avx2; +mod abm; mod bmi; mod bmi2; From c66003d2593dae01455aa7d55309ae5a64361ebd Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 18:27:00 +0200 Subject: [PATCH 05/25] [bmi] add module-level docs to bmi and bmi2 modules --- library/stdarch/src/x86/bmi.rs | 9 +++++++++ library/stdarch/src/x86/bmi2.rs | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs index 6e2eba8e504d..8dcd4919b69d 100644 --- a/library/stdarch/src/x86/bmi.rs +++ b/library/stdarch/src/x86/bmi.rs @@ -1,3 +1,12 @@ +//! Bit Manipulation Instruction (BMI) Set 1.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, +//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). +//! +//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29) +//! provides a quick overview of the available instructions. + #[allow(dead_code)] extern "platform-intrinsic" { fn x86_bmi_bextr_32(x: u32, y: u32) -> u32; diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index 81e6f7c6b168..3a7a93b37b83 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -1,3 +1,12 @@ +//! Bit Manipulation Instruction (BMI) Set 2.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, +//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). +//! +//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29) +//! provides a quick overview of the available instructions. + /// Unsigned multiply without affecting flags. /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with From 5c483e37738cc37a374c5a51489bed8a8b1384c8 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 20:07:24 +0200 Subject: [PATCH 06/25] [tbm] adds all tbm intrinsics --- library/stdarch/src/x86/mod.rs | 1 + library/stdarch/src/x86/tbm.rs | 369 +++++++++++++++++++++++++++++++++ 2 files changed, 370 insertions(+) create mode 100644 library/stdarch/src/x86/tbm.rs diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index 0c9b0ab61e33..71e426f9b718 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -23,3 +23,4 @@ mod avx2; mod abm; mod bmi; mod bmi2; +mod tbm; diff --git a/library/stdarch/src/x86/tbm.rs b/library/stdarch/src/x86/tbm.rs new file mode 100644 index 000000000000..2e42bf80f1bf --- /dev/null +++ b/library/stdarch/src/x86/tbm.rs @@ -0,0 +1,369 @@ +//! Trailing Bit Manipulation (TBM) instruction set. +//! +//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: +//! General-Purpose and System +//! Instructions](http://support.amd.com/TechDocs/24594.pdf). +//! +//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29) +//! provides a quick overview of the available instructions. + +/* // TODO: LLVM-CODEGEN ERROR +#[allow(dead_code)] +extern "platform-intrinsic" { + fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32; + fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64; +} + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { + _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) +} + +/// Extracts bits in range [`start`, `start` + `length`) from `a` into +/// the least significant bits of the result. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { + _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits [7,0] of `control` specify the index to the first bit in the range to be +/// extracted, and bits [15,8] specify the length of the range. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _bextr2_u32(a: u32, control: u32) -> u32 { + unsafe { x86_tbm_bextri_u32(a, control) } +} + +/// Extracts bits of `a` specified by `control` into +/// the least significant bits of the result. +/// +/// Bits [7,0] of `control` specify the index to the first bit in the range to be +/// extracted, and bits [15,8] specify the length of the range. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _bextr2_u64(a: u64, control: u64) -> u64 { + unsafe { x86_tbm_bextri_u64(a, control) } +} +*/ + +/// Clears all bits below the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcfill_u32(x: u32) -> u32 { + x & (x.wrapping_add(1)) +} + +/// Clears all bits below the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcfill_u64(x: u64) -> u64 { + x & (x.wrapping_add(1)) +} + +/// Sets all bits of `x` to 1 except for the least significant zero bit. +/// +/// If there is no zero bit in `x`, it sets all bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blci_u32(x: u32) -> u32 { + x | !(x.wrapping_add(1)) +} + +/// Sets all bits of `x` to 1 except for the least significant zero bit. +/// +/// If there is no zero bit in `x`, it sets all bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blci_u64(x: u64) -> u64 { + x | !(x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x` and clears all other bits. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcic_u32(x: u32) -> u32 { + !x & (x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x` and clears all other bits. +/// +/// If there is no zero bit in `x`, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcic_u64(x: u64) -> u64 { + !x & (x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x` and clears all bits above that bit. +/// +/// If there is no zero bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcmsk_u32(x: u32) -> u32 { + x ^ (x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x` and clears all bits above that bit. +/// +/// If there is no zero bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcmsk_u64(x: u64) -> u64 { + x ^ (x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns `x`. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcs_u32(x: u32) -> u32 { + x | (x.wrapping_add(1)) +} + +/// Sets the least significant zero bit of `x`. +/// +/// If there is no zero bit in `x`, it returns `x`. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blcs_u64(x: u64) -> u64 { + x | x.wrapping_add(1) +} + +/// Sets all bits of `x` below the least significant one. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blsfill_u32(x: u32) -> u32 { + x | (x.wrapping_sub(1)) +} + +/// Sets all bits of `x` below the least significant one. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blsfill_u64(x: u64) -> u64 { + x | (x.wrapping_sub(1)) +} + +/// Clears least significant bit and sets all other bits. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blsic_u32(x: u32) -> u32 { + !x | (x.wrapping_sub(1)) +} + +/// Clears least significant bit and sets all other bits. +/// +/// If there is no set bit in `x`, it sets all the bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _blsic_u64(x: u64) -> u64 { + !x | (x.wrapping_sub(1)) +} + +/// Clears all bits below the least significant zero of `x` and sets all other +/// bits. +/// +/// If the least significant bit of `x` is 0, it sets all bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _t1mskc_u32(x: u32) -> u32 { + !x | (x.wrapping_add(1)) +} + +/// Clears all bits below the least significant zero of `x` and sets all other +/// bits. +/// +/// If the least significant bit of `x` is 0, it sets all bits. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _t1mskc_u64(x: u64) -> u64 { + !x | (x.wrapping_add(1)) +} + +/// Sets all bits below the least significant one of `x` and clears all other +/// bits. +/// +/// If the least significant bit of `x` is 1, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _tzmsk_u32(x: u32) -> u32 { + !x & (x.wrapping_sub(1)) +} + +/// Sets all bits below the least significant one of `x` and clears all other +/// bits. +/// +/// If the least significant bit of `x` is 1, it returns zero. +#[inline(always)] +#[target_feature = "+tbm"] +pub fn _tzmsk_u64(x: u64) -> u64 { + !x & (x.wrapping_sub(1)) +} + +#[cfg(all(test, target_feature = "tbm", any(target_arch = "x86", target_arch = "x86_64")))] +mod tests { + use x86::tbm; + + /* + #[test] + #[target_feature = "+tbm"] + fn _bextr_u32() { + assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _bextr_u64() { + assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); + } + */ + + #[test] + #[target_feature = "+tbm"] + fn _blcfill_u32() { + assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); + assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcfill_u64() { + assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); + assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blci_u32() { + assert_eq!(tbm::_blci_u32(0b0101_0000u32), + 0b1111_1111_1111_1111_1111_1111_1111_1110u32); + assert_eq!(tbm::_blci_u32(0b1111_1111u32), + 0b1111_1111_1111_1111_1111_1110_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blci_u64() { + assert_eq!(tbm::_blci_u64(0b0101_0000u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64); + assert_eq!(tbm::_blci_u64(0b1111_1111u64), + 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcic_u32() { + assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32); + assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcic_u64() { + assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64); + assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcmsk_u32() { + assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); + assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcmsk_u64() { + assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); + assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcs_u32() { + assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32); + assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blcs_u64() { + assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64); + assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blsfill_u32() { + assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); + assert_eq!(tbm::_blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blsfill_u64() { + assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); + assert_eq!(tbm::_blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _blsic_u32() { + assert_eq!(tbm::_blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32); + assert_eq!(tbm::_blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _blsic_u64() { + assert_eq!(tbm::_blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64); + assert_eq!(tbm::_blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _t1mskc_u32() { + assert_eq!(tbm::_t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32); + assert_eq!(tbm::_t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _t1mksc_u64() { + assert_eq!(tbm::_t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64); + assert_eq!(tbm::_t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64); + } + + #[test] + #[target_feature = "+tbm"] + fn _tzmsk_u32() { + assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); + assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); + } + + #[test] + #[target_feature = "+tbm"] + fn _tzmsk_u64() { + assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); + assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); + } +} From 4446e941edd644c51268e71e6b730e7cbbc71d1d Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 20:36:49 +0200 Subject: [PATCH 07/25] [bmi] use llvm intrinsics instead of platform-intrinsic --- library/stdarch/src/x86/bmi.rs | 4 +++- library/stdarch/src/x86/bmi2.rs | 8 +++++++- library/stdarch/src/x86/tbm.rs | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs index 8dcd4919b69d..9932d53b503d 100644 --- a/library/stdarch/src/x86/bmi.rs +++ b/library/stdarch/src/x86/bmi.rs @@ -8,8 +8,10 @@ //! provides a quick overview of the available instructions. #[allow(dead_code)] -extern "platform-intrinsic" { +extern "C" { + #[link_name="llvm.x86.bmi.bextr.32"] fn x86_bmi_bextr_32(x: u32, y: u32) -> u32; + #[link_name="llvm.x86.bmi.bextr.64"] fn x86_bmi_bextr_64(x: u64, y: u64) -> u64; } diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index 3a7a93b37b83..86dc56abc71e 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -32,12 +32,18 @@ pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) { } #[allow(dead_code)] -extern "platform-intrinsic" { +extern "C" { + #[link_name="llvm.x86.bmi.bzhi.32"] fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; + #[link_name="llvm.x86.bmi.bzhi.64"] fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; + #[link_name="llvm.x86.bmi.pdep.32"] fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; + #[link_name="llvm.x86.bmi.pdep.64"] fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; + #[link_name="llvm.x86.bmi.pext.32"] fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; + #[link_name="llvm.x86.bmi.pext.64"] fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; } diff --git a/library/stdarch/src/x86/tbm.rs b/library/stdarch/src/x86/tbm.rs index 2e42bf80f1bf..cfcba05686d0 100644 --- a/library/stdarch/src/x86/tbm.rs +++ b/library/stdarch/src/x86/tbm.rs @@ -7,10 +7,13 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29) //! provides a quick overview of the available instructions. -/* // TODO: LLVM-CODEGEN ERROR +// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32 +/* #[allow(dead_code)] -extern "platform-intrinsic" { +extern "C" { + #[link_name="llvm.x86.tbm.bextri.u32"] fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32; + #[link_name="llvm.x86.tbm.bextri.u64"] fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64; } From 21091b0d710c967c0f4e61904020553c26d6f863 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 20:57:47 +0200 Subject: [PATCH 08/25] [bmi] export all intrinsics --- library/stdarch/src/x86/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index 71e426f9b718..2cde200c086d 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -5,7 +5,11 @@ pub use self::sse41::*; pub use self::sse42::*; pub use self::avx::*; pub use self::avx2::*; + +pub use self::abm::*; pub use self::bmi::*; +pub use self::bmi2::*; +pub use self::tbm::*; #[allow(non_camel_case_types)] pub type __m128i = ::v128::i8x16; From 9483950d3e16f6ec935a3d43d754183291e4a8f4 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 21:05:35 +0200 Subject: [PATCH 09/25] [bmi2] check generated assembly --- library/stdarch/asm/x86_bmi2_bzhi.asm | 12 +++ library/stdarch/asm/x86_bmi2_bzhi.rs | 11 ++ library/stdarch/asm/x86_bmi2_mulx.asm | 17 +++ library/stdarch/asm/x86_bmi2_mulx.rs | 11 ++ library/stdarch/asm/x86_bmi2_pdep.asm | 12 +++ library/stdarch/asm/x86_bmi2_pdep.rs | 11 ++ library/stdarch/asm/x86_bmi2_pext.asm | 12 +++ library/stdarch/asm/x86_bmi2_pext.rs | 11 ++ library/stdarch/check_asm.py | 148 ++++++++++++++++++++++++++ 9 files changed, 245 insertions(+) create mode 100644 library/stdarch/asm/x86_bmi2_bzhi.asm create mode 100644 library/stdarch/asm/x86_bmi2_bzhi.rs create mode 100644 library/stdarch/asm/x86_bmi2_mulx.asm create mode 100644 library/stdarch/asm/x86_bmi2_mulx.rs create mode 100644 library/stdarch/asm/x86_bmi2_pdep.asm create mode 100644 library/stdarch/asm/x86_bmi2_pdep.rs create mode 100644 library/stdarch/asm/x86_bmi2_pext.asm create mode 100644 library/stdarch/asm/x86_bmi2_pext.rs create mode 100755 library/stdarch/check_asm.py diff --git a/library/stdarch/asm/x86_bmi2_bzhi.asm b/library/stdarch/asm/x86_bmi2_bzhi.asm new file mode 100644 index 000000000000..e9df7c0eeefe --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_bzhi.asm @@ -0,0 +1,12 @@ +_bzhi_u32: + pushq %rbp + movq %rsp, %rbp + bzhil %esi, %edi, %eax + popq %rbp + retq +_bzhi_u64: + pushq %rbp + movq %rsp, %rbp + bzhiq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_bzhi.rs b/library/stdarch/asm/x86_bmi2_bzhi.rs new file mode 100644 index 000000000000..98323037c130 --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_bzhi.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn bzhi_u32(x: u32, mask: u32) -> u32 { + stdsimd::vendor::_bzhi_u32(x, mask) +} + +#[no_mangle] +pub fn bzhi_u64(x: u64, mask: u64) -> u64 { + stdsimd::vendor::_bzhi_u64(x, mask) +} diff --git a/library/stdarch/asm/x86_bmi2_mulx.asm b/library/stdarch/asm/x86_bmi2_mulx.asm new file mode 100644 index 000000000000..842e4ebbe2d0 --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_mulx.asm @@ -0,0 +1,17 @@ +_umulx_u32: + pushq %rbp + movq %rsp, %rbp + movl %edi, %ecx + movl %esi, %eax + imulq %rcx, %rax + popq %rbp + retq +_umulx_u64: + pushq %rbp + movq %rsp, %rbp + mulxq %rsi, %rcx, %rax + movq %rcx, (%rdi) + movq %rax, 8(%rdi) + movq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_mulx.rs b/library/stdarch/asm/x86_bmi2_mulx.rs new file mode 100644 index 000000000000..08ce65ef3f4d --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_mulx.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn umulx_u32(x: u32, y: u32) -> (u32, u32) { + stdsimd::vendor::_mulx_u32(x, y) +} + +#[no_mangle] +pub fn umulx_u64(x: u64, y: u64) -> (u64, u64) { + stdsimd::vendor::_mulx_u64(x, y) +} diff --git a/library/stdarch/asm/x86_bmi2_pdep.asm b/library/stdarch/asm/x86_bmi2_pdep.asm new file mode 100644 index 000000000000..3011438fa23f --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_pdep.asm @@ -0,0 +1,12 @@ +_pdep_u32: + pushq %rbp + movq %rsp, %rbp + pdepl %esi, %edi, %eax + popq %rbp + retq +_pdep_u64: + pushq %rbp + movq %rsp, %rbp + pdepq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_pdep.rs b/library/stdarch/asm/x86_bmi2_pdep.rs new file mode 100644 index 000000000000..05c64e0c5a9e --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_pdep.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn pdep_u32(x: u32, mask: u32) -> u32 { + stdsimd::vendor::_pdep_u32(x, mask) +} + +#[no_mangle] +pub fn pdep_u64(x: u64, mask: u64) -> u64 { + stdsimd::vendor::_pdep_u64(x, mask) +} diff --git a/library/stdarch/asm/x86_bmi2_pext.asm b/library/stdarch/asm/x86_bmi2_pext.asm new file mode 100644 index 000000000000..5df3657e4f22 --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_pext.asm @@ -0,0 +1,12 @@ +_pext_u32: + pushq %rbp + movq %rsp, %rbp + pextl %esi, %edi, %eax + popq %rbp + retq +_pext_u64: + pushq %rbp + movq %rsp, %rbp + pextq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_pext.rs b/library/stdarch/asm/x86_bmi2_pext.rs new file mode 100644 index 000000000000..62f795411d89 --- /dev/null +++ b/library/stdarch/asm/x86_bmi2_pext.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn pext_u32(x: u32, mask: u32) -> u32 { + stdsimd::vendor::_pext_u32(x, mask) +} + +#[no_mangle] +pub fn pext_u64(x: u64, mask: u64) -> u64 { + stdsimd::vendor::_pext_u64(x, mask) +} diff --git a/library/stdarch/check_asm.py b/library/stdarch/check_asm.py new file mode 100755 index 000000000000..80529a0b3d6d --- /dev/null +++ b/library/stdarch/check_asm.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# Script to check the assembly generated +import os, sys +import os.path +from subprocess import Popen, PIPE +import argparse + +asm_dir = './asm' + +files = set() +verbose = False +extern_crate = None + +def arm_triplet(arch) : + triples = { 'armv7' : 'armv7-unknown-linux-gnueabihf', + 'armv8' : 'aarch64-unknown-linux-gnu' } + return triples[arch] + + +class File(object): + def __init__(self, path_rs): + self.path_rs = path_rs + self.path_asm_should = os.path.join(os.path.splitext(path_rs)[0] + ".asm") + self.path_asm_output = os.path.join(os.path.splitext(path_rs)[0] + "_output.asm") + self.path_llvmir_output = os.path.join(os.path.splitext(path_rs)[0] + "_ir.ll") + self.name = os.path.splitext(os.path.basename(path_rs))[0] + self.feature = self.name.split("_")[1] + self.arch = self.name.split("_")[0] + + if self.feature == "none": + self.feature = None + + def __str__(self): + return "name: " + self.name + ", path-rs: " + self.path_rs + ", path-asm: " + self.path_asm_should + ', arch: ' + self.arch + ", feature: " + str(self.feature) + + def __hash__(self): + return hash(self.name) + +def find_files(): + for dirpath, dirnames, filenames in os.walk(asm_dir): + for filename in [f for f in filenames if f.endswith(".rs")]: + files.add(File(os.path.join(dirpath, filename))) + +def call(args): + if verbose: + print "command: " + str(args) + p = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True) + lines = p.stdout.readlines() + if verbose and p.returncode != 0: + error = p.stderr.readlines() + print >>sys.stdout, lines + print >>sys.stderr, "ERROR: %s" % error + + +def compile_file(file): + if verbose: + print "Checking: " + str(file) + "..." + + cargo_args = 'cargo rustc --verbose --release -- -C panic=abort ' + if file.feature: + cargo_args = cargo_args + '-C target-feature=+{}'.format(file.feature) + if file.arch == 'armv7' or file.arch == 'armv8': + cargo_args = cargo_args + '--target={}'.format(arm_triplet(file.arch)) + call(str(cargo_args)) + + rustc_args = 'rustc --verbose -C opt-level=3 -C panic="abort" --extern %s=target/release/lib%s.rlib --crate-type lib' % (extern_crate, extern_crate); + if file.feature: + rustc_args = rustc_args + ' -C target-feature=+{}'.format(file.feature) + if file.arch == 'armv7' or file.arch == 'armv8': + rustc_args = rustc_args + ' --target={}'.format(arm_triplet(file.arch)) + rustc_args_asm = rustc_args + ' --emit asm {} -o {}'.format(file.path_rs, file.path_asm_output) + call(rustc_args_asm) + rustc_args_ll = rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output) + call(rustc_args_ll) + + + + if verbose: + print "...done!" + +def diff_files(rustc_output, asm_snippet): + with open(rustc_output, 'r') as rustc_output_file: + rustc_output_lines = rustc_output_file.readlines() + + with open(asm_snippet, 'r') as asm_snippet_file: + asm_snippet_lines = asm_snippet_file.readlines() + + # remove all empty lines and lines starting with "." + rustc_output_lines = [l.strip() for l in rustc_output_lines] + rustc_output_lines = [l for l in rustc_output_lines if not l.startswith(".") and not len(l) == 0] + asm_snippet_lines = [l.strip() for l in asm_snippet_lines] + asm_snippet_lines = [l for l in asm_snippet_lines if not l.startswith(".") and not len(l) == 0] + + results_differ = False + + if len(rustc_output_lines) != len(asm_snippet_lines): + results_differ = True + + for line_is, line_should in zip(rustc_output_lines, asm_snippet_lines): + if line_is != line_should: + results_differ = True + + if results_differ: + print "Error: results differ" + print "Is:" + print rustc_output_lines + print "Should:" + print asm_snippet_lines + return False + + return True + +def check_file(file): + compile_file(file) + return diff_files(file.path_asm_output, file.path_asm_should) + +def main(): + + parser = argparse.ArgumentParser(description='Checks ASM code') + parser.add_argument('-verbose', action="store_true", default=False) + parser.add_argument('-extern-crate', dest='extern_crate', default='stdsimd') + results = parser.parse_args() + + global verbose + if results.verbose: + verbose = True + + global extern_crate + extern_crate = results.extern_crate + + find_files() + + if verbose: + for f in files: + print f + error = False + for f in files: + result = check_file(f) + if not result: + error = True + + if error == True: + exit(1) + else: + exit(0) + +if __name__ == "__main__": + main() From cb1db00983509b055ef80c5618d5e89d8a22d735 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 21:11:06 +0200 Subject: [PATCH 10/25] [abm] check generated assembly --- library/stdarch/asm/x86_lzcnt_lzcnt.asm | 12 ++++++++++++ library/stdarch/asm/x86_lzcnt_lzcnt.rs | 11 +++++++++++ library/stdarch/asm/x86_popcnt_popcnt.asm | 12 ++++++++++++ library/stdarch/asm/x86_popcnt_popcnt.rs | 11 +++++++++++ 4 files changed, 46 insertions(+) create mode 100644 library/stdarch/asm/x86_lzcnt_lzcnt.asm create mode 100644 library/stdarch/asm/x86_lzcnt_lzcnt.rs create mode 100644 library/stdarch/asm/x86_popcnt_popcnt.asm create mode 100644 library/stdarch/asm/x86_popcnt_popcnt.rs diff --git a/library/stdarch/asm/x86_lzcnt_lzcnt.asm b/library/stdarch/asm/x86_lzcnt_lzcnt.asm new file mode 100644 index 000000000000..891fb4ee8f5e --- /dev/null +++ b/library/stdarch/asm/x86_lzcnt_lzcnt.asm @@ -0,0 +1,12 @@ +_lzcnt_u32: + pushq %rbp + movq %rsp, %rbp + lzcntl %edi, %eax + popq %rbp + retq +_lzcnt_u64: + pushq %rbp + movq %rsp, %rbp + lzcntq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_lzcnt_lzcnt.rs b/library/stdarch/asm/x86_lzcnt_lzcnt.rs new file mode 100644 index 000000000000..34185009727b --- /dev/null +++ b/library/stdarch/asm/x86_lzcnt_lzcnt.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn lzcnt_u32(x: u32) -> u32 { + stdsimd::vendor::_lzcnt_u32(x) +} + +#[no_mangle] +pub fn lzcnt_u64(x: u64) -> u64 { + stdsimd::vendor::_lzcnt_u64(x) +} diff --git a/library/stdarch/asm/x86_popcnt_popcnt.asm b/library/stdarch/asm/x86_popcnt_popcnt.asm new file mode 100644 index 000000000000..ef8fcf211c54 --- /dev/null +++ b/library/stdarch/asm/x86_popcnt_popcnt.asm @@ -0,0 +1,12 @@ +_popcnt_u32: + pushq %rbp + movq %rsp, %rbp + popcntl %edi, %eax + popq %rbp + retq +_popcnt_u64: + pushq %rbp + movq %rsp, %rbp + popcntq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_popcnt_popcnt.rs b/library/stdarch/asm/x86_popcnt_popcnt.rs new file mode 100644 index 000000000000..9f215be6b2a3 --- /dev/null +++ b/library/stdarch/asm/x86_popcnt_popcnt.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn popcnt_u32(x: u32) -> u32 { + stdsimd::vendor::_popcnt32(x) +} + +#[no_mangle] +pub fn popcnt_u64(x: u64) -> u64 { + stdsimd::vendor::_popcnt64(x) +} From 4f1f53b707da0eb089701ecb9dca0a3bc3862a35 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 21:23:39 +0200 Subject: [PATCH 11/25] [bmi] check assembly of bmi instructions --- library/stdarch/asm/x86_bmi_andn.asm | 12 ++++++++++ library/stdarch/asm/x86_bmi_andn.rs | 12 ++++++++++ library/stdarch/asm/x86_bmi_bextr.asm | 32 +++++++++++++++++++++++++++ library/stdarch/asm/x86_bmi_bextr.rs | 21 ++++++++++++++++++ library/stdarch/asm/x86_bmi_blsi.asm | 12 ++++++++++ library/stdarch/asm/x86_bmi_blsi.rs | 11 +++++++++ library/stdarch/asm/x86_bmi_blsr.asm | 12 ++++++++++ library/stdarch/asm/x86_bmi_blsr.rs | 11 +++++++++ library/stdarch/asm/x86_bmi_tzcnt.asm | 12 ++++++++++ library/stdarch/asm/x86_bmi_tzcnt.rs | 11 +++++++++ 10 files changed, 146 insertions(+) create mode 100644 library/stdarch/asm/x86_bmi_andn.asm create mode 100644 library/stdarch/asm/x86_bmi_andn.rs create mode 100644 library/stdarch/asm/x86_bmi_bextr.asm create mode 100644 library/stdarch/asm/x86_bmi_bextr.rs create mode 100644 library/stdarch/asm/x86_bmi_blsi.asm create mode 100644 library/stdarch/asm/x86_bmi_blsi.rs create mode 100644 library/stdarch/asm/x86_bmi_blsr.asm create mode 100644 library/stdarch/asm/x86_bmi_blsr.rs create mode 100644 library/stdarch/asm/x86_bmi_tzcnt.asm create mode 100644 library/stdarch/asm/x86_bmi_tzcnt.rs diff --git a/library/stdarch/asm/x86_bmi_andn.asm b/library/stdarch/asm/x86_bmi_andn.asm new file mode 100644 index 000000000000..861b2926a88e --- /dev/null +++ b/library/stdarch/asm/x86_bmi_andn.asm @@ -0,0 +1,12 @@ +_andn_u32: + pushq %rbp + movq %rsp, %rbp + andnl %esi, %edi, %eax + popq %rbp + retq +_andn_u64: + pushq %rbp + movq %rsp, %rbp + andnq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_andn.rs b/library/stdarch/asm/x86_bmi_andn.rs new file mode 100644 index 000000000000..e63529c42e60 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_andn.rs @@ -0,0 +1,12 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn andn_u32(x: u32, y: u32) -> u32 { + stdsimd::vendor::_andn_u32(x, y) +} + +#[no_mangle] +pub fn andn_u64(x: u64, y: u64) -> u64 { + stdsimd::vendor::_andn_u64(x, y) +} + diff --git a/library/stdarch/asm/x86_bmi_bextr.asm b/library/stdarch/asm/x86_bmi_bextr.asm new file mode 100644 index 000000000000..4eda97fa5ce5 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_bextr.asm @@ -0,0 +1,32 @@ +_bextr_u32: + pushq %rbp + movq %rsp, %rbp + movzbl %sil, %eax + shll $8, %edx + movzwl %dx, %ecx + orl %eax, %ecx + bextrl %ecx, %edi, %eax + popq %rbp + retq +_bextr_u64: + pushq %rbp + movq %rsp, %rbp + movzbl %sil, %eax + shlq $8, %rdx + movzwl %dx, %ecx + orq %rax, %rcx + bextrq %rcx, %rdi, %rax + popq %rbp + retq +_bextr2_u32: + pushq %rbp + movq %rsp, %rbp + bextrl %esi, %edi, %eax + popq %rbp + retq +_bextr2_u64: + pushq %rbp + movq %rsp, %rbp + bextrq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_bextr.rs b/library/stdarch/asm/x86_bmi_bextr.rs new file mode 100644 index 000000000000..1c661e529682 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_bextr.rs @@ -0,0 +1,21 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn bextr_u32(x: u32, y: u32, z: u32) -> u32 { + stdsimd::vendor::_bextr_u32(x, y, z) +} + +#[no_mangle] +pub fn bextr_u64(x: u64, y: u64, z: u64) -> u64 { + stdsimd::vendor::_bextr_u64(x, y, z) +} + +#[no_mangle] +pub fn bextr2_u32(x: u32, y: u32) -> u32 { + stdsimd::vendor::_bextr2_u32(x, y) +} + +#[no_mangle] +pub fn bextr2_u64(x: u64, y: u64) -> u64 { + stdsimd::vendor::_bextr2_u64(x, y) +} diff --git a/library/stdarch/asm/x86_bmi_blsi.asm b/library/stdarch/asm/x86_bmi_blsi.asm new file mode 100644 index 000000000000..8ab833541188 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_blsi.asm @@ -0,0 +1,12 @@ +_blsi_u32: + pushq %rbp + movq %rsp, %rbp + blsil %edi, %eax + popq %rbp + retq +_blsi_u64: + pushq %rbp + movq %rsp, %rbp + blsiq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_blsi.rs b/library/stdarch/asm/x86_bmi_blsi.rs new file mode 100644 index 000000000000..637051c43769 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_blsi.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blsi_u32(x: u32) -> u32 { + stdsimd::vendor::_blsi_u32(x) +} + +#[no_mangle] +pub fn blsi_u64(x: u64) -> u64 { + stdsimd::vendor::_blsi_u64(x) +} diff --git a/library/stdarch/asm/x86_bmi_blsr.asm b/library/stdarch/asm/x86_bmi_blsr.asm new file mode 100644 index 000000000000..1918c5b4d207 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_blsr.asm @@ -0,0 +1,12 @@ +_blsr_u32: + pushq %rbp + movq %rsp, %rbp + blsrl %edi, %eax + popq %rbp + retq +_blsr_u64: + pushq %rbp + movq %rsp, %rbp + blsrq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_blsr.rs b/library/stdarch/asm/x86_bmi_blsr.rs new file mode 100644 index 000000000000..48a193869a63 --- /dev/null +++ b/library/stdarch/asm/x86_bmi_blsr.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blsr_u32(x: u32) -> u32 { + stdsimd::vendor::_blsr_u32(x) +} + +#[no_mangle] +pub fn blsr_u64(x: u64) -> u64 { + stdsimd::vendor::_blsr_u64(x) +} diff --git a/library/stdarch/asm/x86_bmi_tzcnt.asm b/library/stdarch/asm/x86_bmi_tzcnt.asm new file mode 100644 index 000000000000..422e7ea9b33c --- /dev/null +++ b/library/stdarch/asm/x86_bmi_tzcnt.asm @@ -0,0 +1,12 @@ +_tzcnt_u32: + pushq %rbp + movq %rsp, %rbp + tzcntl %edi, %eax + popq %rbp + retq +_tzcnt_u64: + pushq %rbp + movq %rsp, %rbp + tzcntq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_tzcnt.rs b/library/stdarch/asm/x86_bmi_tzcnt.rs new file mode 100644 index 000000000000..d4ac48aa2b5e --- /dev/null +++ b/library/stdarch/asm/x86_bmi_tzcnt.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn tzcnt_u32(x: u32) -> u32 { + stdsimd::vendor::_tzcnt_u32(x) +} + +#[no_mangle] +pub fn tzcnt_u64(x: u64) -> u64 { + stdsimd::vendor::_tzcnt_u64(x) +} From 59f6e217511f0c56c2af7bba4cbb63b3eb1130c2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 21:33:33 +0200 Subject: [PATCH 12/25] nitpicks, whitespace, etc --- library/stdarch/asm/x86_bmi2_bzhi.asm | 20 ++++----- library/stdarch/asm/x86_bmi2_mulx.asm | 30 ++++++------- library/stdarch/asm/x86_bmi2_pdep.asm | 10 ++--- library/stdarch/asm/x86_bmi2_pext.asm | 10 ++--- library/stdarch/asm/x86_bmi_andn.asm | 20 ++++----- library/stdarch/asm/x86_bmi_andn.rs | 1 - library/stdarch/asm/x86_bmi_bextr.asm | 56 ++++++++++++------------- library/stdarch/asm/x86_bmi_blsi.asm | 20 ++++----- library/stdarch/asm/x86_bmi_blsr.asm | 20 ++++----- library/stdarch/asm/x86_bmi_tzcnt.asm | 20 ++++----- library/stdarch/asm/x86_lzcnt_lzcnt.asm | 20 ++++----- library/stdarch/check_asm.py | 4 -- library/stdarch/src/x86/abm.rs | 2 - library/stdarch/src/x86/bmi2.rs | 5 --- 14 files changed, 113 insertions(+), 125 deletions(-) diff --git a/library/stdarch/asm/x86_bmi2_bzhi.asm b/library/stdarch/asm/x86_bmi2_bzhi.asm index e9df7c0eeefe..f5e6006f2e28 100644 --- a/library/stdarch/asm/x86_bmi2_bzhi.asm +++ b/library/stdarch/asm/x86_bmi2_bzhi.asm @@ -1,12 +1,12 @@ _bzhi_u32: - pushq %rbp - movq %rsp, %rbp - bzhil %esi, %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + bzhil %esi, %edi, %eax + popq %rbp + retq _bzhi_u64: - pushq %rbp - movq %rsp, %rbp - bzhiq %rsi, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + bzhiq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_mulx.asm b/library/stdarch/asm/x86_bmi2_mulx.asm index 842e4ebbe2d0..e884a07c464c 100644 --- a/library/stdarch/asm/x86_bmi2_mulx.asm +++ b/library/stdarch/asm/x86_bmi2_mulx.asm @@ -1,17 +1,17 @@ _umulx_u32: - pushq %rbp - movq %rsp, %rbp - movl %edi, %ecx - movl %esi, %eax - imulq %rcx, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + movl %edi, %ecx + movl %esi, %eax + imulq %rcx, %rax + popq %rbp + retq _umulx_u64: - pushq %rbp - movq %rsp, %rbp - mulxq %rsi, %rcx, %rax - movq %rcx, (%rdi) - movq %rax, 8(%rdi) - movq %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + mulxq %rsi, %rcx, %rax + movq %rcx, (%rdi) + movq %rax, 8(%rdi) + movq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_pdep.asm b/library/stdarch/asm/x86_bmi2_pdep.asm index 3011438fa23f..157e07a2c87b 100644 --- a/library/stdarch/asm/x86_bmi2_pdep.asm +++ b/library/stdarch/asm/x86_bmi2_pdep.asm @@ -5,8 +5,8 @@ _pdep_u32: popq %rbp retq _pdep_u64: - pushq %rbp - movq %rsp, %rbp - pdepq %rsi, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + pdepq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi2_pext.asm b/library/stdarch/asm/x86_bmi2_pext.asm index 5df3657e4f22..76014780e211 100644 --- a/library/stdarch/asm/x86_bmi2_pext.asm +++ b/library/stdarch/asm/x86_bmi2_pext.asm @@ -5,8 +5,8 @@ _pext_u32: popq %rbp retq _pext_u64: - pushq %rbp - movq %rsp, %rbp - pextq %rsi, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + pextq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_andn.asm b/library/stdarch/asm/x86_bmi_andn.asm index 861b2926a88e..9751ee469c62 100644 --- a/library/stdarch/asm/x86_bmi_andn.asm +++ b/library/stdarch/asm/x86_bmi_andn.asm @@ -1,12 +1,12 @@ _andn_u32: - pushq %rbp - movq %rsp, %rbp - andnl %esi, %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + andnl %esi, %edi, %eax + popq %rbp + retq _andn_u64: - pushq %rbp - movq %rsp, %rbp - andnq %rsi, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + andnq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_andn.rs b/library/stdarch/asm/x86_bmi_andn.rs index e63529c42e60..2770cb930493 100644 --- a/library/stdarch/asm/x86_bmi_andn.rs +++ b/library/stdarch/asm/x86_bmi_andn.rs @@ -9,4 +9,3 @@ pub fn andn_u32(x: u32, y: u32) -> u32 { pub fn andn_u64(x: u64, y: u64) -> u64 { stdsimd::vendor::_andn_u64(x, y) } - diff --git a/library/stdarch/asm/x86_bmi_bextr.asm b/library/stdarch/asm/x86_bmi_bextr.asm index 4eda97fa5ce5..0dd3c950d5ee 100644 --- a/library/stdarch/asm/x86_bmi_bextr.asm +++ b/library/stdarch/asm/x86_bmi_bextr.asm @@ -1,32 +1,32 @@ _bextr_u32: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shll $8, %edx - movzwl %dx, %ecx - orl %eax, %ecx - bextrl %ecx, %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + movzbl %sil, %eax + shll $8, %edx + movzwl %dx, %ecx + orl %eax, %ecx + bextrl %ecx, %edi, %eax + popq %rbp + retq _bextr_u64: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shlq $8, %rdx - movzwl %dx, %ecx - orq %rax, %rcx - bextrq %rcx, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + movzbl %sil, %eax + shlq $8, %rdx + movzwl %dx, %ecx + orq %rax, %rcx + bextrq %rcx, %rdi, %rax + popq %rbp + retq _bextr2_u32: - pushq %rbp - movq %rsp, %rbp - bextrl %esi, %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + bextrl %esi, %edi, %eax + popq %rbp + retq _bextr2_u64: - pushq %rbp - movq %rsp, %rbp - bextrq %rsi, %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + bextrq %rsi, %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_blsi.asm b/library/stdarch/asm/x86_bmi_blsi.asm index 8ab833541188..a2f6231f9c40 100644 --- a/library/stdarch/asm/x86_bmi_blsi.asm +++ b/library/stdarch/asm/x86_bmi_blsi.asm @@ -1,12 +1,12 @@ _blsi_u32: - pushq %rbp - movq %rsp, %rbp - blsil %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + blsil %edi, %eax + popq %rbp + retq _blsi_u64: - pushq %rbp - movq %rsp, %rbp - blsiq %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + blsiq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_blsr.asm b/library/stdarch/asm/x86_bmi_blsr.asm index 1918c5b4d207..8ace6bc19556 100644 --- a/library/stdarch/asm/x86_bmi_blsr.asm +++ b/library/stdarch/asm/x86_bmi_blsr.asm @@ -1,12 +1,12 @@ _blsr_u32: - pushq %rbp - movq %rsp, %rbp - blsrl %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + blsrl %edi, %eax + popq %rbp + retq _blsr_u64: - pushq %rbp - movq %rsp, %rbp - blsrq %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + blsrq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_bmi_tzcnt.asm b/library/stdarch/asm/x86_bmi_tzcnt.asm index 422e7ea9b33c..2412cc177f21 100644 --- a/library/stdarch/asm/x86_bmi_tzcnt.asm +++ b/library/stdarch/asm/x86_bmi_tzcnt.asm @@ -1,12 +1,12 @@ _tzcnt_u32: - pushq %rbp - movq %rsp, %rbp - tzcntl %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + tzcntl %edi, %eax + popq %rbp + retq _tzcnt_u64: - pushq %rbp - movq %rsp, %rbp - tzcntq %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + tzcntq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_lzcnt_lzcnt.asm b/library/stdarch/asm/x86_lzcnt_lzcnt.asm index 891fb4ee8f5e..920644f31308 100644 --- a/library/stdarch/asm/x86_lzcnt_lzcnt.asm +++ b/library/stdarch/asm/x86_lzcnt_lzcnt.asm @@ -1,12 +1,12 @@ _lzcnt_u32: - pushq %rbp - movq %rsp, %rbp - lzcntl %edi, %eax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + lzcntl %edi, %eax + popq %rbp + retq _lzcnt_u64: - pushq %rbp - movq %rsp, %rbp - lzcntq %rdi, %rax - popq %rbp - retq + pushq %rbp + movq %rsp, %rbp + lzcntq %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/check_asm.py b/library/stdarch/check_asm.py index 80529a0b3d6d..1959ed279100 100755 --- a/library/stdarch/check_asm.py +++ b/library/stdarch/check_asm.py @@ -16,7 +16,6 @@ def arm_triplet(arch) : 'armv8' : 'aarch64-unknown-linux-gnu' } return triples[arch] - class File(object): def __init__(self, path_rs): self.path_rs = path_rs @@ -51,7 +50,6 @@ def call(args): print >>sys.stdout, lines print >>sys.stderr, "ERROR: %s" % error - def compile_file(file): if verbose: print "Checking: " + str(file) + "..." @@ -73,8 +71,6 @@ def compile_file(file): rustc_args_ll = rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output) call(rustc_args_ll) - - if verbose: print "...done!" diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs index 0ea94e75a9c4..cefd973dd9e1 100644 --- a/library/stdarch/src/x86/abm.rs +++ b/library/stdarch/src/x86/abm.rs @@ -67,5 +67,3 @@ mod tests { assert_eq!(abm::_popcnt64(0b0101_1010u64), 4); } } - - diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index 86dc56abc71e..64b778e0bff4 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -95,7 +95,6 @@ pub fn _pext_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pext_64(a, mask) } } - #[cfg(all(test, target_feature = "bmi2", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use x86::bmi2; @@ -145,7 +144,6 @@ mod tests { assert_eq!(bmi2::_pdep_u32(n, m1), s1); } - #[test] #[target_feature = "+bmi2"] fn _pdep_u64() { @@ -161,7 +159,6 @@ mod tests { assert_eq!(bmi2::_pdep_u64(n, m1), s1); } - #[test] #[target_feature = "+bmi2"] fn _bzhi_u32() { @@ -178,7 +175,6 @@ mod tests { assert_eq!(bmi2::_bzhi_u64(n, 5), s); } - #[test] #[target_feature = "+bmi2"] fn _mulx_u32() { @@ -204,5 +200,4 @@ mod tests { assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64); assert_eq!(hi, 0b00110001u64); } - } From 08a890a0a0f11635d8cc8671c786360af583887f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 21:51:59 +0200 Subject: [PATCH 13/25] [tbm] check generated code --- library/stdarch/asm/x86_tbm_blcfill.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blcfill.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blci.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blci.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blcic.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blcic.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blcmsk.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blcmsk.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blcs.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blcs.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blsfill.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blsfill.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_blsic.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_blsic.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_t1mskc.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_t1mskc.rs | 11 +++++++++++ library/stdarch/asm/x86_tbm_tzmsk.asm | 12 ++++++++++++ library/stdarch/asm/x86_tbm_tzmsk.rs | 11 +++++++++++ 18 files changed, 207 insertions(+) create mode 100644 library/stdarch/asm/x86_tbm_blcfill.asm create mode 100644 library/stdarch/asm/x86_tbm_blcfill.rs create mode 100644 library/stdarch/asm/x86_tbm_blci.asm create mode 100644 library/stdarch/asm/x86_tbm_blci.rs create mode 100644 library/stdarch/asm/x86_tbm_blcic.asm create mode 100644 library/stdarch/asm/x86_tbm_blcic.rs create mode 100644 library/stdarch/asm/x86_tbm_blcmsk.asm create mode 100644 library/stdarch/asm/x86_tbm_blcmsk.rs create mode 100644 library/stdarch/asm/x86_tbm_blcs.asm create mode 100644 library/stdarch/asm/x86_tbm_blcs.rs create mode 100644 library/stdarch/asm/x86_tbm_blsfill.asm create mode 100644 library/stdarch/asm/x86_tbm_blsfill.rs create mode 100644 library/stdarch/asm/x86_tbm_blsic.asm create mode 100644 library/stdarch/asm/x86_tbm_blsic.rs create mode 100644 library/stdarch/asm/x86_tbm_t1mskc.asm create mode 100644 library/stdarch/asm/x86_tbm_t1mskc.rs create mode 100644 library/stdarch/asm/x86_tbm_tzmsk.asm create mode 100644 library/stdarch/asm/x86_tbm_tzmsk.rs diff --git a/library/stdarch/asm/x86_tbm_blcfill.asm b/library/stdarch/asm/x86_tbm_blcfill.asm new file mode 100644 index 000000000000..a7214198007b --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcfill.asm @@ -0,0 +1,12 @@ +_blcfill_u32: + pushq %rbp + movq %rsp, %rbp + blcfill %edi, %eax + popq %rbp + retq +_blcfill_u64: + pushq %rbp + movq %rsp, %rbp + blcfill %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blcfill.rs b/library/stdarch/asm/x86_tbm_blcfill.rs new file mode 100644 index 000000000000..9712449e1483 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcfill.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blcfill_u32(x: u32) -> u32 { + stdsimd::vendor::_blcfill_u32(x) +} + +#[no_mangle] +pub fn blcfill_u64(x: u64) -> u64 { + stdsimd::vendor::_blcfill_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blci.asm b/library/stdarch/asm/x86_tbm_blci.asm new file mode 100644 index 000000000000..c7a8708b6235 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blci.asm @@ -0,0 +1,12 @@ +_blci_u32: + pushq %rbp + movq %rsp, %rbp + blci %edi, %eax + popq %rbp + retq +_blci_u64: + pushq %rbp + movq %rsp, %rbp + blci %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blci.rs b/library/stdarch/asm/x86_tbm_blci.rs new file mode 100644 index 000000000000..6cc306ed065a --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blci.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blci_u32(x: u32) -> u32 { + stdsimd::vendor::_blci_u32(x) +} + +#[no_mangle] +pub fn blci_u64(x: u64) -> u64 { + stdsimd::vendor::_blci_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blcic.asm b/library/stdarch/asm/x86_tbm_blcic.asm new file mode 100644 index 000000000000..1c6796f1d18e --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcic.asm @@ -0,0 +1,12 @@ +_blcic_u32: + pushq %rbp + movq %rsp, %rbp + blcic %edi, %eax + popq %rbp + retq +_blcic_u64: + pushq %rbp + movq %rsp, %rbp + blcic %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blcic.rs b/library/stdarch/asm/x86_tbm_blcic.rs new file mode 100644 index 000000000000..390d131d6cb3 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcic.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blcic_u32(x: u32) -> u32 { + stdsimd::vendor::_blcic_u32(x) +} + +#[no_mangle] +pub fn blcic_u64(x: u64) -> u64 { + stdsimd::vendor::_blcic_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blcmsk.asm b/library/stdarch/asm/x86_tbm_blcmsk.asm new file mode 100644 index 000000000000..360aff904c85 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcmsk.asm @@ -0,0 +1,12 @@ +_blcmsk_u32: + pushq %rbp + movq %rsp, %rbp + blcmsk %edi, %eax + popq %rbp + retq +_blcmsk_u64: + pushq %rbp + movq %rsp, %rbp + blcmsk %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blcmsk.rs b/library/stdarch/asm/x86_tbm_blcmsk.rs new file mode 100644 index 000000000000..7174b778deac --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcmsk.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blcmsk_u32(x: u32) -> u32 { + stdsimd::vendor::_blcmsk_u32(x) +} + +#[no_mangle] +pub fn blcmsk_u64(x: u64) -> u64 { + stdsimd::vendor::_blcmsk_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blcs.asm b/library/stdarch/asm/x86_tbm_blcs.asm new file mode 100644 index 000000000000..6a524b162d00 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcs.asm @@ -0,0 +1,12 @@ +_blcs_u32: + pushq %rbp + movq %rsp, %rbp + blcs %edi, %eax + popq %rbp + retq +_blcs_u64: + pushq %rbp + movq %rsp, %rbp + blcs %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blcs.rs b/library/stdarch/asm/x86_tbm_blcs.rs new file mode 100644 index 000000000000..9c8d51ab7f19 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blcs.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blcs_u32(x: u32) -> u32 { + stdsimd::vendor::_blcs_u32(x) +} + +#[no_mangle] +pub fn blcs_u64(x: u64) -> u64 { + stdsimd::vendor::_blcs_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blsfill.asm b/library/stdarch/asm/x86_tbm_blsfill.asm new file mode 100644 index 000000000000..aa756feec33e --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blsfill.asm @@ -0,0 +1,12 @@ +_blsfill_u32: + pushq %rbp + movq %rsp, %rbp + blsfill %edi, %eax + popq %rbp + retq +_blsfill_u64: + pushq %rbp + movq %rsp, %rbp + blsfill %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blsfill.rs b/library/stdarch/asm/x86_tbm_blsfill.rs new file mode 100644 index 000000000000..f794dc63bf87 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blsfill.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blsfill_u32(x: u32) -> u32 { + stdsimd::vendor::_blsfill_u32(x) +} + +#[no_mangle] +pub fn blsfill_u64(x: u64) -> u64 { + stdsimd::vendor::_blsfill_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_blsic.asm b/library/stdarch/asm/x86_tbm_blsic.asm new file mode 100644 index 000000000000..d400398283a2 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blsic.asm @@ -0,0 +1,12 @@ +_blsic_u32: + pushq %rbp + movq %rsp, %rbp + blsic %edi, %eax + popq %rbp + retq +_blsic_u64: + pushq %rbp + movq %rsp, %rbp + blsic %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_blsic.rs b/library/stdarch/asm/x86_tbm_blsic.rs new file mode 100644 index 000000000000..d79f1937d183 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_blsic.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn blsic_u32(x: u32) -> u32 { + stdsimd::vendor::_blsic_u32(x) +} + +#[no_mangle] +pub fn blsic_u64(x: u64) -> u64 { + stdsimd::vendor::_blsic_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_t1mskc.asm b/library/stdarch/asm/x86_tbm_t1mskc.asm new file mode 100644 index 000000000000..414a463a7230 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_t1mskc.asm @@ -0,0 +1,12 @@ +_t1mskc_u32: + pushq %rbp + movq %rsp, %rbp + t1mskc %edi, %eax + popq %rbp + retq +_t1mskc_u64: + pushq %rbp + movq %rsp, %rbp + t1mskc %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_t1mskc.rs b/library/stdarch/asm/x86_tbm_t1mskc.rs new file mode 100644 index 000000000000..e1fe51565e68 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_t1mskc.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn t1mskc_u32(x: u32) -> u32 { + stdsimd::vendor::_t1mskc_u32(x) +} + +#[no_mangle] +pub fn t1mskc_u64(x: u64) -> u64 { + stdsimd::vendor::_t1mskc_u64(x) +} diff --git a/library/stdarch/asm/x86_tbm_tzmsk.asm b/library/stdarch/asm/x86_tbm_tzmsk.asm new file mode 100644 index 000000000000..fa471844b749 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_tzmsk.asm @@ -0,0 +1,12 @@ +_tzmsk_u32: + pushq %rbp + movq %rsp, %rbp + tzmsk %edi, %eax + popq %rbp + retq +_tzmsk_u64: + pushq %rbp + movq %rsp, %rbp + tzmsk %rdi, %rax + popq %rbp + retq diff --git a/library/stdarch/asm/x86_tbm_tzmsk.rs b/library/stdarch/asm/x86_tbm_tzmsk.rs new file mode 100644 index 000000000000..7f8eb4a1b2a7 --- /dev/null +++ b/library/stdarch/asm/x86_tbm_tzmsk.rs @@ -0,0 +1,11 @@ +extern crate stdsimd; + +#[no_mangle] +pub fn tzmsk_u32(x: u32) -> u32 { + stdsimd::vendor::_tzmsk_u32(x) +} + +#[no_mangle] +pub fn tzmsk_u64(x: u64) -> u64 { + stdsimd::vendor::_tzmsk_u64(x) +} From e64f80e4793e55f65ecb7ce2bb9e0fc2cecd8d54 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 19 Sep 2017 22:15:17 +0200 Subject: [PATCH 14/25] [abm] use lzcnt and popcnt features --- library/stdarch/src/x86/abm.rs | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs index cefd973dd9e1..7479bf2f17ef 100644 --- a/library/stdarch/src/x86/abm.rs +++ b/library/stdarch/src/x86/abm.rs @@ -1,10 +1,6 @@ //! Advanced Bit Manipulation (ABM) instructions //! -//! That is, POPCNT and LZCNT. These instructions have their own CPUID bits to -//! indicate support. -//! -//! TODO: it is unclear which target feature to use here. SSE4.2 should be good -//! enough but we might need to use BMI for LZCNT if there are any problems. +//! The POPCNT and LZCNT have their own CPUID bits to indicate support. //! //! The references are: //! @@ -19,50 +15,50 @@ /// /// When the operand is zero, it returns its size in bits. #[inline(always)] -#[target_feature = "+sse4.2"] +#[target_feature = "+lzcnt"] pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. #[inline(always)] -#[target_feature = "+sse4.2"] +#[target_feature = "+lzcnt"] pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Counts the bits that are set. #[inline(always)] -#[target_feature = "+sse4.2"] +#[target_feature = "+popcnt"] pub fn _popcnt32(x: u32) -> u32 { x.count_ones() } /// Counts the bits that are set. #[inline(always)] -#[target_feature = "+sse4.2"] +#[target_feature = "+popcnt"] pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 } -#[cfg(all(test, target_feature = "sse4.2", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] mod tests { use x86::abm; #[test] - #[target_feature = "+sse4.2"] + #[target_feature = "+lzcnt"] fn _lzcnt_u32() { assert_eq!(abm::_lzcnt_u32(0b0101_1010u32), 25u32); } #[test] - #[target_feature = "+sse4.2"] + #[target_feature = "+lzcnt"] fn _lzcnt_u64() { assert_eq!(abm::_lzcnt_u64(0b0101_1010u64), 57u64); } #[test] - #[target_feature = "+sse4.2"] + #[target_feature = "+popcnt"] fn _popcnt32() { assert_eq!(abm::_popcnt32(0b0101_1010u32), 4); } #[test] - #[target_feature = "+sse4.2"] + #[target_feature = "+popcnt"] fn _popcnt64() { assert_eq!(abm::_popcnt64(0b0101_1010u64), 4); } From 9b96985000f6b88c1ecb0c435afc7fc45267b952 Mon Sep 17 00:00:00 2001 From: Mohan Rajendran Date: Wed, 20 Sep 2017 05:50:06 -0500 Subject: [PATCH 15/25] Added _mm_unpackhi_ps function (#16) Added _mm_unpackhi_ps --- library/stdarch/.vscode/temp.sql | 0 library/stdarch/TODO.md | 2 +- library/stdarch/src/x86/sse.rs | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 library/stdarch/.vscode/temp.sql diff --git a/library/stdarch/.vscode/temp.sql b/library/stdarch/.vscode/temp.sql new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/library/stdarch/TODO.md b/library/stdarch/TODO.md index 12832620bf33..6b69e250da84 100644 --- a/library/stdarch/TODO.md +++ b/library/stdarch/TODO.md @@ -155,7 +155,7 @@ sse * [ ] `_mm_storer_ps` * [ ] `_mm_move_ss` * [ ] `_mm_shuffle_ps` -* [ ] `_mm_unpackhi_ps` +* [x] `_mm_unpackhi_ps` * [ ] `_mm_unpacklo_ps` * [ ] `_mm_movehl_ps` * [ ] `_mm_movelh_ps` diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index d1af0e1a9b61..e1706d107d81 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -1,3 +1,4 @@ +use simd_llvm::simd_shuffle4; use v128::*; /// Return the square root of packed single-precision (32-bit) floating-point @@ -40,6 +41,14 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { maxps(a, b) } } +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the high half of `a` and `b`; +#[inline(always)] +#[target_feature = "+sse"] +pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 { + unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) } +} + /// Return a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. @@ -116,6 +125,15 @@ mod tests { assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0)); } + #[test] + #[target_feature = "+sse"] + fn _mm_unpackhi_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let r = sse::_mm_unpackhi_ps(a, b); + assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0)); + } + #[test] #[target_feature = "+sse"] fn _mm_movemask_ps() { From fe5d8f939f9b81b4042eac440f63af53447c121f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Kocsis?= Date: Tue, 19 Sep 2017 20:50:15 +0200 Subject: [PATCH 16/25] SSE4.1 dot product instructions --- library/stdarch/src/x86/macros.rs | 263 +++++++++++++++++++++++++++++ library/stdarch/src/x86/mod.rs | 2 + library/stdarch/src/x86/sse41.rs | 59 +++++++ library/stdarch/src/x86/sse42.rs | 264 ------------------------------ 4 files changed, 324 insertions(+), 264 deletions(-) create mode 100644 library/stdarch/src/x86/macros.rs diff --git a/library/stdarch/src/x86/macros.rs b/library/stdarch/src/x86/macros.rs new file mode 100644 index 000000000000..ebe101518143 --- /dev/null +++ b/library/stdarch/src/x86/macros.rs @@ -0,0 +1,263 @@ +macro_rules! constify_imm8 { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match $imm8 & 0b1111_1111 { + 0 => $expand!(0), + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + 7 => $expand!(7), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + 11 => $expand!(11), + 12 => $expand!(12), + 13 => $expand!(13), + 14 => $expand!(14), + 15 => $expand!(15), + 16 => $expand!(16), + 17 => $expand!(17), + 18 => $expand!(18), + 19 => $expand!(19), + 20 => $expand!(20), + 21 => $expand!(21), + 22 => $expand!(22), + 23 => $expand!(23), + 24 => $expand!(24), + 25 => $expand!(25), + 26 => $expand!(26), + 27 => $expand!(27), + 28 => $expand!(28), + 29 => $expand!(29), + 30 => $expand!(30), + 31 => $expand!(31), + 32 => $expand!(32), + 33 => $expand!(33), + 34 => $expand!(34), + 35 => $expand!(35), + 36 => $expand!(36), + 37 => $expand!(37), + 38 => $expand!(38), + 39 => $expand!(39), + 40 => $expand!(40), + 41 => $expand!(41), + 42 => $expand!(42), + 43 => $expand!(43), + 44 => $expand!(44), + 45 => $expand!(45), + 46 => $expand!(46), + 47 => $expand!(47), + 48 => $expand!(48), + 49 => $expand!(49), + 50 => $expand!(50), + 51 => $expand!(51), + 52 => $expand!(52), + 53 => $expand!(53), + 54 => $expand!(54), + 55 => $expand!(55), + 56 => $expand!(56), + 57 => $expand!(57), + 58 => $expand!(58), + 59 => $expand!(59), + 60 => $expand!(60), + 61 => $expand!(61), + 62 => $expand!(62), + 63 => $expand!(63), + 64 => $expand!(64), + 65 => $expand!(65), + 66 => $expand!(66), + 67 => $expand!(67), + 68 => $expand!(68), + 69 => $expand!(69), + 70 => $expand!(70), + 71 => $expand!(71), + 72 => $expand!(72), + 73 => $expand!(73), + 74 => $expand!(74), + 75 => $expand!(75), + 76 => $expand!(76), + 77 => $expand!(77), + 78 => $expand!(78), + 79 => $expand!(79), + 80 => $expand!(80), + 81 => $expand!(81), + 82 => $expand!(82), + 83 => $expand!(83), + 84 => $expand!(84), + 85 => $expand!(85), + 86 => $expand!(86), + 87 => $expand!(87), + 88 => $expand!(88), + 89 => $expand!(89), + 90 => $expand!(90), + 91 => $expand!(91), + 92 => $expand!(92), + 93 => $expand!(93), + 94 => $expand!(94), + 95 => $expand!(95), + 96 => $expand!(96), + 97 => $expand!(97), + 98 => $expand!(98), + 99 => $expand!(99), + 100 => $expand!(100), + 101 => $expand!(101), + 102 => $expand!(102), + 103 => $expand!(103), + 104 => $expand!(104), + 105 => $expand!(105), + 106 => $expand!(106), + 107 => $expand!(107), + 108 => $expand!(108), + 109 => $expand!(109), + 110 => $expand!(110), + 111 => $expand!(111), + 112 => $expand!(112), + 113 => $expand!(113), + 114 => $expand!(114), + 115 => $expand!(115), + 116 => $expand!(116), + 117 => $expand!(117), + 118 => $expand!(118), + 119 => $expand!(119), + 120 => $expand!(120), + 121 => $expand!(121), + 122 => $expand!(122), + 123 => $expand!(123), + 124 => $expand!(124), + 125 => $expand!(125), + 126 => $expand!(126), + 127 => $expand!(127), + 128 => $expand!(128), + 129 => $expand!(129), + 130 => $expand!(130), + 131 => $expand!(131), + 132 => $expand!(132), + 133 => $expand!(133), + 134 => $expand!(134), + 135 => $expand!(135), + 136 => $expand!(136), + 137 => $expand!(137), + 138 => $expand!(138), + 139 => $expand!(139), + 140 => $expand!(140), + 141 => $expand!(141), + 142 => $expand!(142), + 143 => $expand!(143), + 144 => $expand!(144), + 145 => $expand!(145), + 146 => $expand!(146), + 147 => $expand!(147), + 148 => $expand!(148), + 149 => $expand!(149), + 150 => $expand!(150), + 151 => $expand!(151), + 152 => $expand!(152), + 153 => $expand!(153), + 154 => $expand!(154), + 155 => $expand!(155), + 156 => $expand!(156), + 157 => $expand!(157), + 158 => $expand!(158), + 159 => $expand!(159), + 160 => $expand!(160), + 161 => $expand!(161), + 162 => $expand!(162), + 163 => $expand!(163), + 164 => $expand!(164), + 165 => $expand!(165), + 166 => $expand!(166), + 167 => $expand!(167), + 168 => $expand!(168), + 169 => $expand!(169), + 170 => $expand!(170), + 171 => $expand!(171), + 172 => $expand!(172), + 173 => $expand!(173), + 174 => $expand!(174), + 175 => $expand!(175), + 176 => $expand!(176), + 177 => $expand!(177), + 178 => $expand!(178), + 179 => $expand!(179), + 180 => $expand!(180), + 181 => $expand!(181), + 182 => $expand!(182), + 183 => $expand!(183), + 184 => $expand!(184), + 185 => $expand!(185), + 186 => $expand!(186), + 187 => $expand!(187), + 188 => $expand!(188), + 189 => $expand!(189), + 190 => $expand!(190), + 191 => $expand!(191), + 192 => $expand!(192), + 193 => $expand!(193), + 194 => $expand!(194), + 195 => $expand!(195), + 196 => $expand!(196), + 197 => $expand!(197), + 198 => $expand!(198), + 199 => $expand!(199), + 200 => $expand!(200), + 201 => $expand!(201), + 202 => $expand!(202), + 203 => $expand!(203), + 204 => $expand!(204), + 205 => $expand!(205), + 206 => $expand!(206), + 207 => $expand!(207), + 208 => $expand!(208), + 209 => $expand!(209), + 210 => $expand!(210), + 211 => $expand!(211), + 212 => $expand!(212), + 213 => $expand!(213), + 214 => $expand!(214), + 215 => $expand!(215), + 216 => $expand!(216), + 217 => $expand!(217), + 218 => $expand!(218), + 219 => $expand!(219), + 220 => $expand!(220), + 221 => $expand!(221), + 222 => $expand!(222), + 223 => $expand!(223), + 224 => $expand!(224), + 225 => $expand!(225), + 226 => $expand!(226), + 227 => $expand!(227), + 228 => $expand!(228), + 229 => $expand!(229), + 230 => $expand!(230), + 231 => $expand!(231), + 232 => $expand!(232), + 233 => $expand!(233), + 234 => $expand!(234), + 235 => $expand!(235), + 236 => $expand!(236), + 237 => $expand!(237), + 238 => $expand!(238), + 239 => $expand!(239), + 240 => $expand!(240), + 241 => $expand!(241), + 242 => $expand!(242), + 243 => $expand!(243), + 244 => $expand!(244), + 245 => $expand!(245), + 246 => $expand!(246), + 247 => $expand!(247), + 248 => $expand!(248), + 249 => $expand!(249), + 250 => $expand!(250), + 251 => $expand!(251), + 252 => $expand!(252), + 253 => $expand!(253), + 254 => $expand!(254), + _ => $expand!(255), + } + } +} diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs index 2cde200c086d..c840ffff25ac 100644 --- a/library/stdarch/src/x86/mod.rs +++ b/library/stdarch/src/x86/mod.rs @@ -16,6 +16,8 @@ pub type __m128i = ::v128::i8x16; #[allow(non_camel_case_types)] pub type __m256i = ::v256::i8x32; +#[macro_use] +mod macros; mod sse; mod sse2; mod ssse3; diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs index b7dc45263e39..955026e2b4d0 100644 --- a/library/stdarch/src/x86/sse41.rs +++ b/library/stdarch/src/x86/sse41.rs @@ -1,3 +1,4 @@ +use v128::*; use x86::__m128i; #[inline(always)] @@ -10,10 +11,50 @@ pub fn _mm_blendv_epi8( unsafe { pblendvb(a, b, mask) } } +/// Returns the dot product of two f64x2 vectors. +/// +/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask. +/// If a condition mask bit is zero, the corresponding multiplication is +/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of +/// the dot product will be stored in the return value component. Otherwise if +/// the broadcast mask bit is zero then the return component will be zero. +#[inline(always)] +#[target_feature = "+sse4.1"] +pub fn _mm_dp_pd(a: f64x2, b: f64x2, imm8: u8) -> f64x2 { + macro_rules! call { + ($imm8:expr) => { + unsafe { dppd(a, b, $imm8) } + } + } + constify_imm8!(imm8, call) +} + +/// Returns the dot product of two f32x4 vectors. +/// +/// `imm8[3:0]` is the broadcast mask, and `imm8[7:4]` is the condition mask. +/// If a condition mask bit is zero, the corresponding multiplication is +/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of +/// the dot product will be stored in the return value component. Otherwise if +/// the broadcast mask bit is zero then the return component will be zero. +#[inline(always)] +#[target_feature = "+sse4.1"] +pub fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 { + macro_rules! call { + ($imm8:expr) => { + unsafe { dpps(a, b, $imm8) } + } + } + constify_imm8!(imm8, call) +} + #[allow(improper_ctypes)] extern { #[link_name = "llvm.x86.sse41.pblendvb"] fn pblendvb(a: __m128i, b: __m128i, mask: __m128i) -> __m128i; + #[link_name = "llvm.x86.sse41.dppd"] + fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2; + #[link_name = "llvm.x86.sse41.dpps"] + fn dpps(a: f32x4, b: f32x4, imm8: u8) -> f32x4; } #[cfg(all(test, target_feature = "sse4.1", any(target_arch = "x86", target_arch = "x86_64")))] @@ -34,4 +75,22 @@ mod tests { 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31); assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e); } + + #[test] + #[target_feature = "+sse4.1"] + fn _mm_dp_pd() { + let a = f64x2::new(2.0, 3.0); + let b = f64x2::new(1.0, 4.0); + let e = f64x2::new(14.0, 0.0); + assert_eq!(sse41::_mm_dp_pd(a, b, 0b00110001), e); + } + + #[test] + #[target_feature = "+sse4.1"] + fn _mm_dp_ps() { + let a = f32x4::new(2.0, 3.0, 1.0, 10.0); + let b = f32x4::new(1.0, 4.0, 0.5, 10.0); + let e = f32x4::new(14.5, 0.0, 14.5, 0.0); + assert_eq!(sse41::_mm_dp_ps(a, b, 0b01110101), e); + } } diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs index 242906119f7c..7459997f7b5c 100644 --- a/library/stdarch/src/x86/sse42.rs +++ b/library/stdarch/src/x86/sse42.rs @@ -17,270 +17,6 @@ pub const _SIDD_MASKED_NEGATIVE_POLARITY: i8 = 0b00110000; pub const _SIDD_LEAST_SIGNIFICANT: i8 = 0b00000000; pub const _SIDD_MOST_SIGNIFICANT: i8 = 0b01000000; -macro_rules! constify_imm8 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match $imm8 & 0b1111_1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - 15 => $expand!(15), - 16 => $expand!(16), - 17 => $expand!(17), - 18 => $expand!(18), - 19 => $expand!(19), - 20 => $expand!(20), - 21 => $expand!(21), - 22 => $expand!(22), - 23 => $expand!(23), - 24 => $expand!(24), - 25 => $expand!(25), - 26 => $expand!(26), - 27 => $expand!(27), - 28 => $expand!(28), - 29 => $expand!(29), - 30 => $expand!(30), - 31 => $expand!(31), - 32 => $expand!(32), - 33 => $expand!(33), - 34 => $expand!(34), - 35 => $expand!(35), - 36 => $expand!(36), - 37 => $expand!(37), - 38 => $expand!(38), - 39 => $expand!(39), - 40 => $expand!(40), - 41 => $expand!(41), - 42 => $expand!(42), - 43 => $expand!(43), - 44 => $expand!(44), - 45 => $expand!(45), - 46 => $expand!(46), - 47 => $expand!(47), - 48 => $expand!(48), - 49 => $expand!(49), - 50 => $expand!(50), - 51 => $expand!(51), - 52 => $expand!(52), - 53 => $expand!(53), - 54 => $expand!(54), - 55 => $expand!(55), - 56 => $expand!(56), - 57 => $expand!(57), - 58 => $expand!(58), - 59 => $expand!(59), - 60 => $expand!(60), - 61 => $expand!(61), - 62 => $expand!(62), - 63 => $expand!(63), - 64 => $expand!(64), - 65 => $expand!(65), - 66 => $expand!(66), - 67 => $expand!(67), - 68 => $expand!(68), - 69 => $expand!(69), - 70 => $expand!(70), - 71 => $expand!(71), - 72 => $expand!(72), - 73 => $expand!(73), - 74 => $expand!(74), - 75 => $expand!(75), - 76 => $expand!(76), - 77 => $expand!(77), - 78 => $expand!(78), - 79 => $expand!(79), - 80 => $expand!(80), - 81 => $expand!(81), - 82 => $expand!(82), - 83 => $expand!(83), - 84 => $expand!(84), - 85 => $expand!(85), - 86 => $expand!(86), - 87 => $expand!(87), - 88 => $expand!(88), - 89 => $expand!(89), - 90 => $expand!(90), - 91 => $expand!(91), - 92 => $expand!(92), - 93 => $expand!(93), - 94 => $expand!(94), - 95 => $expand!(95), - 96 => $expand!(96), - 97 => $expand!(97), - 98 => $expand!(98), - 99 => $expand!(99), - 100 => $expand!(100), - 101 => $expand!(101), - 102 => $expand!(102), - 103 => $expand!(103), - 104 => $expand!(104), - 105 => $expand!(105), - 106 => $expand!(106), - 107 => $expand!(107), - 108 => $expand!(108), - 109 => $expand!(109), - 110 => $expand!(110), - 111 => $expand!(111), - 112 => $expand!(112), - 113 => $expand!(113), - 114 => $expand!(114), - 115 => $expand!(115), - 116 => $expand!(116), - 117 => $expand!(117), - 118 => $expand!(118), - 119 => $expand!(119), - 120 => $expand!(120), - 121 => $expand!(121), - 122 => $expand!(122), - 123 => $expand!(123), - 124 => $expand!(124), - 125 => $expand!(125), - 126 => $expand!(126), - 127 => $expand!(127), - 128 => $expand!(128), - 129 => $expand!(129), - 130 => $expand!(130), - 131 => $expand!(131), - 132 => $expand!(132), - 133 => $expand!(133), - 134 => $expand!(134), - 135 => $expand!(135), - 136 => $expand!(136), - 137 => $expand!(137), - 138 => $expand!(138), - 139 => $expand!(139), - 140 => $expand!(140), - 141 => $expand!(141), - 142 => $expand!(142), - 143 => $expand!(143), - 144 => $expand!(144), - 145 => $expand!(145), - 146 => $expand!(146), - 147 => $expand!(147), - 148 => $expand!(148), - 149 => $expand!(149), - 150 => $expand!(150), - 151 => $expand!(151), - 152 => $expand!(152), - 153 => $expand!(153), - 154 => $expand!(154), - 155 => $expand!(155), - 156 => $expand!(156), - 157 => $expand!(157), - 158 => $expand!(158), - 159 => $expand!(159), - 160 => $expand!(160), - 161 => $expand!(161), - 162 => $expand!(162), - 163 => $expand!(163), - 164 => $expand!(164), - 165 => $expand!(165), - 166 => $expand!(166), - 167 => $expand!(167), - 168 => $expand!(168), - 169 => $expand!(169), - 170 => $expand!(170), - 171 => $expand!(171), - 172 => $expand!(172), - 173 => $expand!(173), - 174 => $expand!(174), - 175 => $expand!(175), - 176 => $expand!(176), - 177 => $expand!(177), - 178 => $expand!(178), - 179 => $expand!(179), - 180 => $expand!(180), - 181 => $expand!(181), - 182 => $expand!(182), - 183 => $expand!(183), - 184 => $expand!(184), - 185 => $expand!(185), - 186 => $expand!(186), - 187 => $expand!(187), - 188 => $expand!(188), - 189 => $expand!(189), - 190 => $expand!(190), - 191 => $expand!(191), - 192 => $expand!(192), - 193 => $expand!(193), - 194 => $expand!(194), - 195 => $expand!(195), - 196 => $expand!(196), - 197 => $expand!(197), - 198 => $expand!(198), - 199 => $expand!(199), - 200 => $expand!(200), - 201 => $expand!(201), - 202 => $expand!(202), - 203 => $expand!(203), - 204 => $expand!(204), - 205 => $expand!(205), - 206 => $expand!(206), - 207 => $expand!(207), - 208 => $expand!(208), - 209 => $expand!(209), - 210 => $expand!(210), - 211 => $expand!(211), - 212 => $expand!(212), - 213 => $expand!(213), - 214 => $expand!(214), - 215 => $expand!(215), - 216 => $expand!(216), - 217 => $expand!(217), - 218 => $expand!(218), - 219 => $expand!(219), - 220 => $expand!(220), - 221 => $expand!(221), - 222 => $expand!(222), - 223 => $expand!(223), - 224 => $expand!(224), - 225 => $expand!(225), - 226 => $expand!(226), - 227 => $expand!(227), - 228 => $expand!(228), - 229 => $expand!(229), - 230 => $expand!(230), - 231 => $expand!(231), - 232 => $expand!(232), - 233 => $expand!(233), - 234 => $expand!(234), - 235 => $expand!(235), - 236 => $expand!(236), - 237 => $expand!(237), - 238 => $expand!(238), - 239 => $expand!(239), - 240 => $expand!(240), - 241 => $expand!(241), - 242 => $expand!(242), - 243 => $expand!(243), - 244 => $expand!(244), - 245 => $expand!(245), - 246 => $expand!(246), - 247 => $expand!(247), - 248 => $expand!(248), - 249 => $expand!(249), - 250 => $expand!(250), - 251 => $expand!(251), - 252 => $expand!(252), - 253 => $expand!(253), - 254 => $expand!(254), - _ => $expand!(255), - } - } -} - #[inline(always)] #[target_feature = "+sse4.2"] pub fn _mm_cmpestri( From ba7f62715a84c178182b3cd22a1d7419cfd9796d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 19 Sep 2017 14:46:00 -0700 Subject: [PATCH 17/25] Add assertions correct instructions are generated This commit adds a procedural macro which can be used to test instruction generation in a lightweight way. The intention is that all functions are annotated with: #[cfg_attr(test, assert_instr(maxps))] fn foo(...) { // ... } and then during `cargo test --release` it'll assert the function `foo` does indeed generate the instruction `maxps`. This only activates tests in optimized mode to avoid debug mode inefficiencies, and it uses a literal invocation of `objdump` and some parsing to figure out what instructions are inside each function. Finally it also uses the `backtrace` crate to figure out the symbol name of the relevant function and hook that up to the output of `objdump`. I added a few assertions in the `sse` module to get some feedback, but curious what y'all think of this! --- library/stdarch/.appveyor.yml | 18 ++ library/stdarch/.travis.yml | 16 ++ library/stdarch/Cargo.toml | 7 + library/stdarch/assert-instr/Cargo.toml | 11 + .../assert-instr-macro/Cargo.toml | 7 + .../assert-instr/assert-instr-macro/build.rs | 10 + .../assert-instr-macro/src/lib.rs | 59 +++++ library/stdarch/assert-instr/src/lib.rs | 247 ++++++++++++++++++ library/stdarch/src/lib.rs | 4 + library/stdarch/src/x86/sse.rs | 9 + 10 files changed, 388 insertions(+) create mode 100644 library/stdarch/.appveyor.yml create mode 100644 library/stdarch/.travis.yml create mode 100644 library/stdarch/assert-instr/Cargo.toml create mode 100644 library/stdarch/assert-instr/assert-instr-macro/Cargo.toml create mode 100644 library/stdarch/assert-instr/assert-instr-macro/build.rs create mode 100644 library/stdarch/assert-instr/assert-instr-macro/src/lib.rs create mode 100644 library/stdarch/assert-instr/src/lib.rs diff --git a/library/stdarch/.appveyor.yml b/library/stdarch/.appveyor.yml new file mode 100644 index 000000000000..352b3bc3aa91 --- /dev/null +++ b/library/stdarch/.appveyor.yml @@ -0,0 +1,18 @@ +environment: + matrix: + - TARGET: x86_64-pc-windows-msvc + +install: + # Install rust, x86_64-pc-windows-msvc host + - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin + - if NOT "%TARGET%" == "x86_64-pc-windows-msvc" rustup target add %TARGET% + - rustc -vV + - cargo -vV + +build: false + +test_script: + - cargo test --target %TARGET% + - cargo test --target %TARGET% --release diff --git a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml new file mode 100644 index 000000000000..12638698cb44 --- /dev/null +++ b/library/stdarch/.travis.yml @@ -0,0 +1,16 @@ +language: rust +sudo: false + +matrix: + include: + - rust: nightly + - rust: nightly + os: osx + +script: + - cargo test + - cargo test --release + +notifications: + email: + on_success: never diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index 9a6e7c95d704..0da061e71c80 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -13,3 +13,10 @@ license = "MIT" [profile.release] debug = true opt-level = 3 + +[profile.bench] +debug = 1 +opt-level = 3 + +[dev-dependencies] +assert-instr = { path = "assert-instr" } diff --git a/library/stdarch/assert-instr/Cargo.toml b/library/stdarch/assert-instr/Cargo.toml new file mode 100644 index 000000000000..fda3e32c7682 --- /dev/null +++ b/library/stdarch/assert-instr/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "assert-instr" +version = "0.1.0" +authors = ["Alex Crichton "] + +[dependencies] +assert-instr-macro = { path = "assert-instr-macro" } +backtrace = "0.3" +cc = "1.0" +lazy_static = "0.2" +rustc-demangle = "0.1" diff --git a/library/stdarch/assert-instr/assert-instr-macro/Cargo.toml b/library/stdarch/assert-instr/assert-instr-macro/Cargo.toml new file mode 100644 index 000000000000..367f4b5e94ba --- /dev/null +++ b/library/stdarch/assert-instr/assert-instr-macro/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "assert-instr-macro" +version = "0.1.0" +authors = ["Alex Crichton "] + +[lib] +proc-macro = true diff --git a/library/stdarch/assert-instr/assert-instr-macro/build.rs b/library/stdarch/assert-instr/assert-instr-macro/build.rs new file mode 100644 index 000000000000..dc42e265b737 --- /dev/null +++ b/library/stdarch/assert-instr/assert-instr-macro/build.rs @@ -0,0 +1,10 @@ +use std::env; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + let opt_level = env::var("OPT_LEVEL").ok().and_then(|s| s.parse().ok()).unwrap_or(0); + let profile = env::var("PROFILE").unwrap_or(String::new()); + if profile == "release" || opt_level >= 2 { + println!("cargo:rustc-cfg=optimized"); + } +} diff --git a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs new file mode 100644 index 000000000000..728c6936eeda --- /dev/null +++ b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs @@ -0,0 +1,59 @@ +#![feature(proc_macro)] + +extern crate proc_macro; + +use proc_macro::{TokenStream, Term, TokenNode, Delimiter}; + +#[proc_macro_attribute] +pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream { + let name = find_name(item.clone()); + let tokens = attr.into_iter().collect::>(); + if tokens.len() != 1 { + panic!("expected #[assert_instr(foo)]"); + } + let tokens = match tokens[0].kind { + TokenNode::Group(Delimiter::Parenthesis, ref rest) => rest.clone(), + _ => panic!("expected #[assert_instr(foo)]"), + }; + let tokens = tokens.into_iter().collect::>(); + if tokens.len() != 1 { + panic!("expected #[assert_instr(foo)]"); + } + let instr = match tokens[0].kind { + TokenNode::Term(term) => term, + _ => panic!("expected #[assert_instr(foo)]"), + }; + + let ignore = if cfg!(optimized) { + "" + } else { + "#[ignore]" + }; + let test = format!(" + #[test] + #[allow(non_snake_case)] + {ignore} + fn assert_instr_{name}() {{ + ::assert_instr::assert({name} as usize, \"{instr}\"); + }} + ", name = name.as_str(), instr = instr.as_str(), ignore = ignore); + let test: TokenStream = test.parse().unwrap(); + + item.into_iter().chain(test.into_iter()).collect() +} + +fn find_name(item: TokenStream) -> Term { + let mut tokens = item.into_iter(); + while let Some(tok) = tokens.next() { + if let TokenNode::Term(word) = tok.kind { + if word.as_str() == "fn" { + break + } + } + } + + match tokens.next().map(|t| t.kind) { + Some(TokenNode::Term(word)) => word, + _ => panic!("failed to find function name"), + } +} diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs new file mode 100644 index 000000000000..651c8606731a --- /dev/null +++ b/library/stdarch/assert-instr/src/lib.rs @@ -0,0 +1,247 @@ +#![feature(proc_macro)] + +extern crate assert_instr_macro; +extern crate backtrace; +extern crate cc; +extern crate rustc_demangle; +#[macro_use] +extern crate lazy_static; + +use std::collections::HashMap; +use std::env; +use std::process::Command; +use std::str; + +pub use assert_instr_macro::*; + +lazy_static! { + static ref DISASSEMBLY: HashMap> = disassemble_myself(); +} + +struct Function { + instrs: Vec, +} + +struct Instruction { + parts: Vec, +} + +fn disassemble_myself() -> HashMap> { + let me = env::current_exe().expect("failed to get current exe"); + + if cfg!(target_arch = "x86_64") && + cfg!(target_os = "windows") && + cfg!(target_env = "msvc") { + let mut cmd = cc::windows_registry::find("x86_64-pc-windows-msvc", "dumpbin.exe") + .expect("failed to find `dumpbin` tool"); + let output = cmd.arg("/DISASM").arg(&me).output() + .expect("failed to execute dumpbin"); + println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr)); + assert!(output.status.success()); + parse_dumpbin(&String::from_utf8_lossy(&output.stdout)) + } else if cfg!(target_os = "windows") { + panic!("disassembly unimplemented") + } else if cfg!(target_os = "macos") { + let output = Command::new("otool") + .arg("-vt") + .arg(&me) + .output() + .expect("failed to execute otool"); + println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr)); + assert!(output.status.success()); + + parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8")) + } else { + let output = Command::new("objdump") + .arg("--disassemble") + .arg(&me) + .output() + .expect("failed to execute objdump"); + println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr)); + assert!(output.status.success()); + + parse_objdump(&str::from_utf8(&output.stdout).expect("stdout not utf8")) + } +} + +fn parse_objdump(output: &str) -> HashMap> { + let mut lines = output.lines(); + + for line in output.lines().take(100) { + println!("{}", line); + } + + let mut ret = HashMap::new(); + while let Some(header) = lines.next() { + // symbols should start with `$hex_addr <$name>:` + if !header.ends_with(">:") { + continue + } + let start = header.find("<").unwrap(); + let symbol = &header[start + 1..header.len() - 2]; + + let mut instructions = Vec::new(); + while let Some(instruction) = lines.next() { + if instruction.is_empty() { + break + } + // Each line of instructions should look like: + // + // $rel_offset: ab cd ef 00 $instruction... + let parts = instruction.split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == 2 && usize::from_str_radix(s, 16).is_ok() + }) + .map(|s| s.to_string()) + .collect::>(); + instructions.push(Instruction { parts }); + } + + ret.entry(normalize(symbol)) + .or_insert(Vec::new()) + .push(Function { instrs: instructions }); + } + + return ret +} + +fn parse_otool(output: &str) -> HashMap> { + let mut lines = output.lines(); + + for line in output.lines().take(100) { + println!("{}", line); + } + + let mut ret = HashMap::new(); + let mut cached_header = None; + loop { + let header = match cached_header.take().or_else(|| lines.next()) { + Some(header) => header, + None => break, + }; + // symbols should start with `$symbol:` + if !header.ends_with(":") { + continue + } + // strip the leading underscore and the trailing colon + let symbol = &header[1..header.len() - 1]; + + let mut instructions = Vec::new(); + while let Some(instruction) = lines.next() { + if instruction.ends_with(":") { + cached_header = Some(instruction); + break + } + // Each line of instructions should look like: + // + // $addr $instruction... + let parts = instruction.split_whitespace() + .skip(1) + .map(|s| s.to_string()) + .collect::>(); + instructions.push(Instruction { parts }); + } + + ret.entry(normalize(symbol)) + .or_insert(Vec::new()) + .push(Function { instrs: instructions }); + } + + return ret +} + +fn parse_dumpbin(output: &str) -> HashMap> { + let mut lines = output.lines(); + + for line in output.lines().take(100) { + println!("{}", line); + } + + let mut ret = HashMap::new(); + let mut cached_header = None; + loop { + let header = match cached_header.take().or_else(|| lines.next()) { + Some(header) => header, + None => break, + }; + // symbols should start with `$symbol:` + if !header.ends_with(":") { + continue + } + // strip the trailing colon + let symbol = &header[..header.len() - 1]; + + let mut instructions = Vec::new(); + while let Some(instruction) = lines.next() { + if !instruction.starts_with(" ") { + cached_header = Some(instruction); + break + } + // Each line looks like: + // + // > $addr: ab cd ef $instr.. + // > 00 12 # this line os optional + if instruction.starts_with(" ") { + continue + } + let parts = instruction.split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == 2 && usize::from_str_radix(s, 16).is_ok() + }) + .map(|s| s.to_string()) + .collect::>(); + instructions.push(Instruction { parts }); + } + + ret.entry(normalize(symbol)) + .or_insert(Vec::new()) + .push(Function { instrs: instructions }); + } + + return ret +} + +fn normalize(symbol: &str) -> String { + let symbol = rustc_demangle::demangle(symbol).to_string(); + match symbol.rfind("::h") { + Some(i) => symbol[..i].to_string(), + None => symbol.to_string(), + } +} + +pub fn assert(fnptr: usize, expected: &str) { + let mut sym = None; + backtrace::resolve(fnptr as *mut _, |name| { + sym = name.name().and_then(|s| s.as_str()).map(normalize); + }); + + let sym = match sym { + Some(s) => s, + None => panic!("failed to get symbol of function pointer: {}", fnptr), + }; + + let functions = &DISASSEMBLY.get(&sym) + .expect(&format!("failed to find disassembly of {}", sym)); + assert_eq!(functions.len(), 1); + let function = &functions[0]; + for instr in function.instrs.iter() { + if let Some(part) = instr.parts.get(0) { + if part == expected { + return + } + } + } + + println!("disassembly for {}: ", sym); + for (i, instr) in function.instrs.iter().enumerate() { + print!("\t{:2}: ", i); + for part in instr.parts.iter() { + print!("{} ", part); + } + println!(""); + } + panic!("failed to find instruction `{}` in the disassembly", expected); +} + diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index e8c9c518e8c6..e2ec27688409 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -3,6 +3,10 @@ const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, target_feature, cfg_target_feature, i128_type )] +#![cfg_attr(test, feature(proc_macro))] + +#[cfg(test)] +extern crate assert_instr; /// Platform independent SIMD vector types and operations. pub mod simd { diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs index d1af0e1a9b61..b4afc88eaef5 100644 --- a/library/stdarch/src/x86/sse.rs +++ b/library/stdarch/src/x86/sse.rs @@ -1,9 +1,13 @@ use v128::*; +#[cfg(test)] +use assert_instr::assert_instr; + /// Return the square root of packed single-precision (32-bit) floating-point /// elements in `a`. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(sqrtps))] pub fn _mm_sqrt_ps(a: f32x4) -> f32x4 { unsafe { sqrtps(a) } } @@ -12,6 +16,7 @@ pub fn _mm_sqrt_ps(a: f32x4) -> f32x4 { /// floating-point elements in `a`. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(rcpps))] pub fn _mm_rcp_ps(a: f32x4) -> f32x4 { unsafe { rcpps(a) } } @@ -20,6 +25,7 @@ pub fn _mm_rcp_ps(a: f32x4) -> f32x4 { /// (32-bit) floating-point elements in `a`. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(rsqrtps))] pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 { unsafe { rsqrtps(a) } } @@ -28,6 +34,7 @@ pub fn _mm_rsqrt_ps(a: f32x4) -> f32x4 { /// `b`, and return the corresponding minimum values. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(minps))] pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { minps(a, b) } } @@ -36,6 +43,7 @@ pub fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 { /// `b`, and return the corresponding maximum values. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(maxps))] pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { maxps(a, b) } } @@ -46,6 +54,7 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { /// All other bits are set to `0`. #[inline(always)] #[target_feature = "+sse"] +#[cfg_attr(test, assert_instr(movmskps))] pub fn _mm_movemask_ps(a: f32x4) -> i32 { unsafe { movmskps(a) } } From 124f731ce2507ec9a15868ab596ca0d299eba0bf Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 20 Sep 2017 10:28:00 -0700 Subject: [PATCH 18/25] Migrate existing tests to #[assert_instr] Also add some documentation to the assert_instr infrastructure --- library/stdarch/CONTRIBUTING.md | 7 +- library/stdarch/asm/x86_bmi2_bzhi.asm | 12 -- library/stdarch/asm/x86_bmi2_bzhi.rs | 11 -- library/stdarch/asm/x86_bmi2_mulx.asm | 17 --- library/stdarch/asm/x86_bmi2_mulx.rs | 11 -- library/stdarch/asm/x86_bmi2_pdep.asm | 12 -- library/stdarch/asm/x86_bmi2_pdep.rs | 11 -- library/stdarch/asm/x86_bmi2_pext.asm | 12 -- library/stdarch/asm/x86_bmi2_pext.rs | 11 -- library/stdarch/asm/x86_bmi_andn.asm | 12 -- library/stdarch/asm/x86_bmi_andn.rs | 11 -- library/stdarch/asm/x86_bmi_bextr.asm | 32 ---- library/stdarch/asm/x86_bmi_bextr.rs | 21 --- library/stdarch/asm/x86_bmi_blsi.asm | 12 -- library/stdarch/asm/x86_bmi_blsi.rs | 11 -- library/stdarch/asm/x86_bmi_blsr.asm | 12 -- library/stdarch/asm/x86_bmi_blsr.rs | 11 -- library/stdarch/asm/x86_bmi_tzcnt.asm | 12 -- library/stdarch/asm/x86_bmi_tzcnt.rs | 11 -- library/stdarch/asm/x86_lzcnt_lzcnt.asm | 12 -- library/stdarch/asm/x86_lzcnt_lzcnt.rs | 11 -- library/stdarch/asm/x86_popcnt_popcnt.asm | 12 -- library/stdarch/asm/x86_popcnt_popcnt.rs | 11 -- library/stdarch/asm/x86_tbm_blcfill.asm | 12 -- library/stdarch/asm/x86_tbm_blcfill.rs | 11 -- library/stdarch/asm/x86_tbm_blci.asm | 12 -- library/stdarch/asm/x86_tbm_blci.rs | 11 -- library/stdarch/asm/x86_tbm_blcic.asm | 12 -- library/stdarch/asm/x86_tbm_blcic.rs | 11 -- library/stdarch/asm/x86_tbm_blcmsk.asm | 12 -- library/stdarch/asm/x86_tbm_blcmsk.rs | 11 -- library/stdarch/asm/x86_tbm_blcs.asm | 12 -- library/stdarch/asm/x86_tbm_blcs.rs | 11 -- library/stdarch/asm/x86_tbm_blsfill.asm | 12 -- library/stdarch/asm/x86_tbm_blsfill.rs | 11 -- library/stdarch/asm/x86_tbm_blsic.asm | 12 -- library/stdarch/asm/x86_tbm_blsic.rs | 11 -- library/stdarch/asm/x86_tbm_t1mskc.asm | 12 -- library/stdarch/asm/x86_tbm_t1mskc.rs | 11 -- library/stdarch/asm/x86_tbm_tzmsk.asm | 12 -- library/stdarch/asm/x86_tbm_tzmsk.rs | 11 -- .../assert-instr-macro/src/lib.rs | 10 ++ library/stdarch/assert-instr/src/lib.rs | 19 ++- library/stdarch/check_asm.py | 144 ------------------ library/stdarch/src/x86/abm.rs | 8 +- library/stdarch/src/x86/bmi.rs | 20 +++ library/stdarch/src/x86/bmi2.rs | 9 ++ library/stdarch/src/x86/sse2.rs | 4 + library/stdarch/src/x86/tbm.rs | 25 ++- 49 files changed, 97 insertions(+), 644 deletions(-) delete mode 100644 library/stdarch/asm/x86_bmi2_bzhi.asm delete mode 100644 library/stdarch/asm/x86_bmi2_bzhi.rs delete mode 100644 library/stdarch/asm/x86_bmi2_mulx.asm delete mode 100644 library/stdarch/asm/x86_bmi2_mulx.rs delete mode 100644 library/stdarch/asm/x86_bmi2_pdep.asm delete mode 100644 library/stdarch/asm/x86_bmi2_pdep.rs delete mode 100644 library/stdarch/asm/x86_bmi2_pext.asm delete mode 100644 library/stdarch/asm/x86_bmi2_pext.rs delete mode 100644 library/stdarch/asm/x86_bmi_andn.asm delete mode 100644 library/stdarch/asm/x86_bmi_andn.rs delete mode 100644 library/stdarch/asm/x86_bmi_bextr.asm delete mode 100644 library/stdarch/asm/x86_bmi_bextr.rs delete mode 100644 library/stdarch/asm/x86_bmi_blsi.asm delete mode 100644 library/stdarch/asm/x86_bmi_blsi.rs delete mode 100644 library/stdarch/asm/x86_bmi_blsr.asm delete mode 100644 library/stdarch/asm/x86_bmi_blsr.rs delete mode 100644 library/stdarch/asm/x86_bmi_tzcnt.asm delete mode 100644 library/stdarch/asm/x86_bmi_tzcnt.rs delete mode 100644 library/stdarch/asm/x86_lzcnt_lzcnt.asm delete mode 100644 library/stdarch/asm/x86_lzcnt_lzcnt.rs delete mode 100644 library/stdarch/asm/x86_popcnt_popcnt.asm delete mode 100644 library/stdarch/asm/x86_popcnt_popcnt.rs delete mode 100644 library/stdarch/asm/x86_tbm_blcfill.asm delete mode 100644 library/stdarch/asm/x86_tbm_blcfill.rs delete mode 100644 library/stdarch/asm/x86_tbm_blci.asm delete mode 100644 library/stdarch/asm/x86_tbm_blci.rs delete mode 100644 library/stdarch/asm/x86_tbm_blcic.asm delete mode 100644 library/stdarch/asm/x86_tbm_blcic.rs delete mode 100644 library/stdarch/asm/x86_tbm_blcmsk.asm delete mode 100644 library/stdarch/asm/x86_tbm_blcmsk.rs delete mode 100644 library/stdarch/asm/x86_tbm_blcs.asm delete mode 100644 library/stdarch/asm/x86_tbm_blcs.rs delete mode 100644 library/stdarch/asm/x86_tbm_blsfill.asm delete mode 100644 library/stdarch/asm/x86_tbm_blsfill.rs delete mode 100644 library/stdarch/asm/x86_tbm_blsic.asm delete mode 100644 library/stdarch/asm/x86_tbm_blsic.rs delete mode 100644 library/stdarch/asm/x86_tbm_t1mskc.asm delete mode 100644 library/stdarch/asm/x86_tbm_t1mskc.rs delete mode 100644 library/stdarch/asm/x86_tbm_tzmsk.asm delete mode 100644 library/stdarch/asm/x86_tbm_tzmsk.rs delete mode 100755 library/stdarch/check_asm.py diff --git a/library/stdarch/CONTRIBUTING.md b/library/stdarch/CONTRIBUTING.md index a99eaa5eedf5..8de3f8466080 100644 --- a/library/stdarch/CONTRIBUTING.md +++ b/library/stdarch/CONTRIBUTING.md @@ -14,6 +14,7 @@ example for `_mm_adds_epi16`: /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(paddsw))] pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 { unsafe { paddsw(a, b) } } @@ -32,6 +33,10 @@ Let's break this down: support `sse2`, the compiler will still generate code for `_mm_adds_epi16` *as if* `sse2` support existed. Without this attribute, the compiler might not generate the intended CPU instruction. +* The `#[cfg_attr(test, assert_instr(paddsw))]` attribute indicates that when + we're testing the crate we'll assert that the `paddsw` instruction is + generated inside this function, ensuring that the SIMD intrinsic truly is an + intrinsic for the instruction! * The types of the vectors given to the intrinsic should generally match the types as provided in the vendor interface. We'll talk about this more below. * The implementation of the vendor intrinsic is generally very simple. @@ -40,7 +45,7 @@ Let's break this down: compiler intrinsic (in this case, `paddsw`) when one is available. More on this below as well. -Once a function has been added, you should add at least one test for basic +Once a function has been added, you should also add at least one test for basic functionality. Here's an example for `_mm_adds_epi16`: ```rust diff --git a/library/stdarch/asm/x86_bmi2_bzhi.asm b/library/stdarch/asm/x86_bmi2_bzhi.asm deleted file mode 100644 index f5e6006f2e28..000000000000 --- a/library/stdarch/asm/x86_bmi2_bzhi.asm +++ /dev/null @@ -1,12 +0,0 @@ -_bzhi_u32: - pushq %rbp - movq %rsp, %rbp - bzhil %esi, %edi, %eax - popq %rbp - retq -_bzhi_u64: - pushq %rbp - movq %rsp, %rbp - bzhiq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi2_bzhi.rs b/library/stdarch/asm/x86_bmi2_bzhi.rs deleted file mode 100644 index 98323037c130..000000000000 --- a/library/stdarch/asm/x86_bmi2_bzhi.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn bzhi_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_bzhi_u32(x, mask) -} - -#[no_mangle] -pub fn bzhi_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_bzhi_u64(x, mask) -} diff --git a/library/stdarch/asm/x86_bmi2_mulx.asm b/library/stdarch/asm/x86_bmi2_mulx.asm deleted file mode 100644 index e884a07c464c..000000000000 --- a/library/stdarch/asm/x86_bmi2_mulx.asm +++ /dev/null @@ -1,17 +0,0 @@ -_umulx_u32: - pushq %rbp - movq %rsp, %rbp - movl %edi, %ecx - movl %esi, %eax - imulq %rcx, %rax - popq %rbp - retq -_umulx_u64: - pushq %rbp - movq %rsp, %rbp - mulxq %rsi, %rcx, %rax - movq %rcx, (%rdi) - movq %rax, 8(%rdi) - movq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi2_mulx.rs b/library/stdarch/asm/x86_bmi2_mulx.rs deleted file mode 100644 index 08ce65ef3f4d..000000000000 --- a/library/stdarch/asm/x86_bmi2_mulx.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn umulx_u32(x: u32, y: u32) -> (u32, u32) { - stdsimd::vendor::_mulx_u32(x, y) -} - -#[no_mangle] -pub fn umulx_u64(x: u64, y: u64) -> (u64, u64) { - stdsimd::vendor::_mulx_u64(x, y) -} diff --git a/library/stdarch/asm/x86_bmi2_pdep.asm b/library/stdarch/asm/x86_bmi2_pdep.asm deleted file mode 100644 index 157e07a2c87b..000000000000 --- a/library/stdarch/asm/x86_bmi2_pdep.asm +++ /dev/null @@ -1,12 +0,0 @@ -_pdep_u32: - pushq %rbp - movq %rsp, %rbp - pdepl %esi, %edi, %eax - popq %rbp - retq -_pdep_u64: - pushq %rbp - movq %rsp, %rbp - pdepq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi2_pdep.rs b/library/stdarch/asm/x86_bmi2_pdep.rs deleted file mode 100644 index 05c64e0c5a9e..000000000000 --- a/library/stdarch/asm/x86_bmi2_pdep.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn pdep_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_pdep_u32(x, mask) -} - -#[no_mangle] -pub fn pdep_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_pdep_u64(x, mask) -} diff --git a/library/stdarch/asm/x86_bmi2_pext.asm b/library/stdarch/asm/x86_bmi2_pext.asm deleted file mode 100644 index 76014780e211..000000000000 --- a/library/stdarch/asm/x86_bmi2_pext.asm +++ /dev/null @@ -1,12 +0,0 @@ -_pext_u32: - pushq %rbp - movq %rsp, %rbp - pextl %esi, %edi, %eax - popq %rbp - retq -_pext_u64: - pushq %rbp - movq %rsp, %rbp - pextq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi2_pext.rs b/library/stdarch/asm/x86_bmi2_pext.rs deleted file mode 100644 index 62f795411d89..000000000000 --- a/library/stdarch/asm/x86_bmi2_pext.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn pext_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_pext_u32(x, mask) -} - -#[no_mangle] -pub fn pext_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_pext_u64(x, mask) -} diff --git a/library/stdarch/asm/x86_bmi_andn.asm b/library/stdarch/asm/x86_bmi_andn.asm deleted file mode 100644 index 9751ee469c62..000000000000 --- a/library/stdarch/asm/x86_bmi_andn.asm +++ /dev/null @@ -1,12 +0,0 @@ -_andn_u32: - pushq %rbp - movq %rsp, %rbp - andnl %esi, %edi, %eax - popq %rbp - retq -_andn_u64: - pushq %rbp - movq %rsp, %rbp - andnq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi_andn.rs b/library/stdarch/asm/x86_bmi_andn.rs deleted file mode 100644 index 2770cb930493..000000000000 --- a/library/stdarch/asm/x86_bmi_andn.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn andn_u32(x: u32, y: u32) -> u32 { - stdsimd::vendor::_andn_u32(x, y) -} - -#[no_mangle] -pub fn andn_u64(x: u64, y: u64) -> u64 { - stdsimd::vendor::_andn_u64(x, y) -} diff --git a/library/stdarch/asm/x86_bmi_bextr.asm b/library/stdarch/asm/x86_bmi_bextr.asm deleted file mode 100644 index 0dd3c950d5ee..000000000000 --- a/library/stdarch/asm/x86_bmi_bextr.asm +++ /dev/null @@ -1,32 +0,0 @@ -_bextr_u32: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shll $8, %edx - movzwl %dx, %ecx - orl %eax, %ecx - bextrl %ecx, %edi, %eax - popq %rbp - retq -_bextr_u64: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shlq $8, %rdx - movzwl %dx, %ecx - orq %rax, %rcx - bextrq %rcx, %rdi, %rax - popq %rbp - retq -_bextr2_u32: - pushq %rbp - movq %rsp, %rbp - bextrl %esi, %edi, %eax - popq %rbp - retq -_bextr2_u64: - pushq %rbp - movq %rsp, %rbp - bextrq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi_bextr.rs b/library/stdarch/asm/x86_bmi_bextr.rs deleted file mode 100644 index 1c661e529682..000000000000 --- a/library/stdarch/asm/x86_bmi_bextr.rs +++ /dev/null @@ -1,21 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn bextr_u32(x: u32, y: u32, z: u32) -> u32 { - stdsimd::vendor::_bextr_u32(x, y, z) -} - -#[no_mangle] -pub fn bextr_u64(x: u64, y: u64, z: u64) -> u64 { - stdsimd::vendor::_bextr_u64(x, y, z) -} - -#[no_mangle] -pub fn bextr2_u32(x: u32, y: u32) -> u32 { - stdsimd::vendor::_bextr2_u32(x, y) -} - -#[no_mangle] -pub fn bextr2_u64(x: u64, y: u64) -> u64 { - stdsimd::vendor::_bextr2_u64(x, y) -} diff --git a/library/stdarch/asm/x86_bmi_blsi.asm b/library/stdarch/asm/x86_bmi_blsi.asm deleted file mode 100644 index a2f6231f9c40..000000000000 --- a/library/stdarch/asm/x86_bmi_blsi.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsi_u32: - pushq %rbp - movq %rsp, %rbp - blsil %edi, %eax - popq %rbp - retq -_blsi_u64: - pushq %rbp - movq %rsp, %rbp - blsiq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi_blsi.rs b/library/stdarch/asm/x86_bmi_blsi.rs deleted file mode 100644 index 637051c43769..000000000000 --- a/library/stdarch/asm/x86_bmi_blsi.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsi_u32(x: u32) -> u32 { - stdsimd::vendor::_blsi_u32(x) -} - -#[no_mangle] -pub fn blsi_u64(x: u64) -> u64 { - stdsimd::vendor::_blsi_u64(x) -} diff --git a/library/stdarch/asm/x86_bmi_blsr.asm b/library/stdarch/asm/x86_bmi_blsr.asm deleted file mode 100644 index 8ace6bc19556..000000000000 --- a/library/stdarch/asm/x86_bmi_blsr.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsr_u32: - pushq %rbp - movq %rsp, %rbp - blsrl %edi, %eax - popq %rbp - retq -_blsr_u64: - pushq %rbp - movq %rsp, %rbp - blsrq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi_blsr.rs b/library/stdarch/asm/x86_bmi_blsr.rs deleted file mode 100644 index 48a193869a63..000000000000 --- a/library/stdarch/asm/x86_bmi_blsr.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsr_u32(x: u32) -> u32 { - stdsimd::vendor::_blsr_u32(x) -} - -#[no_mangle] -pub fn blsr_u64(x: u64) -> u64 { - stdsimd::vendor::_blsr_u64(x) -} diff --git a/library/stdarch/asm/x86_bmi_tzcnt.asm b/library/stdarch/asm/x86_bmi_tzcnt.asm deleted file mode 100644 index 2412cc177f21..000000000000 --- a/library/stdarch/asm/x86_bmi_tzcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_tzcnt_u32: - pushq %rbp - movq %rsp, %rbp - tzcntl %edi, %eax - popq %rbp - retq -_tzcnt_u64: - pushq %rbp - movq %rsp, %rbp - tzcntq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_bmi_tzcnt.rs b/library/stdarch/asm/x86_bmi_tzcnt.rs deleted file mode 100644 index d4ac48aa2b5e..000000000000 --- a/library/stdarch/asm/x86_bmi_tzcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn tzcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_tzcnt_u32(x) -} - -#[no_mangle] -pub fn tzcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_tzcnt_u64(x) -} diff --git a/library/stdarch/asm/x86_lzcnt_lzcnt.asm b/library/stdarch/asm/x86_lzcnt_lzcnt.asm deleted file mode 100644 index 920644f31308..000000000000 --- a/library/stdarch/asm/x86_lzcnt_lzcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_lzcnt_u32: - pushq %rbp - movq %rsp, %rbp - lzcntl %edi, %eax - popq %rbp - retq -_lzcnt_u64: - pushq %rbp - movq %rsp, %rbp - lzcntq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_lzcnt_lzcnt.rs b/library/stdarch/asm/x86_lzcnt_lzcnt.rs deleted file mode 100644 index 34185009727b..000000000000 --- a/library/stdarch/asm/x86_lzcnt_lzcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn lzcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_lzcnt_u32(x) -} - -#[no_mangle] -pub fn lzcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_lzcnt_u64(x) -} diff --git a/library/stdarch/asm/x86_popcnt_popcnt.asm b/library/stdarch/asm/x86_popcnt_popcnt.asm deleted file mode 100644 index ef8fcf211c54..000000000000 --- a/library/stdarch/asm/x86_popcnt_popcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_popcnt_u32: - pushq %rbp - movq %rsp, %rbp - popcntl %edi, %eax - popq %rbp - retq -_popcnt_u64: - pushq %rbp - movq %rsp, %rbp - popcntq %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_popcnt_popcnt.rs b/library/stdarch/asm/x86_popcnt_popcnt.rs deleted file mode 100644 index 9f215be6b2a3..000000000000 --- a/library/stdarch/asm/x86_popcnt_popcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn popcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_popcnt32(x) -} - -#[no_mangle] -pub fn popcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_popcnt64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blcfill.asm b/library/stdarch/asm/x86_tbm_blcfill.asm deleted file mode 100644 index a7214198007b..000000000000 --- a/library/stdarch/asm/x86_tbm_blcfill.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcfill_u32: - pushq %rbp - movq %rsp, %rbp - blcfill %edi, %eax - popq %rbp - retq -_blcfill_u64: - pushq %rbp - movq %rsp, %rbp - blcfill %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blcfill.rs b/library/stdarch/asm/x86_tbm_blcfill.rs deleted file mode 100644 index 9712449e1483..000000000000 --- a/library/stdarch/asm/x86_tbm_blcfill.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcfill_u32(x: u32) -> u32 { - stdsimd::vendor::_blcfill_u32(x) -} - -#[no_mangle] -pub fn blcfill_u64(x: u64) -> u64 { - stdsimd::vendor::_blcfill_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blci.asm b/library/stdarch/asm/x86_tbm_blci.asm deleted file mode 100644 index c7a8708b6235..000000000000 --- a/library/stdarch/asm/x86_tbm_blci.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blci_u32: - pushq %rbp - movq %rsp, %rbp - blci %edi, %eax - popq %rbp - retq -_blci_u64: - pushq %rbp - movq %rsp, %rbp - blci %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blci.rs b/library/stdarch/asm/x86_tbm_blci.rs deleted file mode 100644 index 6cc306ed065a..000000000000 --- a/library/stdarch/asm/x86_tbm_blci.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blci_u32(x: u32) -> u32 { - stdsimd::vendor::_blci_u32(x) -} - -#[no_mangle] -pub fn blci_u64(x: u64) -> u64 { - stdsimd::vendor::_blci_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blcic.asm b/library/stdarch/asm/x86_tbm_blcic.asm deleted file mode 100644 index 1c6796f1d18e..000000000000 --- a/library/stdarch/asm/x86_tbm_blcic.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcic_u32: - pushq %rbp - movq %rsp, %rbp - blcic %edi, %eax - popq %rbp - retq -_blcic_u64: - pushq %rbp - movq %rsp, %rbp - blcic %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blcic.rs b/library/stdarch/asm/x86_tbm_blcic.rs deleted file mode 100644 index 390d131d6cb3..000000000000 --- a/library/stdarch/asm/x86_tbm_blcic.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcic_u32(x: u32) -> u32 { - stdsimd::vendor::_blcic_u32(x) -} - -#[no_mangle] -pub fn blcic_u64(x: u64) -> u64 { - stdsimd::vendor::_blcic_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blcmsk.asm b/library/stdarch/asm/x86_tbm_blcmsk.asm deleted file mode 100644 index 360aff904c85..000000000000 --- a/library/stdarch/asm/x86_tbm_blcmsk.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcmsk_u32: - pushq %rbp - movq %rsp, %rbp - blcmsk %edi, %eax - popq %rbp - retq -_blcmsk_u64: - pushq %rbp - movq %rsp, %rbp - blcmsk %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blcmsk.rs b/library/stdarch/asm/x86_tbm_blcmsk.rs deleted file mode 100644 index 7174b778deac..000000000000 --- a/library/stdarch/asm/x86_tbm_blcmsk.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcmsk_u32(x: u32) -> u32 { - stdsimd::vendor::_blcmsk_u32(x) -} - -#[no_mangle] -pub fn blcmsk_u64(x: u64) -> u64 { - stdsimd::vendor::_blcmsk_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blcs.asm b/library/stdarch/asm/x86_tbm_blcs.asm deleted file mode 100644 index 6a524b162d00..000000000000 --- a/library/stdarch/asm/x86_tbm_blcs.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcs_u32: - pushq %rbp - movq %rsp, %rbp - blcs %edi, %eax - popq %rbp - retq -_blcs_u64: - pushq %rbp - movq %rsp, %rbp - blcs %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blcs.rs b/library/stdarch/asm/x86_tbm_blcs.rs deleted file mode 100644 index 9c8d51ab7f19..000000000000 --- a/library/stdarch/asm/x86_tbm_blcs.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcs_u32(x: u32) -> u32 { - stdsimd::vendor::_blcs_u32(x) -} - -#[no_mangle] -pub fn blcs_u64(x: u64) -> u64 { - stdsimd::vendor::_blcs_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blsfill.asm b/library/stdarch/asm/x86_tbm_blsfill.asm deleted file mode 100644 index aa756feec33e..000000000000 --- a/library/stdarch/asm/x86_tbm_blsfill.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsfill_u32: - pushq %rbp - movq %rsp, %rbp - blsfill %edi, %eax - popq %rbp - retq -_blsfill_u64: - pushq %rbp - movq %rsp, %rbp - blsfill %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blsfill.rs b/library/stdarch/asm/x86_tbm_blsfill.rs deleted file mode 100644 index f794dc63bf87..000000000000 --- a/library/stdarch/asm/x86_tbm_blsfill.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsfill_u32(x: u32) -> u32 { - stdsimd::vendor::_blsfill_u32(x) -} - -#[no_mangle] -pub fn blsfill_u64(x: u64) -> u64 { - stdsimd::vendor::_blsfill_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_blsic.asm b/library/stdarch/asm/x86_tbm_blsic.asm deleted file mode 100644 index d400398283a2..000000000000 --- a/library/stdarch/asm/x86_tbm_blsic.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsic_u32: - pushq %rbp - movq %rsp, %rbp - blsic %edi, %eax - popq %rbp - retq -_blsic_u64: - pushq %rbp - movq %rsp, %rbp - blsic %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_blsic.rs b/library/stdarch/asm/x86_tbm_blsic.rs deleted file mode 100644 index d79f1937d183..000000000000 --- a/library/stdarch/asm/x86_tbm_blsic.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsic_u32(x: u32) -> u32 { - stdsimd::vendor::_blsic_u32(x) -} - -#[no_mangle] -pub fn blsic_u64(x: u64) -> u64 { - stdsimd::vendor::_blsic_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_t1mskc.asm b/library/stdarch/asm/x86_tbm_t1mskc.asm deleted file mode 100644 index 414a463a7230..000000000000 --- a/library/stdarch/asm/x86_tbm_t1mskc.asm +++ /dev/null @@ -1,12 +0,0 @@ -_t1mskc_u32: - pushq %rbp - movq %rsp, %rbp - t1mskc %edi, %eax - popq %rbp - retq -_t1mskc_u64: - pushq %rbp - movq %rsp, %rbp - t1mskc %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_t1mskc.rs b/library/stdarch/asm/x86_tbm_t1mskc.rs deleted file mode 100644 index e1fe51565e68..000000000000 --- a/library/stdarch/asm/x86_tbm_t1mskc.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn t1mskc_u32(x: u32) -> u32 { - stdsimd::vendor::_t1mskc_u32(x) -} - -#[no_mangle] -pub fn t1mskc_u64(x: u64) -> u64 { - stdsimd::vendor::_t1mskc_u64(x) -} diff --git a/library/stdarch/asm/x86_tbm_tzmsk.asm b/library/stdarch/asm/x86_tbm_tzmsk.asm deleted file mode 100644 index fa471844b749..000000000000 --- a/library/stdarch/asm/x86_tbm_tzmsk.asm +++ /dev/null @@ -1,12 +0,0 @@ -_tzmsk_u32: - pushq %rbp - movq %rsp, %rbp - tzmsk %edi, %eax - popq %rbp - retq -_tzmsk_u64: - pushq %rbp - movq %rsp, %rbp - tzmsk %rdi, %rax - popq %rbp - retq diff --git a/library/stdarch/asm/x86_tbm_tzmsk.rs b/library/stdarch/asm/x86_tbm_tzmsk.rs deleted file mode 100644 index 7f8eb4a1b2a7..000000000000 --- a/library/stdarch/asm/x86_tbm_tzmsk.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn tzmsk_u32(x: u32) -> u32 { - stdsimd::vendor::_tzmsk_u32(x) -} - -#[no_mangle] -pub fn tzmsk_u64(x: u64) -> u64 { - stdsimd::vendor::_tzmsk_u64(x) -} diff --git a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs index 728c6936eeda..1c4126149097 100644 --- a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs +++ b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs @@ -1,3 +1,13 @@ +//! Implementation of the `#[assert_instr]` macro +//! +//! This macro is used when testing the `stdsimd` crate and is used to generate +//! test cases to assert that functions do indeed contain the instructions that +//! we're expecting them to contain. +//! +//! The procedural macro here is relatively simple, it simply appends a +//! `#[test]` function to the original token stream which asserts that the +//! function itself contains the relevant instruction. + #![feature(proc_macro)] extern crate proc_macro; diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs index 651c8606731a..596668a8f59b 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/assert-instr/src/lib.rs @@ -1,3 +1,9 @@ +//! Runtime support needed for the `#![assert_instr]` macro +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. + #![feature(proc_macro)] extern crate assert_instr_macro; @@ -211,21 +217,30 @@ fn normalize(symbol: &str) -> String { } } +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. pub fn assert(fnptr: usize, expected: &str) { + // Translate this function pointer to a symbolic name that we'd have found + // in the disassembly. let mut sym = None; backtrace::resolve(fnptr as *mut _, |name| { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let sym = match sym { Some(s) => s, None => panic!("failed to get symbol of function pointer: {}", fnptr), }; + // Find our function in the list of all disassembled functions let functions = &DISASSEMBLY.get(&sym) .expect(&format!("failed to find disassembly of {}", sym)); assert_eq!(functions.len(), 1); let function = &functions[0]; + + // Look for `expected` as the first part of any instruction in this + // function, returning if we do indeed find it. for instr in function.instrs.iter() { if let Some(part) = instr.parts.get(0) { if part == expected { @@ -234,6 +249,8 @@ pub fn assert(fnptr: usize, expected: &str) { } } + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. println!("disassembly for {}: ", sym); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); diff --git a/library/stdarch/check_asm.py b/library/stdarch/check_asm.py deleted file mode 100755 index 1959ed279100..000000000000 --- a/library/stdarch/check_asm.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python -# Script to check the assembly generated -import os, sys -import os.path -from subprocess import Popen, PIPE -import argparse - -asm_dir = './asm' - -files = set() -verbose = False -extern_crate = None - -def arm_triplet(arch) : - triples = { 'armv7' : 'armv7-unknown-linux-gnueabihf', - 'armv8' : 'aarch64-unknown-linux-gnu' } - return triples[arch] - -class File(object): - def __init__(self, path_rs): - self.path_rs = path_rs - self.path_asm_should = os.path.join(os.path.splitext(path_rs)[0] + ".asm") - self.path_asm_output = os.path.join(os.path.splitext(path_rs)[0] + "_output.asm") - self.path_llvmir_output = os.path.join(os.path.splitext(path_rs)[0] + "_ir.ll") - self.name = os.path.splitext(os.path.basename(path_rs))[0] - self.feature = self.name.split("_")[1] - self.arch = self.name.split("_")[0] - - if self.feature == "none": - self.feature = None - - def __str__(self): - return "name: " + self.name + ", path-rs: " + self.path_rs + ", path-asm: " + self.path_asm_should + ', arch: ' + self.arch + ", feature: " + str(self.feature) - - def __hash__(self): - return hash(self.name) - -def find_files(): - for dirpath, dirnames, filenames in os.walk(asm_dir): - for filename in [f for f in filenames if f.endswith(".rs")]: - files.add(File(os.path.join(dirpath, filename))) - -def call(args): - if verbose: - print "command: " + str(args) - p = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True) - lines = p.stdout.readlines() - if verbose and p.returncode != 0: - error = p.stderr.readlines() - print >>sys.stdout, lines - print >>sys.stderr, "ERROR: %s" % error - -def compile_file(file): - if verbose: - print "Checking: " + str(file) + "..." - - cargo_args = 'cargo rustc --verbose --release -- -C panic=abort ' - if file.feature: - cargo_args = cargo_args + '-C target-feature=+{}'.format(file.feature) - if file.arch == 'armv7' or file.arch == 'armv8': - cargo_args = cargo_args + '--target={}'.format(arm_triplet(file.arch)) - call(str(cargo_args)) - - rustc_args = 'rustc --verbose -C opt-level=3 -C panic="abort" --extern %s=target/release/lib%s.rlib --crate-type lib' % (extern_crate, extern_crate); - if file.feature: - rustc_args = rustc_args + ' -C target-feature=+{}'.format(file.feature) - if file.arch == 'armv7' or file.arch == 'armv8': - rustc_args = rustc_args + ' --target={}'.format(arm_triplet(file.arch)) - rustc_args_asm = rustc_args + ' --emit asm {} -o {}'.format(file.path_rs, file.path_asm_output) - call(rustc_args_asm) - rustc_args_ll = rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output) - call(rustc_args_ll) - - if verbose: - print "...done!" - -def diff_files(rustc_output, asm_snippet): - with open(rustc_output, 'r') as rustc_output_file: - rustc_output_lines = rustc_output_file.readlines() - - with open(asm_snippet, 'r') as asm_snippet_file: - asm_snippet_lines = asm_snippet_file.readlines() - - # remove all empty lines and lines starting with "." - rustc_output_lines = [l.strip() for l in rustc_output_lines] - rustc_output_lines = [l for l in rustc_output_lines if not l.startswith(".") and not len(l) == 0] - asm_snippet_lines = [l.strip() for l in asm_snippet_lines] - asm_snippet_lines = [l for l in asm_snippet_lines if not l.startswith(".") and not len(l) == 0] - - results_differ = False - - if len(rustc_output_lines) != len(asm_snippet_lines): - results_differ = True - - for line_is, line_should in zip(rustc_output_lines, asm_snippet_lines): - if line_is != line_should: - results_differ = True - - if results_differ: - print "Error: results differ" - print "Is:" - print rustc_output_lines - print "Should:" - print asm_snippet_lines - return False - - return True - -def check_file(file): - compile_file(file) - return diff_files(file.path_asm_output, file.path_asm_should) - -def main(): - - parser = argparse.ArgumentParser(description='Checks ASM code') - parser.add_argument('-verbose', action="store_true", default=False) - parser.add_argument('-extern-crate', dest='extern_crate', default='stdsimd') - results = parser.parse_args() - - global verbose - if results.verbose: - verbose = True - - global extern_crate - extern_crate = results.extern_crate - - find_files() - - if verbose: - for f in files: - print f - error = False - for f in files: - result = check_file(f) - if not result: - error = True - - if error == True: - exit(1) - else: - exit(0) - -if __name__ == "__main__": - main() diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs index 7479bf2f17ef..19f50de2190f 100644 --- a/library/stdarch/src/x86/abm.rs +++ b/library/stdarch/src/x86/abm.rs @@ -1,5 +1,5 @@ //! Advanced Bit Manipulation (ABM) instructions -//! +//! //! The POPCNT and LZCNT have their own CPUID bits to indicate support. //! //! The references are: @@ -10,12 +10,15 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29) //! provides a quick overview of the instructions available. +#[cfg(test)] +use assert_instr::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. #[inline(always)] #[target_feature = "+lzcnt"] +#[cfg_attr(test, assert_instr(lzcnt))] pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// Counts the leading most significant zero bits. @@ -23,16 +26,19 @@ pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// When the operand is zero, it returns its size in bits. #[inline(always)] #[target_feature = "+lzcnt"] +#[cfg_attr(test, assert_instr(lzcnt))] pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Counts the bits that are set. #[inline(always)] #[target_feature = "+popcnt"] +#[cfg_attr(test, assert_instr(popcnt))] pub fn _popcnt32(x: u32) -> u32 { x.count_ones() } /// Counts the bits that are set. #[inline(always)] #[target_feature = "+popcnt"] +#[cfg_attr(test, assert_instr(popcnt))] pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 } #[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs index 9932d53b503d..ae5dbf223801 100644 --- a/library/stdarch/src/x86/bmi.rs +++ b/library/stdarch/src/x86/bmi.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29) //! provides a quick overview of the available instructions. +#[cfg(test)] +use assert_instr::assert_instr; + #[allow(dead_code)] extern "C" { #[link_name="llvm.x86.bmi.bextr.32"] @@ -19,6 +22,7 @@ extern "C" { /// the least significant bits of the result. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) } @@ -27,6 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { /// the least significant bits of the result. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) } @@ -38,6 +43,7 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { /// extracted, and bits [15,8] specify the length of the range. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr2_u32(a: u32, control: u32) -> u32 { unsafe { x86_bmi_bextr_32(a, control) } } @@ -49,6 +55,7 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 { /// extracted, and bits [15,8] specify the length of the range. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr2_u64(a: u64, control: u64) -> u64 { unsafe { x86_bmi_bextr_64(a, control) } } @@ -56,6 +63,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 { /// Bitwise logical `AND` of inverted `a` with `b`. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(andn))] pub fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } @@ -63,6 +71,7 @@ pub fn _andn_u32(a: u32, b: u32) -> u32 { /// Bitwise logical `AND` of inverted `a` with `b`. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(andn))] pub fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } @@ -70,6 +79,7 @@ pub fn _andn_u64(a: u64, b: u64) -> u64 { /// Extract lowest set isolated bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsi))] pub fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } @@ -77,6 +87,7 @@ pub fn _blsi_u32(x: u32) -> u32 { /// Extract lowest set isolated bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsi))] pub fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } @@ -84,6 +95,7 @@ pub fn _blsi_u64(x: u64) -> u64 { /// Get mask up to lowest set bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsmsk))] pub fn _blsmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_sub(1u32)) } @@ -91,6 +103,7 @@ pub fn _blsmsk_u32(x: u32) -> u32 { /// Get mask up to lowest set bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsmsk))] pub fn _blsmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_sub(1u64)) } @@ -100,6 +113,7 @@ pub fn _blsmsk_u64(x: u64) -> u64 { /// If `x` is sets CF. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsr))] pub fn _blsr_u32(x: u32) -> u32 { x & (x.wrapping_sub(1)) } @@ -109,6 +123,7 @@ pub fn _blsr_u32(x: u32) -> u32 { /// If `x` is sets CF. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsr))] pub fn _blsr_u64(x: u64) -> u64 { x & (x.wrapping_sub(1)) } @@ -118,6 +133,7 @@ pub fn _blsr_u64(x: u64) -> u64 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u16(x: u16) -> u16 { x.trailing_zeros() as u16 } @@ -127,6 +143,7 @@ pub fn _tzcnt_u16(x: u16) -> u16 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -136,6 +153,7 @@ pub fn _tzcnt_u32(x: u32) -> u32 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } @@ -145,6 +163,7 @@ pub fn _tzcnt_u64(x: u64) -> u64 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _mm_tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -154,6 +173,7 @@ pub fn _mm_tzcnt_u32(x: u32) -> u32 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _mm_tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index 64b778e0bff4..321df40777f1 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29) //! provides a quick overview of the available instructions. +#[cfg(test)] +use assert_instr::assert_instr; + /// Unsigned multiply without affecting flags. /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with @@ -51,6 +54,7 @@ extern "C" { /// Zero higher bits of `a` >= `index`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(bzhi))] pub fn _bzhi_u32(a: u32, index: u32) -> u32 { unsafe { x86_bmi2_bzhi_32(a, index) } } @@ -58,6 +62,7 @@ pub fn _bzhi_u32(a: u32, index: u32) -> u32 { /// Zero higher bits of `a` >= `index`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(bzhi))] pub fn _bzhi_u64(a: u64, index: u64) -> u64 { unsafe { x86_bmi2_bzhi_64(a, index) } } @@ -67,6 +72,7 @@ pub fn _bzhi_u64(a: u64, index: u64) -> u64 { /// specified by the `mask`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pdep))] pub fn _pdep_u32(a: u32, mask: u32) -> u32 { unsafe { x86_bmi2_pdep_32(a, mask) } } @@ -75,6 +81,7 @@ pub fn _pdep_u32(a: u32, mask: u32) -> u32 { /// specified by the `mask`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pdep))] pub fn _pdep_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pdep_64(a, mask) } } @@ -83,6 +90,7 @@ pub fn _pdep_u64(a: u64, mask: u64) -> u64 { /// order bit positions of the result. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pext))] pub fn _pext_u32(a: u32, mask: u32) -> u32 { unsafe { x86_bmi2_pext_32(a, mask) } } @@ -91,6 +99,7 @@ pub fn _pext_u32(a: u32, mask: u32) -> u32 { /// order bit positions of the result. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pext))] pub fn _pext_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pext_64(a, mask) } } diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index 882259fa5502..e67c96518061 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -9,6 +9,9 @@ use x86::__m128i; use v128::*; use v64::*; +#[cfg(test)] +use assert_instr::assert_instr; + /// Provide a hint to the processor that the code sequence is a spin-wait loop. /// /// This can help improve the performance and power consumption of spin-wait @@ -89,6 +92,7 @@ pub fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 { /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(paddsw))] pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 { unsafe { paddsw(a, b) } } diff --git a/library/stdarch/src/x86/tbm.rs b/library/stdarch/src/x86/tbm.rs index cfcba05686d0..213188536a33 100644 --- a/library/stdarch/src/x86/tbm.rs +++ b/library/stdarch/src/x86/tbm.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29) //! provides a quick overview of the available instructions. +#[cfg(test)] +use assert_instr::assert_instr; + // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32 /* #[allow(dead_code)] @@ -20,7 +23,7 @@ extern "C" { /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline(always)] -#[target_feature = "+tbm"] +#[target_feature = "+tbm"] pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) } @@ -28,7 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline(always)] -#[target_feature = "+tbm"] +#[target_feature = "+tbm"] pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) } @@ -61,6 +64,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcfill))] pub fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } @@ -70,6 +74,7 @@ pub fn _blcfill_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcfill))] pub fn _blcfill_u64(x: u64) -> u64 { x & (x.wrapping_add(1)) } @@ -79,6 +84,7 @@ pub fn _blcfill_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blci))] pub fn _blci_u32(x: u32) -> u32 { x | !(x.wrapping_add(1)) } @@ -88,6 +94,7 @@ pub fn _blci_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blci))] pub fn _blci_u64(x: u64) -> u64 { x | !(x.wrapping_add(1)) } @@ -97,6 +104,7 @@ pub fn _blci_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcic))] pub fn _blcic_u32(x: u32) -> u32 { !x & (x.wrapping_add(1)) } @@ -106,6 +114,7 @@ pub fn _blcic_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcic))] pub fn _blcic_u64(x: u64) -> u64 { !x & (x.wrapping_add(1)) } @@ -115,6 +124,7 @@ pub fn _blcic_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcmsk))] pub fn _blcmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_add(1)) } @@ -124,6 +134,7 @@ pub fn _blcmsk_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcmsk))] pub fn _blcmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_add(1)) } @@ -133,6 +144,7 @@ pub fn _blcmsk_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it returns `x`. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcs))] pub fn _blcs_u32(x: u32) -> u32 { x | (x.wrapping_add(1)) } @@ -142,6 +154,7 @@ pub fn _blcs_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns `x`. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcs))] pub fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } @@ -151,6 +164,7 @@ pub fn _blcs_u64(x: u64) -> u64 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsfill))] pub fn _blsfill_u32(x: u32) -> u32 { x | (x.wrapping_sub(1)) } @@ -160,6 +174,7 @@ pub fn _blsfill_u32(x: u32) -> u32 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsfill))] pub fn _blsfill_u64(x: u64) -> u64 { x | (x.wrapping_sub(1)) } @@ -169,6 +184,7 @@ pub fn _blsfill_u64(x: u64) -> u64 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsic))] pub fn _blsic_u32(x: u32) -> u32 { !x | (x.wrapping_sub(1)) } @@ -178,6 +194,7 @@ pub fn _blsic_u32(x: u32) -> u32 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsic))] pub fn _blsic_u64(x: u64) -> u64 { !x | (x.wrapping_sub(1)) } @@ -188,6 +205,7 @@ pub fn _blsic_u64(x: u64) -> u64 { /// If the least significant bit of `x` is 0, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(t1mskc))] pub fn _t1mskc_u32(x: u32) -> u32 { !x | (x.wrapping_add(1)) } @@ -198,6 +216,7 @@ pub fn _t1mskc_u32(x: u32) -> u32 { /// If the least significant bit of `x` is 0, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(t1mskc))] pub fn _t1mskc_u64(x: u64) -> u64 { !x | (x.wrapping_add(1)) } @@ -208,6 +227,7 @@ pub fn _t1mskc_u64(x: u64) -> u64 { /// If the least significant bit of `x` is 1, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(tzmsk))] pub fn _tzmsk_u32(x: u32) -> u32 { !x & (x.wrapping_sub(1)) } @@ -218,6 +238,7 @@ pub fn _tzmsk_u32(x: u32) -> u32 { /// If the least significant bit of `x` is 1, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(tzmsk))] pub fn _tzmsk_u64(x: u64) -> u64 { !x & (x.wrapping_sub(1)) } From 53540f0eee3a0d9fce12e11030328b4fd1d626b0 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 20 Sep 2017 12:16:54 +0200 Subject: [PATCH 19/25] [arm] bitwise manipulation instructions --- library/stdarch/src/arm/mod.rs | 10 +++++++ library/stdarch/src/arm/v6.rs | 25 ++++++++++++++++ library/stdarch/src/arm/v7.rs | 40 +++++++++++++++++++++++++ library/stdarch/src/arm/v8.rs | 54 ++++++++++++++++++++++++++++++++++ library/stdarch/src/lib.rs | 6 ++++ 5 files changed, 135 insertions(+) create mode 100644 library/stdarch/src/arm/mod.rs create mode 100644 library/stdarch/src/arm/v6.rs create mode 100644 library/stdarch/src/arm/v7.rs create mode 100644 library/stdarch/src/arm/v8.rs diff --git a/library/stdarch/src/arm/mod.rs b/library/stdarch/src/arm/mod.rs new file mode 100644 index 000000000000..9472441ae4fe --- /dev/null +++ b/library/stdarch/src/arm/mod.rs @@ -0,0 +1,10 @@ +//! ARM intrinsics. +pub use self::v6::*; +pub use self::v7::*; +#[cfg(target_arch = "aarch64")] +pub use self::v8::*; + +mod v6; +mod v7; +#[cfg(target_arch = "aarch64")] +mod v8; diff --git a/library/stdarch/src/arm/v6.rs b/library/stdarch/src/arm/v6.rs new file mode 100644 index 000000000000..95442b374f8c --- /dev/null +++ b/library/stdarch/src/arm/v6.rs @@ -0,0 +1,25 @@ +//! ARMv6 intrinsics. +//! +//! The reference is [ARMv6-M Architecture Reference +//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html). + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u8(x: u8) -> u8 { + x.swap_bytes() as u8 +} + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u16(x: u16) -> u16 { + x.swap_bytes() as u16 +} + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u32(x: u32) -> u32 { + x.swap_bytes() as u32 +} diff --git a/library/stdarch/src/arm/v7.rs b/library/stdarch/src/arm/v7.rs new file mode 100644 index 000000000000..1052b8477a92 --- /dev/null +++ b/library/stdarch/src/arm/v7.rs @@ -0,0 +1,40 @@ +//! ARMv7 intrinsics. +//! +//! The reference is [ARMv7-M Architecture Reference Manual (Issue +//! E.b)](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html). + +pub use super::v6::*; + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u8(x: u8) -> u8 { + x.leading_zeros() as u8 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u16(x: u16) -> u16 { + x.leading_zeros() as u16 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u32(x: u32) -> u32 { + x.leading_zeros() as u32 +} + +#[allow(dead_code)] +extern "C" { + #[link_name="llvm.bitreverse.i32"] + fn rbit_u32(i: i32) -> i32; +} + +/// Reverse the bit order. +#[inline(always)] +#[cfg_attr(test, assert_instr(rbit))] +pub fn _rbit_u32(x: u32) -> u32 { + unsafe { rbit_u32(x as i32) as u32 } +} diff --git a/library/stdarch/src/arm/v8.rs b/library/stdarch/src/arm/v8.rs new file mode 100644 index 000000000000..92b1507aada1 --- /dev/null +++ b/library/stdarch/src/arm/v8.rs @@ -0,0 +1,54 @@ +//! ARMv8 intrinsics. +//! +//! The reference is [ARMv8-A Reference Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html). + +pub use super::v7::*; + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u64(x: u64) -> u64 { + x.swap_bytes() as u64 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u64(x: u64) -> u64 { + x.leading_zeros() as u64 +} + +#[allow(dead_code)] +extern "C" { + #[link_name="llvm.bitreverse.i64"] + fn rbit_u64(i: i64) -> i64; +} + +/// Reverse the bit order. +#[inline(always)] +#[cfg_attr(test, assert_instr(rbit))] +pub fn _rbit_u64(x: u64) -> u64 { + unsafe { rbit_u64(x as i64) as u64 } +} + +/// Counts the leading most significant bits set. +/// +/// When all bits of the operand are set it returns the size of the operand in +/// bits. +#[inline(always)] +// #[cfg_attr(test, assert_instr(cls))] // LLVM Bug: https://bugs.llvm.org/show_bug.cgi?id=31802 +#[cfg_attr(test, assert_instr(clz))] +pub fn _cls_u32(x: u32) -> u32 { + u32::leading_zeros(!x) as u32 +} + +/// Counts the leading most significant bits set. +/// +/// When all bits of the operand are set it returns the size of the operand in +/// bits. +#[inline(always)] +// #[cfg_attr(test, assert_instr(cls))] // LLVM Bug: https://bugs.llvm.org/show_bug.cgi?id=31802 +#[cfg_attr(test, assert_instr(clz))] +pub fn _cls_u64(x: u64) -> u64 { + u64::leading_zeros(!x) as u64 +} diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index e2ec27688409..2e75c3e833d0 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -20,6 +20,9 @@ pub mod simd { pub mod vendor { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub use x86::*; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub use arm::*; } #[macro_use] @@ -31,3 +34,6 @@ mod v512; mod v64; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod x86; + +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + mod arm; From bbeec63f52cd5711bc7061f5d717a215a927840f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 21 Sep 2017 08:53:13 +0200 Subject: [PATCH 20/25] [bmi] add some more code-gen tests --- library/stdarch/src/arm/v8.rs | 4 ++-- library/stdarch/src/x86/bmi2.rs | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/library/stdarch/src/arm/v8.rs b/library/stdarch/src/arm/v8.rs index 92b1507aada1..e49ca4fe1f25 100644 --- a/library/stdarch/src/arm/v8.rs +++ b/library/stdarch/src/arm/v8.rs @@ -36,7 +36,7 @@ pub fn _rbit_u64(x: u64) -> u64 { /// When all bits of the operand are set it returns the size of the operand in /// bits. #[inline(always)] -// #[cfg_attr(test, assert_instr(cls))] // LLVM Bug: https://bugs.llvm.org/show_bug.cgi?id=31802 +// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802 #[cfg_attr(test, assert_instr(clz))] pub fn _cls_u32(x: u32) -> u32 { u32::leading_zeros(!x) as u32 @@ -47,7 +47,7 @@ pub fn _cls_u32(x: u32) -> u32 { /// When all bits of the operand are set it returns the size of the operand in /// bits. #[inline(always)] -// #[cfg_attr(test, assert_instr(cls))] // LLVM Bug: https://bugs.llvm.org/show_bug.cgi?id=31802 +// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802 #[cfg_attr(test, assert_instr(clz))] pub fn _cls_u64(x: u64) -> u64 { u64::leading_zeros(!x) as u64 diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs index 321df40777f1..67f8740399e4 100644 --- a/library/stdarch/src/x86/bmi2.rs +++ b/library/stdarch/src/x86/bmi2.rs @@ -2,7 +2,7 @@ //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, -//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). +//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectu res-software-developer-instruction-set-reference-manual-325383.pdf). //! //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29) //! provides a quick overview of the available instructions. @@ -15,6 +15,8 @@ use assert_instr::assert_instr; /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. #[inline(always)] +// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 +#[cfg_attr(test, assert_instr(imul))] #[target_feature = "+bmi2"] pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { let result: u64 = (a as u64) * (b as u64); @@ -27,6 +29,7 @@ pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. #[inline(always)] +#[cfg_attr(test, assert_instr(mulx))] #[target_feature = "+bmi2"] pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) { let result: u128 = (a as u128) * (b as u128); From 800b43ec591eba95d5239f3ecd2b85348d0420a6 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 21 Sep 2017 09:28:02 +0200 Subject: [PATCH 21/25] [assert-instr] compare only the instruction prefix When comparing the assembly instructions against the expected instruction, depending on the platform, we might end up with `tzcntl != tzcnt`. This commit truncates the instructions to the length of the expected instruction, such that `tzcntl => tzcnt` and the comparison succeeds. --- library/stdarch/assert-instr/src/lib.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs index 596668a8f59b..1011d8310615 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/assert-instr/src/lib.rs @@ -242,9 +242,14 @@ pub fn assert(fnptr: usize, expected: &str) { // Look for `expected` as the first part of any instruction in this // function, returning if we do indeed find it. for instr in function.instrs.iter() { + // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax if let Some(part) = instr.parts.get(0) { - if part == expected { - return + // Truncates the instruction with the length of the expected + // instruction: tzcntl => tzcnt and compares that. + if let Some(part) = part.get(0..expected.len()) { + if part == expected { + return + } } } } From 868399bcc40ad5231f55e49fd4a4b8779892a204 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 21 Sep 2017 13:19:29 +0200 Subject: [PATCH 22/25] [appveyor] enable panic backtraces --- library/stdarch/.appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/library/stdarch/.appveyor.yml b/library/stdarch/.appveyor.yml index 352b3bc3aa91..af352a0c8688 100644 --- a/library/stdarch/.appveyor.yml +++ b/library/stdarch/.appveyor.yml @@ -15,4 +15,5 @@ build: false test_script: - cargo test --target %TARGET% + - set RUST_BACKTRACE=1 - cargo test --target %TARGET% --release From 0ab8c0600dca5a5c9d903525cb69d73da38b8a42 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 21 Sep 2017 16:13:46 +0200 Subject: [PATCH 23/25] [assert-instr] simplify --- library/stdarch/assert-instr/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs index 1011d8310615..cddb3850a694 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/assert-instr/src/lib.rs @@ -246,10 +246,8 @@ pub fn assert(fnptr: usize, expected: &str) { if let Some(part) = instr.parts.get(0) { // Truncates the instruction with the length of the expected // instruction: tzcntl => tzcnt and compares that. - if let Some(part) = part.get(0..expected.len()) { - if part == expected { - return - } + if part.starts_with(expected) { + return } } } From c77c903e881341e53fc43c57665d0ef76ff1f73b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 21 Sep 2017 07:15:24 -0700 Subject: [PATCH 24/25] Help debug missing assembly --- library/stdarch/Cargo.toml | 2 +- .../assert-instr-macro/src/lib.rs | 4 +++- library/stdarch/assert-instr/src/lib.rs | 22 ++++++++++++------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index 0da061e71c80..87cd5dd14ca4 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -15,7 +15,7 @@ debug = true opt-level = 3 [profile.bench] -debug = 1 +debug = true opt-level = 3 [dev-dependencies] diff --git a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs index 1c4126149097..9d7093a52322 100644 --- a/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs +++ b/library/stdarch/assert-instr/assert-instr-macro/src/lib.rs @@ -44,7 +44,9 @@ pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream { #[allow(non_snake_case)] {ignore} fn assert_instr_{name}() {{ - ::assert_instr::assert({name} as usize, \"{instr}\"); + ::assert_instr::assert({name} as usize, + \"{name}\", + \"{instr}\"); }} ", name = name.as_str(), instr = instr.as_str(), ignore = ignore); let test: TokenStream = test.parse().unwrap(); diff --git a/library/stdarch/assert-instr/src/lib.rs b/library/stdarch/assert-instr/src/lib.rs index cddb3850a694..ada7b8bc3fa0 100644 --- a/library/stdarch/assert-instr/src/lib.rs +++ b/library/stdarch/assert-instr/src/lib.rs @@ -221,21 +221,28 @@ fn normalize(symbol: &str) -> String { /// /// This asserts that the function at `fnptr` contains the instruction /// `expected` provided. -pub fn assert(fnptr: usize, expected: &str) { +pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Translate this function pointer to a symbolic name that we'd have found // in the disassembly. let mut sym = None; backtrace::resolve(fnptr as *mut _, |name| { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let sym = match sym { + + let functions = match sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { Some(s) => s, - None => panic!("failed to get symbol of function pointer: {}", fnptr), + None => { + if let Some(sym) = sym { + println!("assumed symbol name: `{}`", sym); + } + println!("maybe related functions"); + for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) { + println!("\t- {}", f); + } + panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); + } }; - // Find our function in the list of all disassembled functions - let functions = &DISASSEMBLY.get(&sym) - .expect(&format!("failed to find disassembly of {}", sym)); assert_eq!(functions.len(), 1); let function = &functions[0]; @@ -254,7 +261,7 @@ pub fn assert(fnptr: usize, expected: &str) { // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. - println!("disassembly for {}: ", sym); + println!("disassembly for {}: ", sym.as_ref().unwrap()); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); for part in instr.parts.iter() { @@ -264,4 +271,3 @@ pub fn assert(fnptr: usize, expected: &str) { } panic!("failed to find instruction `{}` in the disassembly", expected); } - From f1f513d461986f0fdcd2a6c924880cc6cd0d02b2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 21 Sep 2017 07:45:11 -0700 Subject: [PATCH 25/25] Fix Windows MSVC CI Pass the `/OPT:NOICF` flag to the linker to ensure that all functions don't get eliminated (somethign we don't want in this scenario) --- library/stdarch/.appveyor.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/library/stdarch/.appveyor.yml b/library/stdarch/.appveyor.yml index af352a0c8688..bd02240ad106 100644 --- a/library/stdarch/.appveyor.yml +++ b/library/stdarch/.appveyor.yml @@ -1,4 +1,9 @@ environment: + # We don't want to do identical comdat folding as it messes up the ability to + # generate lossless backtraces in some cases. This is enabled by rustc by + # default so pass a flag to disable it to ensure our tests work ok. + RUSTFLAGS: -Clink-args=/OPT:NOICF + matrix: - TARGET: x86_64-pc-windows-msvc