From b8a4b397ade236e2735b413ca2cb4ff1f91855e8 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 27 Nov 2017 19:47:23 +0100 Subject: [PATCH] update docs (#217) * update docs * cargo clean deletes previous docs * remove stdsimd from coresimd examples * use stdsimd instead of coresimd in core docs * add stdsimd as a dev-dependency of coresimd --- library/stdarch/ci/dox.sh | 1 - library/stdarch/coresimd/Cargo.toml | 1 + library/stdarch/coresimd/src/lib.rs | 109 +------------ library/stdarch/coresimd/src/x86/i586/avx2.rs | 18 +-- library/stdarch/coresimd/src/x86/i586/sse.rs | 4 +- .../stdarch/coresimd/src/x86/i586/sse42.rs | 10 +- library/stdarch/src/lib.rs | 148 +++++++++--------- 7 files changed, 96 insertions(+), 195 deletions(-) diff --git a/library/stdarch/ci/dox.sh b/library/stdarch/ci/dox.sh index 3bcb7bcf9f91..9b412757c84c 100755 --- a/library/stdarch/ci/dox.sh +++ b/library/stdarch/ci/dox.sh @@ -22,7 +22,6 @@ dox() { rm -rf target/doc/$arch mkdir target/doc/$arch - cargo clean cargo build --target $target rustdoc --target $target -o target/doc/$arch src/lib.rs --crate-name stdsimd --library-path target/$target/debug/deps diff --git a/library/stdarch/coresimd/Cargo.toml b/library/stdarch/coresimd/Cargo.toml index 52415468b925..644612802faa 100644 --- a/library/stdarch/coresimd/Cargo.toml +++ b/library/stdarch/coresimd/Cargo.toml @@ -21,6 +21,7 @@ maintenance = { status = "experimental" } [dev-dependencies] cupid = "0.5.0" stdsimd-test = { version = "0.*", path = "../stdsimd-test" } +stdsimd = { version = "0.0.3", path = ".." } [features] # Internal-usage only: denies all warnings. diff --git a/library/stdarch/coresimd/src/lib.rs b/library/stdarch/coresimd/src/lib.rs index f0ce9e8178e9..d5c4d92746f1 100644 --- a/library/stdarch/coresimd/src/lib.rs +++ b/library/stdarch/coresimd/src/lib.rs @@ -1,26 +1,4 @@ -//! SIMD support -//! -//! This crate provides the fundamentals of supporting SIMD in Rust. This crate -//! 
should compile on all platforms and provide `simd` and `vendor` modules at -//! the top-level. The `simd` module contains *portable vector types* which -//! should work across all platforms and be implemented in the most efficient -//! manner possible for the platform at hand. The `vendor` module contains -//! vendor intrinsics that operate over these SIMD types, typically -//! corresponding to a particular CPU instruction -//! -//! ```rust -//! extern crate coresimd as stdsimd; -//! use stdsimd::simd::u32x4; -//! -//! fn main() { -//! let a = u32x4::new(1, 2, 3, 4); -//! let b = u32x4::splat(10); -//! assert_eq!(a + b, u32x4::new(11, 12, 13, 14)); -//! } -//! ``` -//! -//! > **Note**: This crate is *nightly only* at the moment, and requires a -//! > nightly rust toolchain to compile. +//! SIMD and vendor intrinsics support library. //! //! This documentation is only for one particular architecture, you can find //! others at: @@ -29,91 +7,6 @@ //! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/) //! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/) //! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/) -//! -//! ## Portability -//! -//! The `simd` module and its types should be portable to all platforms. The -//! runtime characteristics of these types may vary per platform and per CPU -//! feature enabled, but they should always have the most optimized -//! implementation for the target at hand. -//! -//! The `vendor` module provides no portability guarantees. The `vendor` module -//! is per CPU architecture currently and provides intrinsics corresponding to -//! functions for that particular CPU architecture. Note that the functions -//! provided in this module are intended to correspond to CPU instructions and -//! have no runtime support for whether you CPU actually supports the -//! instruction. -//! -//! CPU target feature detection is done via the `cfg_feature_enabled!` macro -//! at runtime. 
This macro will detect at runtime whether the specified feature -//! is available or not, returning true or false depending on the current CPU. -//! -//! ``` -//! #![feature(cfg_target_feature)] -//! -//! #[macro_use] -//! extern crate coresimd as stdsimd; -//! -//! fn main() { -//! if cfg_feature_enabled!("avx2") { -//! println!("avx2 intrinsics will work"); -//! } else { -//! println!("avx2 intrinsics will not work"); -//! // undefined behavior: may generate a `SIGILL`. -//! } -//! } -//! ``` -//! -//! After verifying that a specified feature is available, use `target_feature` -//! to enable a given feature and use the desired intrinsic. -//! -//! ```ignore -//! # #![feature(cfg_target_feature)] -//! # #![feature(target_feature)] -//! # #[macro_use] -//! # extern crate coresimd as stdsimd; -//! # fn main() { -//! # if cfg_feature_enabled!("avx2") { -//! // avx2 specific code may be used in this function -//! #[target_feature = "+avx2"] -//! fn and_256() { -//! // avx2 feature specific intrinsics will work here! -//! use stdsimd::vendor::{__m256i, _mm256_and_si256}; -//! -//! let a = __m256i::splat(5); -//! let b = __m256i::splat(3); -//! -//! let got = unsafe { _mm256_and_si256(a, b) }; -//! -//! assert_eq!(got, __m256i::splat(1)); -//! } -//! # and_256(); -//! # } -//! # } -//! ``` -//! -//! # Status -//! -//! This crate is intended for eventual inclusion into the standard library, -//! but some work and experimentation is needed to get there! First and -//! foremost you can help out by kicking the tires on this crate and seeing if -//! it works for your use case! Next up you can help us fill out the [vendor -//! intrinsics][vendor] to ensure that we've got all the SIMD support -//! necessary. -//! -//! The language support and status of SIMD is also still a little up in the -//! air right now, you may be interested in a few issues along these lines: -//! -//! * [Overal tracking issue for SIMD support][simd_tracking_issue] -//! 
* [`cfg_target_feature` tracking issue][cfg_target_feature_issue] -//! * [SIMD types currently not sound][simd_soundness_bug] -//! * [`#[target_feature]` improvements][target_feature_impr] -//! -//! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 -//! [simd_tracking_issue]: https://github.com/rust-lang/rust/issues/27731 -//! [cfg_target_feature_issue]: https://github.com/rust-lang/rust/issues/29717 -//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367 -//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839 #![cfg_attr(feature = "strict", deny(warnings))] #![allow(dead_code)] diff --git a/library/stdarch/coresimd/src/x86/i586/avx2.rs b/library/stdarch/coresimd/src/x86/i586/avx2.rs index 81f836f15466..26956477ce93 100644 --- a/library/stdarch/coresimd/src/x86/i586/avx2.rs +++ b/library/stdarch/coresimd/src/x86/i586/avx2.rs @@ -1774,7 +1774,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2318,7 +2318,7 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2367,7 +2367,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2415,7 +2415,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 { /// # #![feature(cfg_target_feature)] /// # 
#![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2459,7 +2459,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2503,7 +2503,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2542,7 +2542,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: i32x8, b: i32x8) -> i32x8 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2581,7 +2581,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: i32x8, b: i32x8) -> i32x8 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { @@ -2620,7 +2620,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: i64x4, b: i64x4) -> i64x4 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("avx2") { diff --git a/library/stdarch/coresimd/src/x86/i586/sse.rs b/library/stdarch/coresimd/src/x86/i586/sse.rs index f5533f5f7883..5ed8005b30a0 100644 --- 
a/library/stdarch/coresimd/src/x86/i586/sse.rs +++ b/library/stdarch/coresimd/src/x86/i586/sse.rs @@ -884,7 +884,7 @@ pub unsafe fn _mm_movemask_ps(a: f32x4) -> i32 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # // The real main function /// # fn main() { @@ -936,7 +936,7 @@ pub unsafe fn _mm_loadh_pi(a: f32x4, p: *const f32) -> f32x4 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # // The real main function /// # fn main() { diff --git a/library/stdarch/coresimd/src/x86/i586/sse42.rs b/library/stdarch/coresimd/src/x86/i586/sse42.rs index ce3bcdbaaa9c..e3bc7b77574f 100644 --- a/library/stdarch/coresimd/src/x86/i586/sse42.rs +++ b/library/stdarch/coresimd/src/x86/i586/sse42.rs @@ -96,7 +96,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -139,7 +139,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -180,7 +180,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -219,7 +219,7 @@ pub unsafe fn 
_mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 { /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { @@ -392,7 +392,7 @@ pub unsafe fn _mm_cmpestrm( /// # #![feature(cfg_target_feature)] /// # #![feature(target_feature)] /// # -/// # #[macro_use] extern crate coresimd as stdsimd; +/// # #[macro_use] extern crate stdsimd; /// # /// # fn main() { /// # if cfg_feature_enabled!("sse4.2") { diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs index 37fe7b6f59d4..fb89c452cb16 100644 --- a/library/stdarch/src/lib.rs +++ b/library/stdarch/src/lib.rs @@ -1,26 +1,4 @@ -//! SIMD support -//! -//! This crate provides the fundamentals of supporting SIMD in Rust. This crate -//! should compile on all platforms and provide `simd` and `vendor` modules at -//! the top-level. The `simd` module contains *portable vector types* which -//! should work across all platforms and be implemented in the most efficient -//! manner possible for the platform at hand. The `vendor` module contains -//! vendor intrinsics that operate over these SIMD types, typically -//! corresponding to a particular CPU instruction -//! -//! ```rust -//! extern crate stdsimd; -//! use stdsimd::simd::u32x4; -//! -//! fn main() { -//! let a = u32x4::new(1, 2, 3, 4); -//! let b = u32x4::splat(10); -//! assert_eq!(a + b, u32x4::new(11, 12, 13, 14)); -//! } -//! ``` -//! -//! > **Note**: This crate is *nightly only* at the moment, and requires a -//! > nightly rust toolchain to compile. +//! SIMD and vendor intrinsics support library. //! //! This documentation is only for one particular architecture, you can find //! others at: @@ -30,66 +8,96 @@ //! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/) //! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/) //! -//! 
## Portability +//! # Overview //! -//! The `simd` module and its types should be portable to all platforms. The -//! runtime characteristics of these types may vary per platform and per CPU -//! feature enabled, but they should always have the most optimized -//! implementation for the target at hand. +//! The `simd` module exposes *portable vector types*. These types work on all +//! platforms, but their run-time performance may vary depending on hardware +//! support. //! -//! The `vendor` module provides no portability guarantees. The `vendor` module -//! is per CPU architecture currently and provides intrinsics corresponding to -//! functions for that particular CPU architecture. Note that the functions -//! provided in this module are intended to correspond to CPU instructions and -//! have no runtime support for whether you CPU actually supports the -//! instruction. +//! The `vendor` module exposes vendor-specific intrinsics that typically +//! correspond to a single machine instruction. In general, these intrinsics are +//! not portable: their availability is architecture-dependent, and not all +//! machines of that architecture might provide the intrinsic. //! -//! CPU target feature detection is done via the `cfg_feature_enabled!` macro -//! at runtime. This macro will detect at runtime whether the specified feature -//! is available or not, returning true or false depending on the current CPU. +//! Two macros make it possible to write portable code: //! -//! ``` -//! #![feature(cfg_target_feature)] +//! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is +//! enabled in all CPUs that the binary will run on (at compile-time) +//! * `cfg_feature_enabled!("feature")`: returns `true` if the `feature` is +//! enabled in the CPU on which the binary is currently running (at run-time, +//! unless the result is known at compile time) +//! +//! # Example +//! +//! ```rust +//! #![feature(cfg_target_feature, target_feature)] //! //! 
#[macro_use] //! extern crate stdsimd; +//! use stdsimd::vendor; +//! use stdsimd::simd::i32x4; //! //! fn main() { -//! if cfg_feature_enabled!("avx2") { -//! println!("avx2 intrinsics will work"); -//! } else { -//! println!("avx2 intrinsics will not work"); -//! // undefined behavior: may generate a `SIGILL`. +//! let a = i32x4::new(1, 2, 3, 4); +//! let b = i32x4::splat(10); +//! assert_eq!(b, i32x4::new(10, 10, 10, 10)); +//! let c = a + b; +//! assert_eq!(c, i32x4::new(11, 12, 13, 14)); +//! assert_eq!(sum_portable(b), 40); +//! assert_eq!(sum_ct(b), 40); +//! assert_eq!(sum_rt(b), 40); +//! } +//! +//! // Sums the elements of the vector. +//! fn sum_portable(x: i32x4) -> i32 { +//! let mut r = 0; +//! for i in 0..4 { +//! r += x.extract(i); +//! } +//! r +//! } +//! +//! // Sums the elements of the vector using SSE2 instructions. +//! // This function is only safe to call if the CPU where the +//! // binary runs supports SSE2. +//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! #[target_feature = "+sse2"] +//! unsafe fn sum_sse2(x: i32x4) -> i32 { +//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 8).into()); +//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 4).into()); +//! vendor::_mm_cvtsi128_si32(x) +//! } +//! +//! // Uses the SSE2 version if SSE2 is enabled for all target +//! // CPUs at compile-time (does not perform any run-time +//! // feature detection). +//! fn sum_ct(x: i32x4) -> i32 { +//! #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), +//! target_feature = "sse2"))] +//! { +//! // This function is only available for x86/x86_64 targets, +//! // and it is only safe to call if the target supports SSE2 +//! unsafe { sum_sse2(x) } +//! } +//! #[cfg(not(all(any(target_arch = "x86_64", target_arch = "x86"), +//! target_feature = "sse2")))] +//! { +//! sum_portable(x) //! } //! } -//! ``` //! -//! After verifying that a specified feature is available, use `target_feature` -//! 
to enable a given feature and use the desired intrinsic. -//! -//! ```ignore -//! # #![feature(cfg_target_feature)] -//! # #![feature(target_feature)] -//! # #[macro_use] -//! # extern crate stdsimd; -//! # fn main() { -//! # if cfg_feature_enabled!("avx2") { -//! // avx2 specific code may be used in this function -//! #[target_feature = "+avx2"] -//! fn and_256() { -//! // avx2 feature specific intrinsics will work here! -//! use stdsimd::vendor::{__m256i, _mm256_and_si256}; -//! -//! let a = __m256i::splat(5); -//! let b = __m256i::splat(3); -//! -//! let got = unsafe { _mm256_and_si256(a, b) }; -//! -//! assert_eq!(got, __m256i::splat(1)); +//! // Detects SSE2 at run-time, and uses a SIMD intrinsic if enabled. +//! fn sum_rt(x: i32x4) -> i32 { +//! #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +//! { +//! // If SSE2 is not enabled at compile-time, this +//! // detects whether SSE2 is available at run-time: +//! if cfg_feature_enabled!("sse2") { +//! return unsafe { sum_sse2(x) }; +//! } +//! } +//! sum_portable(x) //! } -//! # and_256(); -//! # } -//! # } //! ``` //! //! # Status