From 69d2ad85f3e361a7c61cf4aa8697c39bf70b6afc Mon Sep 17 00:00:00 2001
From: gnzlbg <gnzlbg@users.noreply.github.com>
Date: Fri, 27 Oct 2017 17:55:29 +0200
Subject: [PATCH] [ci] check formatting (#64)

* [ci] check formatting

* [rustfmt] reformat the whole library
---
 library/stdarch/.travis.yml                   |  14 +-
 library/stdarch/examples/nbody.rs             | 119 ++--
 library/stdarch/examples/play.rs              |   8 +-
 library/stdarch/rustfmt.toml                  |   5 +
 library/stdarch/src/arm/mod.rs                |   4 +-
 library/stdarch/src/arm/v6.rs                 |  19 +-
 library/stdarch/src/arm/v7.rs                 |  14 +-
 library/stdarch/src/arm/v7_neon.rs            |  88 +--
 library/stdarch/src/arm/v8.rs                 |  37 +-
 library/stdarch/src/lib.rs                    |  43 +-
 library/stdarch/src/macros.rs                 |  15 +-
 library/stdarch/src/v128.rs                   |  18 +-
 library/stdarch/src/v256.rs                   |  18 +-
 library/stdarch/src/v512.rs                   |  19 +-
 library/stdarch/src/v64.rs                    |   4 +-
 library/stdarch/src/x86/abm.rs                |  32 +-
 library/stdarch/src/x86/avx.rs                | 494 ++++++++------
 library/stdarch/src/x86/avx2.rs               | 538 ++++++++++------
 library/stdarch/src/x86/bmi.rs                |  25 +-
 library/stdarch/src/x86/bmi2.rs               |  54 +-
 library/stdarch/src/x86/macros.rs             |   2 -
 library/stdarch/src/x86/mod.rs                |   2 +-
 library/stdarch/src/x86/runtime.rs            | 190 ++++--
 library/stdarch/src/x86/sse.rs                | 603 +++++++++++-------
 library/stdarch/src/x86/sse2.rs               | 482 ++++++++------
 library/stdarch/src/x86/sse3.rs               |   9 +-
 library/stdarch/src/x86/sse41.rs              | 114 ++--
 library/stdarch/src/x86/sse42.rs              | 202 +++---
 library/stdarch/src/x86/ssse3.rs              |  98 +--
 library/stdarch/src/x86/tbm.rs                | 159 +++--
 .../stdsimd-test/assert-instr-macro/build.rs  |   5 +-
 .../assert-instr-macro/src/lib.rs             |  55 +-
 .../stdsimd-test/simd-test-macro/src/lib.rs   |  22 +-
 library/stdarch/stdsimd-test/src/lib.rs       |  99 ++-
 library/stdarch/tests/cpu-detection.rs        |   2 +-
 35 files changed, 2207 insertions(+), 1405 deletions(-)
 create mode 100644 library/stdarch/rustfmt.toml

diff --git a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml
index 3c2dd7b2d866..ed8cbdae308d 100644
--- a/library/stdarch/.travis.yml
+++ b/library/stdarch/.travis.yml
@@ -17,9 +17,21 @@ matrix:
       script: ci/run.sh
     - install: true
       script: ci/dox.sh
+    - env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
+      script: |
+        cargo install rustfmt-nightly
+        cargo fmt -- --write-mode=diff
+        cd stdsimd-test  # helper crates live under stdsimd-test/
+        cargo fmt -- --write-mode=diff
+        cd assert-instr-macro
+        cargo fmt -- --write-mode=diff
+        cd ../simd-test-macro
+        cargo fmt -- --write-mode=diff
+  allow_failures:
+    - env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
 
 install:
-  - if [ "$NO_ADD" = "" ]; then rustup target add $TARGET; fi
+  - if [ "$NO_ADD" == "" ]; then rustup target add $TARGET; fi
 
 script:
   - cargo generate-lockfile
diff --git a/library/stdarch/examples/nbody.rs b/library/stdarch/examples/nbody.rs
index 3f6e7ccffe9c..a9baa74ff76c 100644
--- a/library/stdarch/examples/nbody.rs
+++ b/library/stdarch/examples/nbody.rs
@@ -26,7 +26,8 @@ impl Frsqrt for f64x2 {
 
             let u = unsafe {
                 vendor::_mm_rsqrt_ps(
-                    f32x4::new(t.extract(0), t.extract(1), 0., 0.)).as_f64x4()
+                    f32x4::new(t.extract(0), t.extract(1), 0., 0.),
+                ).as_f64x4()
             };
             f64x2::new(u.extract(0), u.extract(1))
         }
@@ -36,11 +37,12 @@ impl Frsqrt for f64x2 {
             use self::stdsimd::vendor;
             unsafe { vendor::vrsqrte_f32(self.as_f32x2()).as_f64x2() }
         }
-        #[cfg(not(any(all(any(target_arch = "x86", target_arch = "x86_64"),
+        #[cfg(not(any(all(any(target_arch = "x86",
+                              target_arch = "x86_64"),
                           target_feature = "sse"),
-                      all(any(target_arch = "arm", target_arch = "aarch64"),
-                          target_feature = "neon")
-        )))]
+                      all(any(target_arch = "arm",
+                              target_arch = "aarch64"),
+                          target_feature = "neon"))))]
         {
             self.replace(0, 1. / self.extract(0).sqrt());
             self.replace(1, 1. / self.extract(1).sqrt());
@@ -57,9 +59,9 @@ struct Body {
 }
 
 impl Body {
-    fn new(x0: f64, x1: f64, x2: f64,
-           v0: f64, v1: f64, v2: f64,
-           mass: f64) -> Body {
+    fn new(
+        x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64
+    ) -> Body {
         Body {
             x: [x0, x1, x2],
             _fill: 0.0,
@@ -91,7 +93,7 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
 
     let mut i = 0;
     for j in 0..N_BODIES {
-        for k in j+1..N_BODIES {
+        for k in j + 1..N_BODIES {
             for m in 0..3 {
                 r[i][m] = bodies[j].x[m] - bodies[k].x[m];
             }
@@ -102,14 +104,15 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
     i = 0;
     while i < N {
         for m in 0..3 {
-            dx[m] = f64x2::new(r[i][m], r[i+1][m]);
+            dx[m] = f64x2::new(r[i][m], r[i + 1][m]);
         }
 
         dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
         distance = dsquared.frsqrt();
         for _ in 0..2 {
-            distance = distance * f64x2::splat(1.5) -
-                ((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance)
+            distance = distance * f64x2::splat(1.5)
+                - ((f64x2::splat(0.5) * dsquared) * distance)
+                    * (distance * distance)
         }
         dmag = f64x2::splat(dt) / dsquared * distance;
         dmag.store(&mut mag, i);
@@ -119,7 +122,7 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
 
     i = 0;
     for j in 0..N_BODIES {
-        for k in j+1..N_BODIES {
+        for k in j + 1..N_BODIES {
             for m in 0..3 {
                 bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i];
                 bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i];
@@ -138,15 +141,19 @@ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
     let mut e = 0.0;
     for i in 0..N_BODIES {
         let bi = &bodies[i];
-        e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0;
-        for j in i+1..N_BODIES {
+        e += bi.mass
+            * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2])
+            / 2.0;
+        for j in i + 1..N_BODIES {
             let bj = &bodies[j];
             let mut dx = [0.0; 3];
             for k in 0..3 {
                 dx[k] = bi.x[k] - bj.x[k];
             }
             let mut distance = 0.0;
-            for &d in &dx { distance += d * d }
+            for &d in &dx {
+                distance += d * d
+            }
             e -= bi.mass * bj.mass / distance.sqrt()
         }
     }
@@ -156,48 +163,54 @@ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
 fn main() {
     let mut bodies: [Body; N_BODIES] = [
         /* sun */
-        Body::new(0.0, 0.0, 0.0,
-                  0.0, 0.0, 0.0,
-                  SOLAR_MASS),
+        Body::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SOLAR_MASS),
         /* jupiter */
-        Body::new(4.84143144246472090e+00,
-                  -1.16032004402742839e+00,
-                  -1.03622044471123109e-01 ,
-                  1.66007664274403694e-03 * DAYS_PER_YEAR,
-                  7.69901118419740425e-03 * DAYS_PER_YEAR,
-                  -6.90460016972063023e-05 * DAYS_PER_YEAR ,
-                  9.54791938424326609e-04 * SOLAR_MASS
-                  ),
+        Body::new(
+            4.84143144246472090e+00,
+            -1.16032004402742839e+00,
+            -1.03622044471123109e-01,
+            1.66007664274403694e-03 * DAYS_PER_YEAR,
+            7.69901118419740425e-03 * DAYS_PER_YEAR,
+            -6.90460016972063023e-05 * DAYS_PER_YEAR,
+            9.54791938424326609e-04 * SOLAR_MASS,
+        ),
         /* saturn */
-        Body::new(8.34336671824457987e+00,
-                  4.12479856412430479e+00,
-                  -4.03523417114321381e-01 ,
-                  -2.76742510726862411e-03 * DAYS_PER_YEAR,
-                  4.99852801234917238e-03 * DAYS_PER_YEAR,
-                  2.30417297573763929e-05 * DAYS_PER_YEAR ,
-                  2.85885980666130812e-04 * SOLAR_MASS
-                  ),
+        Body::new(
+            8.34336671824457987e+00,
+            4.12479856412430479e+00,
+            -4.03523417114321381e-01,
+            -2.76742510726862411e-03 * DAYS_PER_YEAR,
+            4.99852801234917238e-03 * DAYS_PER_YEAR,
+            2.30417297573763929e-05 * DAYS_PER_YEAR,
+            2.85885980666130812e-04 * SOLAR_MASS,
+        ),
         /* uranus */
-        Body::new(1.28943695621391310e+01,
-                  -1.51111514016986312e+01,
-                  -2.23307578892655734e-01 ,
-                  2.96460137564761618e-03 * DAYS_PER_YEAR,
-                  2.37847173959480950e-03 * DAYS_PER_YEAR,
-                  -2.96589568540237556e-05 * DAYS_PER_YEAR ,
-                  4.36624404335156298e-05 * SOLAR_MASS
-                  ),
+        Body::new(
+            1.28943695621391310e+01,
+            -1.51111514016986312e+01,
+            -2.23307578892655734e-01,
+            2.96460137564761618e-03 * DAYS_PER_YEAR,
+            2.37847173959480950e-03 * DAYS_PER_YEAR,
+            -2.96589568540237556e-05 * DAYS_PER_YEAR,
+            4.36624404335156298e-05 * SOLAR_MASS,
+        ),
         /* neptune */
-        Body::new(1.53796971148509165e+01,
-                  -2.59193146099879641e+01,
-                  1.79258772950371181e-01 ,
-                  2.68067772490389322e-03 * DAYS_PER_YEAR,
-                  1.62824170038242295e-03 * DAYS_PER_YEAR,
-                  -9.51592254519715870e-05 * DAYS_PER_YEAR ,
-                  5.15138902046611451e-05 * SOLAR_MASS
-                  )
-            ];
+        Body::new(
+            1.53796971148509165e+01,
+            -2.59193146099879641e+01,
+            1.79258772950371181e-01,
+            2.68067772490389322e-03 * DAYS_PER_YEAR,
+            1.62824170038242295e-03 * DAYS_PER_YEAR,
+            -9.51592254519715870e-05 * DAYS_PER_YEAR,
+            5.15138902046611451e-05 * SOLAR_MASS,
+        ),
+    ];
 
-    let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap();
+    let n: usize = std::env::args()
+        .nth(1)
+        .expect("need one arg")
+        .parse()
+        .unwrap();
 
     offset_momentum(&mut bodies);
     println!("{:.9}", energy(&bodies));
diff --git a/library/stdarch/examples/play.rs b/library/stdarch/examples/play.rs
index 5cbbdd44d52a..26ce5dd8124c 100644
--- a/library/stdarch/examples/play.rs
+++ b/library/stdarch/examples/play.rs
@@ -27,8 +27,12 @@ mod example {
 
         unsafe {
             vendor::_mm_cmpestri(
-                vneedle, needle_len as i32, vhaystack, hay_len as i32,
-                vendor::_SIDD_CMP_EQUAL_ORDERED) as usize
+                vneedle,
+                needle_len as i32,
+                vhaystack,
+                hay_len as i32,
+                vendor::_SIDD_CMP_EQUAL_ORDERED,
+            ) as usize
         }
     }
 
diff --git a/library/stdarch/rustfmt.toml b/library/stdarch/rustfmt.toml
new file mode 100644
index 000000000000..91dd4706fd4d
--- /dev/null
+++ b/library/stdarch/rustfmt.toml
@@ -0,0 +1,5 @@
+max_width = 79
+fn_call_width = 79
+wrap_comments = true
+error_on_line_overflow = false
+fn_args_density = "Compressed"
\ No newline at end of file
diff --git a/library/stdarch/src/arm/mod.rs b/library/stdarch/src/arm/mod.rs
index 08daf23bbdb3..8266842d8254 100644
--- a/library/stdarch/src/arm/mod.rs
+++ b/library/stdarch/src/arm/mod.rs
@@ -3,7 +3,9 @@
 //! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
 //! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
 //!
-//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
+//! [arm_ref]:
+//! http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
+//!
 //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
 
 pub use self::v6::*;
diff --git a/library/stdarch/src/arm/v6.rs b/library/stdarch/src/arm/v6.rs
index 63d492d9f187..33fdda67e940 100644
--- a/library/stdarch/src/arm/v6.rs
+++ b/library/stdarch/src/arm/v6.rs
@@ -1,7 +1,10 @@
 //! ARMv6 intrinsics.
 //!
-//! The reference is [ARMv6-M Architecture Reference
-//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html).
+//! The reference is [ARMv6-M Architecture Reference
+//! Manual][armv6m].
+//!
+//! [armv6m]:
+//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -27,16 +30,20 @@ mod tests {
     #[test]
     fn _rev_u16() {
         unsafe {
-            assert_eq!(v6::_rev_u16(0b0000_0000_1111_1111_u16), 0b1111_1111_0000_0000_u16);
+            assert_eq!(
+                v6::_rev_u16(0b0000_0000_1111_1111_u16),
+                0b1111_1111_0000_0000_u16
+            );
         }
     }
 
     #[test]
     fn _rev_u32() {
         unsafe {
-            assert_eq!(v6::_rev_u32(
-                0b0000_0000_1111_1111_0000_0000_1111_1111_u32
-            ), 0b1111_1111_0000_0000_1111_1111_0000_0000_u32);
+            assert_eq!(
+                v6::_rev_u32(0b0000_0000_1111_1111_0000_0000_1111_1111_u32),
+                0b1111_1111_0000_0000_1111_1111_0000_0000_u32
+            );
         }
     }
 }
diff --git a/library/stdarch/src/arm/v7.rs b/library/stdarch/src/arm/v7.rs
index f0143e581d48..b1c66647120e 100644
--- a/library/stdarch/src/arm/v7.rs
+++ b/library/stdarch/src/arm/v7.rs
@@ -1,7 +1,11 @@
 //! ARMv7 intrinsics.
 //!
 //! The reference is [ARMv7-M Architecture Reference Manual (Issue
-//! E.b)](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html).
+//! E.b)][armv7m].
+//!
+//! [armv7m]:
+//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html
+//!
 
 pub use super::v6::*;
 
@@ -39,7 +43,7 @@ pub unsafe fn _rbit_u32(x: u32) -> u32 {
 
 #[allow(dead_code)]
 extern "C" {
-    #[link_name="llvm.bitreverse.i32"]
+    #[link_name = "llvm.bitreverse.i32"]
     fn rbit_u32(i: i32) -> i32;
 }
 
@@ -72,8 +76,10 @@ mod tests {
     #[test]
     fn _rbit_u32() {
         unsafe {
-            assert_eq!(v7::_rbit_u32(0b0000_1010u32),
-                       0b0101_0000_0000_0000_0000_0000_0000_0000u32);
+            assert_eq!(
+                v7::_rbit_u32(0b0000_1010u32),
+                0b0101_0000_0000_0000_0000_0000_0000_0000u32
+            );
         }
     }
 }
diff --git a/library/stdarch/src/arm/v7_neon.rs b/library/stdarch/src/arm/v7_neon.rs
index 7e82659e97c9..4c4c5f35e261 100644
--- a/library/stdarch/src/arm/v7_neon.rs
+++ b/library/stdarch/src/arm/v7_neon.rs
@@ -5,10 +5,8 @@ use stdsimd_test::assert_instr;
 
 use simd_llvm::simd_add;
 
-use v64::{i8x8, i16x4, i32x2,
-          u8x8, u16x4, u32x2, f32x2};
-use v128::{i8x16, i16x8, i32x4, i64x2,
-           u8x16, u16x8, u32x4, u64x2, f32x4};
+use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
+use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
 
 /// Vector add.
 #[inline(always)]
@@ -230,18 +228,9 @@ mod tests {
 
     #[test]
     fn vaddq_s8_() {
-        let a = i8x16::new(
-            1, 2, 3, 4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8,
-        );
-        let b = i8x16::new(
-            8, 7, 6, 5, 4, 3, 2, 1,
-            8, 7, 6, 5, 4, 3, 2, 1,
-        );
-        let e = i8x16::new(
-            9, 9, 9, 9, 9, 9, 9, 9,
-            9, 9, 9, 9, 9, 9, 9, 9,
-        );
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
         let r = unsafe { vaddq_s8(a, b) };
         assert_eq!(r, e);
     }
@@ -293,18 +282,9 @@ mod tests {
 
     #[test]
     fn vaddq_u8_() {
-        let a = u8x16::new(
-            1, 2, 3, 4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8,
-        );
-        let b = u8x16::new(
-            8, 7, 6, 5, 4, 3, 2, 1,
-            8, 7, 6, 5, 4, 3, 2, 1,
-        );
-        let e = u8x16::new(
-            9, 9, 9, 9, 9, 9, 9, 9,
-            9, 9, 9, 9, 9, 9, 9, 9,
-        );
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+        let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
         let r = unsafe { vaddq_u8(a, b) };
         assert_eq!(r, e);
     }
@@ -366,15 +346,9 @@ mod tests {
     #[test]
     fn vaddl_s8_() {
         let v = ::std::i8::MAX;
-        let a = i8x8::new(
-            v, v, v, v,
-            v, v, v, v,
-        );
+        let a = i8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as i16);
-        let e = i16x8::new(
-            v, v, v, v,
-            v, v, v, v,
-        );
+        let e = i16x8::new(v, v, v, v, v, v, v, v);
         let r = unsafe { vaddl_s8(a, a) };
         assert_eq!(r, e);
     }
@@ -382,13 +356,9 @@ mod tests {
     #[test]
     fn vaddl_s16_() {
         let v = ::std::i16::MAX;
-        let a = i16x4::new(
-            v, v, v, v,
-        );
+        let a = i16x4::new(v, v, v, v);
         let v = 2 * (v as i32);
-        let e = i32x4::new(
-            v, v, v, v,
-        );
+        let e = i32x4::new(v, v, v, v);
         let r = unsafe { vaddl_s16(a, a) };
         assert_eq!(r, e);
     }
@@ -396,13 +366,9 @@ mod tests {
     #[test]
     fn vaddl_s32_() {
         let v = ::std::i32::MAX;
-        let a = i32x2::new(
-            v, v,
-        );
+        let a = i32x2::new(v, v);
         let v = 2 * (v as i64);
-        let e = i64x2::new(
-            v, v,
-        );
+        let e = i64x2::new(v, v);
         let r = unsafe { vaddl_s32(a, a) };
         assert_eq!(r, e);
     }
@@ -410,15 +376,9 @@ mod tests {
     #[test]
     fn vaddl_u8_() {
         let v = ::std::u8::MAX;
-        let a = u8x8::new(
-            v, v, v, v,
-            v, v, v, v,
-        );
+        let a = u8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as u16);
-        let e = u16x8::new(
-            v, v, v, v,
-            v, v, v, v,
-        );
+        let e = u16x8::new(v, v, v, v, v, v, v, v);
         let r = unsafe { vaddl_u8(a, a) };
         assert_eq!(r, e);
     }
@@ -426,13 +386,9 @@ mod tests {
     #[test]
     fn vaddl_u16_() {
         let v = ::std::u16::MAX;
-        let a = u16x4::new(
-            v, v, v, v,
-        );
+        let a = u16x4::new(v, v, v, v);
         let v = 2 * (v as u32);
-        let e = u32x4::new(
-            v, v, v, v,
-        );
+        let e = u32x4::new(v, v, v, v);
         let r = unsafe { vaddl_u16(a, a) };
         assert_eq!(r, e);
     }
@@ -440,13 +396,9 @@ mod tests {
     #[test]
     fn vaddl_u32_() {
         let v = ::std::u32::MAX;
-        let a = u32x2::new(
-            v, v,
-        );
+        let a = u32x2::new(v, v);
         let v = 2 * (v as u64);
-        let e = u64x2::new(
-            v, v,
-        );
+        let e = u64x2::new(v, v);
         let r = unsafe { vaddl_u32(a, a) };
         assert_eq!(r, e);
     }
diff --git a/library/stdarch/src/arm/v8.rs b/library/stdarch/src/arm/v8.rs
index 53815e7a628d..cf623fd9fc36 100644
--- a/library/stdarch/src/arm/v8.rs
+++ b/library/stdarch/src/arm/v8.rs
@@ -1,6 +1,9 @@
 //! ARMv8 intrinsics.
 //!
-//! The reference is [ARMv8-A Reference Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html).
+//! The reference is [ARMv8-A Reference Manual][armv8].
+//!
+//! [armv8]:
+//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html
 
 pub use super::v7::*;
 
@@ -23,7 +26,7 @@ pub unsafe fn _clz_u64(x: u64) -> u64 {
 
 #[allow(dead_code)]
 extern "C" {
-    #[link_name="llvm.bitreverse.i64"]
+    #[link_name = "llvm.bitreverse.i64"]
     fn rbit_u64(i: i64) -> i64;
 }
 
@@ -61,9 +64,10 @@ mod tests {
     #[test]
     fn _rev_u64() {
         unsafe {
-            assert_eq!(v8::_rev_u64(
-                0b0000_0000_1111_1111_0000_0000_1111_1111_u64
-            ), 0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64);
+            assert_eq!(
+                v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64),
+                0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
+            );
         }
     }
 
@@ -77,27 +81,32 @@ mod tests {
     #[test]
     fn _rbit_u64() {
         unsafe {
-            assert_eq!(v8::_rbit_u64(
-                0b0000_0000_1111_1101_0000_0000_1111_1111_u64
-            ), 0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64);
+            assert_eq!(
+                v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64),
+                0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
+            );
         }
     }
 
     #[test]
     fn _cls_u32() {
         unsafe {
-            assert_eq!(v8::_cls_u32(
-                0b1111_1111_1111_1111_0000_0000_1111_1111_u32
-            ), 15_u32);
+            assert_eq!(
+                v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32),
+                15_u32
+            );
         }
     }
 
     #[test]
     fn _cls_u64() {
         unsafe {
-            assert_eq!(v8::_cls_u64(
-                0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
-            ), 15_u64);
+            assert_eq!(
+                v8::_cls_u64(
+                    0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
+                ),
+                15_u64
+            );
         }
     }
 }
diff --git a/library/stdarch/src/lib.rs b/library/stdarch/src/lib.rs
index e30be8c53d53..849acba395ad 100644
--- a/library/stdarch/src/lib.rs
+++ b/library/stdarch/src/lib.rs
@@ -44,9 +44,9 @@
 //! have no runtime support for whether you CPU actually supports the
 //! instruction.
 //!
-//! CPU target feature detection is done via the `cfg_feature_enabled!` macro at
-//! runtime. This macro will detect at runtime whether the specified feature is
-//! available or not, returning true or false depending on the current CPU.
+//! CPU target feature detection is done via the `cfg_feature_enabled!` macro
+//! at runtime. This macro will detect at runtime whether the specified feature
+//! is available or not, returning true or false depending on the current CPU.
 //!
 //! ```
 //! #![feature(cfg_target_feature)]
@@ -58,7 +58,8 @@
 //!     if cfg_feature_enabled!("avx2") {
 //!         println!("avx2 intrinsics will work");
 //!     } else {
-//!         println!("avx2 intrinsics will not work, they may generate SIGILL");
+//!         println!("avx2 intrinsics will not work");
+//!         // undefined behavior: may generate a `SIGILL`.
 //!     }
 //! }
 //! ```
@@ -93,29 +94,33 @@
 //!
 //! # Status
 //!
-//! This crate is intended for eventual inclusion into the standard library, but
-//! some work and experimentation is needed to get there! First and foremost you
-//! can help out by kicking the tires on this crate and seeing if it works for
-//! your use case! Next up you can help us fill out the [vendor
-//! intrinsics][vendor] to ensure that we've got all the SIMD support necessary.
+//! This crate is intended for eventual inclusion into the standard library,
+//! but some work and experimentation is needed to get there! First and
+//! foremost you can help out by kicking the tires on this crate and seeing if
+//! it works for your use case! Next up you can help us fill out the [vendor
+//! intrinsics][vendor] to ensure that we've got all the SIMD support
+//! necessary.
 //!
-//! The language support and status of SIMD is also still a little up in the air
-//! right now, you may be interested in a few issues along these lines:
+//! The language support and status of SIMD is also still a little up in the
+//! air right now, you may be interested in a few issues along these lines:
 //!
-//! * [Overal tracking issue for SIMD support](https://github.com/rust-lang/rust/issues/27731)
-//! * [`cfg_target_feature` tracking issue](https://github.com/rust-lang/rust/issues/29717)
-//! * [SIMD types currently not sound](https://github.com/rust-lang/rust/issues/44367)
-//! * [`#[target_feature]` improvements](https://github.com/rust-lang/rust/issues/44839)
+//! * [Overall tracking issue for SIMD
+//!   support](https://github.com/rust-lang/rust/issues/27731)
+//! * [`cfg_target_feature` tracking
+//!   issue](https://github.com/rust-lang/rust/issues/29717)
+//! * [SIMD types currently not
+//!   sound](https://github.com/rust-lang/rust/issues/44367)
+//! * [`#[target_feature]`
+//!   improvements](https://github.com/rust-lang/rust/issues/44839)
 //!
 //! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40
 
 #![cfg_attr(feature = "strict", deny(warnings))]
 #![allow(dead_code)]
 #![allow(unused_features)]
-#![feature(
-    const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
-    target_feature, cfg_target_feature, i128_type, asm, const_atomic_usize_new
-)]
+#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
+           simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
+           const_atomic_usize_new, stmt_expr_attributes)]
 #![cfg_attr(test, feature(proc_macro, test))]
 
 #[cfg(test)]
diff --git a/library/stdarch/src/macros.rs b/library/stdarch/src/macros.rs
index 43b6a9b2f854..ab287a80000b 100644
--- a/library/stdarch/src/macros.rs
+++ b/library/stdarch/src/macros.rs
@@ -240,9 +240,11 @@ macro_rules! define_integer_ops {
                 i8, i16, i32, i64, isize);
 
             impl ::std::fmt::LowerHex for $ty {
-                fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+                fn fmt(&self, f: &mut ::std::fmt::Formatter)
+                       -> ::std::fmt::Result {
                     write!(f, "{}(", stringify!($ty))?;
-                    let n = ::std::mem::size_of_val(self) / ::std::mem::size_of::<$elem>();
+                    let n = ::std::mem::size_of_val(self)
+                        / ::std::mem::size_of::<$elem>();
                     for i in 0..n {
                         if i > 0 {
                             write!(f, ", ")?;
@@ -292,8 +294,7 @@ macro_rules! cfg_feature_enabled {
 /// On ARM features are only detected at compile-time using
 /// cfg(target_feature), so if this macro is executed the
 /// feature is not supported.
-#[cfg(any(target_arch = "arm",
-          target_arch = "aarch64"))]
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
 #[macro_export]
 #[doc(hidden)]
 macro_rules! __unstable_detect_feature {
@@ -302,10 +303,8 @@ macro_rules! __unstable_detect_feature {
 }
 
 /// In all unsupported architectures using the macro is an error
-#[cfg(not(any(target_arch = "x86",
-              target_arch = "x86_64",
-              target_arch = "arm",
-              target_arch = "aarch64")))]
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
+              target_arch = "arm", target_arch = "aarch64")))]
 #[macro_export]
 #[doc(hidden)]
 macro_rules! __unstable_detect_feature {
diff --git a/library/stdarch/src/v128.rs b/library/stdarch/src/v128.rs
index 922f983a59ad..f5c425bc745d 100644
--- a/library/stdarch/src/v128.rs
+++ b/library/stdarch/src/v128.rs
@@ -50,7 +50,17 @@ define_from!(u8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, i8x16);
 define_from!(i8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16);
 
 define_common_ops!(
-    f64x2, f32x4, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16, i8x16);
+    f64x2,
+    f32x4,
+    u64x2,
+    i64x2,
+    u32x4,
+    i32x4,
+    u16x8,
+    i16x8,
+    u8x16,
+    i8x16
+);
 define_float_ops!(f64x2, f32x4);
 define_integer_ops!(
     (u64x2, u64),
@@ -60,7 +70,8 @@ define_integer_ops!(
     (u16x8, u16),
     (i16x8, i16),
     (u8x16, u8),
-    (i8x16, i8));
+    (i8x16, i8)
+);
 define_casts!(
     (f64x2, f32x2, as_f32x2),
     (f64x2, u64x2, as_u64x2),
@@ -79,4 +90,5 @@ define_casts!(
     (u16x8, i16x8, as_i16x8),
     (i16x8, u16x8, as_u16x8),
     (u8x16, i8x16, as_i8x16),
-    (i8x16, u8x16, as_u8x16));
+    (i8x16, u8x16, as_u8x16)
+);
diff --git a/library/stdarch/src/v256.rs b/library/stdarch/src/v256.rs
index a5e163e45319..33d2584f7127 100644
--- a/library/stdarch/src/v256.rs
+++ b/library/stdarch/src/v256.rs
@@ -74,7 +74,17 @@ define_from!(u8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, i8x32);
 define_from!(i8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32);
 
 define_common_ops!(
-    f64x4, f32x8, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32);
+    f64x4,
+    f32x8,
+    u64x4,
+    i64x4,
+    u32x8,
+    i32x8,
+    u16x16,
+    i16x16,
+    u8x32,
+    i8x32
+);
 define_float_ops!(f64x4, f32x8);
 define_integer_ops!(
     (u64x4, u64),
@@ -84,7 +94,8 @@ define_integer_ops!(
     (u16x16, u16),
     (i16x16, i16),
     (u8x32, u8),
-    (i8x32, i8));
+    (i8x32, i8)
+);
 define_casts!(
     (f64x4, f32x4, as_f32x4),
     (f64x4, u64x4, as_u64x4),
@@ -102,4 +113,5 @@ define_casts!(
     (u16x16, i16x16, as_i16x16),
     (i16x16, u16x16, as_u16x16),
     (u8x32, i8x32, as_i8x32),
-    (i8x32, u8x32, as_u8x32));
+    (i8x32, u8x32, as_u8x32)
+);
diff --git a/library/stdarch/src/v512.rs b/library/stdarch/src/v512.rs
index 5b1afee639b9..4973a7001ed6 100644
--- a/library/stdarch/src/v512.rs
+++ b/library/stdarch/src/v512.rs
@@ -120,7 +120,17 @@ define_from!(u8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, i8x64);
 define_from!(i8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64);
 
 define_common_ops!(
-    f64x8, f32x16, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64, i8x64);
+    f64x8,
+    f32x16,
+    u64x8,
+    i64x8,
+    u32x16,
+    i32x16,
+    u16x32,
+    i16x32,
+    u8x64,
+    i8x64
+);
 define_float_ops!(f64x8, f32x16);
 define_integer_ops!(
     (u64x8, u64),
@@ -130,7 +140,8 @@ define_integer_ops!(
     (u16x32, u16),
     (i16x32, i16),
     (u8x64, u8),
-    (i8x64, i8));
+    (i8x64, i8)
+);
 define_casts!(
     (f64x8, f32x8, as_f32x8),
     (f64x8, u64x8, as_u64x8),
@@ -148,5 +159,5 @@ define_casts!(
     (u16x32, i16x32, as_i16x32),
     (i16x32, u16x32, as_u16x32),
     (u8x64, i8x64, as_i8x64),
-    (i8x64, u8x64, as_u8x64));
-
+    (i8x64, u8x64, as_u8x64)
+);
diff --git a/library/stdarch/src/v64.rs b/library/stdarch/src/v64.rs
index a889f037a744..fe7f59c2fde1 100644
--- a/library/stdarch/src/v64.rs
+++ b/library/stdarch/src/v64.rs
@@ -42,7 +42,8 @@ define_integer_ops!(
     (u16x4, u16),
     (i16x4, i16),
     (u8x8, u8),
-    (i8x8, i8));
+    (i8x8, i8)
+);
 define_casts!(
     (f32x2, f64x2, as_f64x2),
     (f32x2, u32x2, as_u32x2),
@@ -61,5 +62,4 @@ define_casts!(
     (u8x8, u16x8, as_u16x8),
     (u16x4, u32x4, as_u32x4),
     (u32x2, u64x2, as_u64x2)
-
 );
diff --git a/library/stdarch/src/x86/abm.rs b/library/stdarch/src/x86/abm.rs
index 4b9b49e81595..9f2fa9811b1f 100644
--- a/library/stdarch/src/x86/abm.rs
+++ b/library/stdarch/src/x86/abm.rs
@@ -4,11 +4,19 @@
 //!
 //! The references are:
 //!
-//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
-//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
 //!
-//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29)
-//! provides a quick overview of the instructions available.
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+//!
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -19,7 +27,9 @@ use stdsimd_test::assert_instr;
 #[inline(always)]
 #[target_feature = "+lzcnt"]
 #[cfg_attr(test, assert_instr(lzcnt))]
-pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
+pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
+    x.leading_zeros()
+}
 
 /// Counts the leading most significant zero bits.
 ///
@@ -27,19 +37,25 @@ pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
 #[inline(always)]
 #[target_feature = "+lzcnt"]
 #[cfg_attr(test, assert_instr(lzcnt))]
-pub unsafe fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
+pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
+    x.leading_zeros() as u64
+}
 
 /// Counts the bits that are set.
 #[inline(always)]
 #[target_feature = "+popcnt"]
 #[cfg_attr(test, assert_instr(popcnt))]
-pub unsafe fn _popcnt32(x: u32) -> u32 { x.count_ones() }
+pub unsafe fn _popcnt32(x: u32) -> u32 {
+    x.count_ones()
+}
 
 /// Counts the bits that are set.
 #[inline(always)]
 #[target_feature = "+popcnt"]
 #[cfg_attr(test, assert_instr(popcnt))]
-pub unsafe fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
+pub unsafe fn _popcnt64(x: u64) -> u64 {
+    x.count_ones() as u64
+}
 
 #[cfg(test)]
 mod tests {
diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs
index 7e3e0f9dcf60..858935208e4a 100644
--- a/library/stdarch/src/x86/avx.rs
+++ b/library/stdarch/src/x86/avx.rs
@@ -18,7 +18,8 @@ pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 {
     a + b
 }
 
-/// Add packed single-precision (32-bit) floating-point elements in `a` and `b`.
+/// Add packed single-precision (32-bit) floating-point elements in `a` and
+/// `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vaddps))]
@@ -26,7 +27,8 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
     a + b
 }
 
-/// Compute the bitwise AND of a packed double-precision (64-bit) floating-point elements
+/// Compute the bitwise AND of packed double-precision (64-bit)
+/// floating-point elements
 /// in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -39,7 +41,8 @@ pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 {
     mem::transmute(a & b)
 }
 
-/// Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in `a` and `b`.
+/// Compute the bitwise AND of packed single-precision (32-bit) floating-point
+/// elements in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vandps))]
@@ -49,7 +52,8 @@ pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 {
     mem::transmute(a & b)
 }
 
-/// Compute the bitwise OR packed double-precision (64-bit) floating-point elements
+/// Compute the bitwise OR of packed double-precision (64-bit) floating-point
+/// elements
 /// in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -62,7 +66,8 @@ pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 {
     mem::transmute(a | b)
 }
 
-/// Compute the bitwise OR packed single-precision (32-bit) floating-point elements in `a` and `b`.
+/// Compute the bitwise OR of packed single-precision (32-bit) floating-point
+/// elements in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vorps))]
@@ -114,7 +119,8 @@ pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
     }
 }
 
-/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in `a`
+/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point
+/// elements in `a`
 /// and then AND with `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -126,7 +132,8 @@ pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 {
     mem::transmute((!a) & b)
 }
 
-/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in `a`
+/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point
+/// elements in `a`
 /// and then AND with `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -146,8 +153,8 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
     maxpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
-/// and return packed maximum values
+/// Compare packed single-precision (32-bit) floating-point elements in `a`
+/// and `b`, and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmaxps))]
@@ -164,8 +171,8 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
     minpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
-/// and return packed minimum values
+/// Compare packed single-precision (32-bit) floating-point elements in `a`
+/// and `b`, and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vminps))]
@@ -182,7 +189,8 @@ pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 {
     a * b
 }
 
-/// Add packed single-precision (32-bit) floating-point elements in `a` and `b`.
+/// Multiply packed single-precision (32-bit) floating-point elements in `a`
+/// and `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmulps))]
@@ -266,8 +274,8 @@ pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
     constify_imm8!(b, call)
 }
 
-/// Round packed double-precision (64-bit) floating point elements in `a` toward
-/// positive infinity.
+/// Round packed double-precision (64-bit) floating point elements in `a`
+/// toward positive infinity.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vroundpd))]
@@ -275,8 +283,8 @@ pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
     roundpd256(a, 0x02)
 }
 
-/// Round packed double-precision (64-bit) floating point elements in `a` toward
-/// negative infinity.
+/// Round packed double-precision (64-bit) floating point elements in `a`
+/// toward negative infinity.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vroundpd))]
@@ -292,9 +300,9 @@ pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 {
 /// - `0x02`: Round up, toward positive infinity.
 /// - `0x03`: Truncate the values.
 ///
-/// For a complete list of options, check the LLVM docs:
+/// For a complete list of options, check [the LLVM docs][llvm_docs].
 ///
-/// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
+/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
@@ -307,8 +315,8 @@ pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 {
     constify_imm8!(b, call)
 }
 
-/// Round packed single-precision (32-bit) floating point elements in `a` toward
-/// positive infinity.
+/// Round packed single-precision (32-bit) floating point elements in `a`
+/// toward positive infinity.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vroundps))]
@@ -316,8 +324,8 @@ pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 {
     roundps256(a, 0x02)
 }
 
-/// Round packed single-precision (32-bit) floating point elements in `a` toward
-/// negative infinity.
+/// Round packed single-precision (32-bit) floating point elements in `a`
+/// toward negative infinity.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vroundps))]
@@ -606,7 +614,8 @@ pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: u8) -> f32x8 {
 /// Compare the lower double-precision (64-bit) floating-point element in
 /// `a` and `b` based on the comparison operand specified by `imm8`,
 /// store the result in the lower element of returned vector,
-/// and copy the upper element from `a` to the upper element of returned vector.
+/// and copy the upper element from `a` to the upper element of returned
+/// vector.
 #[inline(always)]
 #[target_feature = "+avx,+sse2"]
 #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
@@ -811,7 +820,9 @@ pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
 pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
     let imm8 = (imm8 & 0xFF) as u8;
-    const fn add4(x: u32) -> u32 { x + 4 }
+    const fn add4(x: u32) -> u32 {
+        x + 4
+    }
     macro_rules! shuffle4 {
         ($a:expr, $b:expr, $c:expr, $d:expr) => {
             simd_shuffle8(a, _mm256_undefined_ps(), [
@@ -857,7 +868,7 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
     }
 }
 
-/// Shuffle single-precision (32-bit) floating-point elements in `a` 
+/// Shuffle single-precision (32-bit) floating-point elements in `a`
 /// using the control in `imm8`.
 #[inline(always)]
 #[target_feature = "+avx,+sse"]
@@ -1026,7 +1037,9 @@ pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i8) -> f64x4 {
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
-pub unsafe fn _mm256_permute2f128_si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8 {
+pub unsafe fn _mm256_permute2f128_si256(
+    a: i32x8, b: i32x8, imm8: i8
+) -> i32x8 {
     macro_rules! call {
         ($imm8:expr) => { vperm2f128si256(a, b, $imm8) }
     }
@@ -1110,7 +1123,9 @@ pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 {
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
-pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
+pub unsafe fn _mm256_insertf128_si256(
+    a: __m256i, b: __m128i, imm8: i32
+) -> __m256i {
     let b = i64x4::from(_mm256_castsi128_si256(b));
     let dst: i64x4 = match imm8 & 1 {
         0 => simd_shuffle4(i64x4::from(a), b, [4, 5, 2, 3]),
@@ -1166,7 +1181,8 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 {
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut f64x4 as *mut u8,
-        mem::size_of::<f64x4>());
+        mem::size_of::<f64x4>(),
+    );
     dst
 }
 
@@ -1191,7 +1207,8 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 {
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut f32x8 as *mut u8,
-        mem::size_of::<f32x8>());
+        mem::size_of::<f32x8>(),
+    );
     dst
 }
 
@@ -1215,12 +1232,13 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut __m256i as *mut u8,
-        mem::size_of::<__m256i>());
+        mem::size_of::<__m256i>(),
+    );
     dst
 }
 
 /// Store 256-bits of integer data from `a` into memory.
-///	`mem_addr` does not need to be aligned on any particular boundary.
+/// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
@@ -1234,7 +1252,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
-pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4  {
+pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 {
     maskloadpd256(mem_addr as *const i8, mask)
 }
 
@@ -1272,7 +1290,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) {
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
-pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8  {
+pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 {
     maskloadps256(mem_addr as *const i8, mask)
 }
 
@@ -1592,7 +1610,8 @@ pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 {
 }
 
 /// Set each bit of the returned mask based on the most significant bit of the
-/// corresponding packed double-precision (64-bit) floating-point element in `a`.
+/// corresponding packed double-precision (64-bit) floating-point element in
+/// `a`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmovmskpd))]
@@ -1601,7 +1620,8 @@ pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 {
 }
 
 /// Set each bit of the returned mask based on the most significant bit of the
-/// corresponding packed single-precision (32-bit) floating-point element in `a`.
+/// corresponding packed single-precision (32-bit) floating-point element in
+/// `a`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vmovmskps))]
@@ -1646,8 +1666,9 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
 /// vector with the supplied values.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32,
-                            e: f32, f: f32, g: f32, h: f32) -> f32x8 {
+pub unsafe fn _mm256_set_ps(
+    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
+) -> f32x8 {
     f32x8::new(h, g, f, e, d, c, b, a)
 }
 
@@ -1655,44 +1676,45 @@ pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32,
 /// reverse order.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_set_epi8(e00: i8, e01: i8, e02: i8, e03: i8,
-                              e04: i8, e05: i8, e06: i8, e07: i8,
-                              e08: i8, e09: i8, e10: i8, e11: i8,
-                              e12: i8, e13: i8, e14: i8, e15: i8,
-                              e16: i8, e17: i8, e18: i8, e19: i8,
-                              e20: i8, e21: i8, e22: i8, e23: i8,
-                              e24: i8, e25: i8, e26: i8, e27: i8,
-                              e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 {
-    i8x32::new(e31, e30, e29, e28,
-               e27, e26, e25, e24,
-               e23, e22, e21, e20,
-               e19, e18, e17, e16,
-               e15, e14, e13, e12,
-               e11, e10, e09, e08,
-               e07, e06, e05, e04,
-               e03, e02, e01, e00)
+pub unsafe fn _mm256_set_epi8(
+    e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
+    e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8,
+    e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8,
+    e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8,
+) -> i8x32 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    i8x32::new(
+        e31, e30, e29, e28, e27, e26, e25, e24,
+        e23, e22, e21, e20, e19, e18, e17, e16,
+        e15, e14, e13, e12, e11, e10, e09, e08,
+        e07, e06, e05, e04, e03, e02, e01, e00,
+    )
 }
 
 /// Set packed 16-bit integers in returned vector with the supplied values.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_set_epi16(e00: i16, e01: i16, e02: i16, e03: i16,
-                               e04: i16, e05: i16, e06: i16, e07: i16,
-                               e08: i16, e09: i16, e10: i16, e11: i16,
-                               e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 {
-    i16x16::new(e15, e14, e13, e12,
-                e11, e10, e09, e08,
-                e07, e06, e05, e04,
-                e03, e02, e01, e00)
+pub unsafe fn _mm256_set_epi16(
+    e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
+    e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16,
+    e14: i16, e15: i16,
+) -> i16x16 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    i16x16::new(
+        e15, e14, e13, e12,
+        e11, e10, e09, e08,
+        e07, e06, e05, e04,
+        e03, e02, e01, e00,
+    )
 }
 
 /// Set packed 32-bit integers in returned vector with the supplied values.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32,
-                               e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 {
-    i32x8::new(e7, e6, e5, e4,
-               e3, e2, e1, e0)
+pub unsafe fn _mm256_set_epi32(
+    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
+) -> i32x8 {
+    i32x8::new(e7, e6, e5, e4, e3, e2, e1, e0)
 }
 
 /// Set packed 64-bit integers in returned vector with the supplied values.
@@ -1715,8 +1737,9 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
 /// vector with the supplied values in reverse order.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32,
-                             e: f32, f: f32, g: f32, h: f32) -> f32x8 {
+pub unsafe fn _mm256_setr_ps(
+    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
+) -> f32x8 {
     f32x8::new(a, b, c, d, e, f, g, h)
 }
 
@@ -1724,46 +1747,47 @@ pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32,
 /// reverse order.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8,
-                               e04: i8, e05: i8, e06: i8, e07: i8,
-                               e08: i8, e09: i8, e10: i8, e11: i8,
-                               e12: i8, e13: i8, e14: i8, e15: i8,
-                               e16: i8, e17: i8, e18: i8, e19: i8,
-                               e20: i8, e21: i8, e22: i8, e23: i8,
-                               e24: i8, e25: i8, e26: i8, e27: i8,
-                               e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 {
-    i8x32::new(e00, e01, e02, e03,
-               e04, e05, e06, e07,
-               e08, e09, e10, e11,
-               e12, e13, e14, e15,
-               e16, e17, e18, e19,
-               e20, e21, e22, e23,
-               e24, e25, e26, e27,
-               e28, e29, e30, e31)
+pub unsafe fn _mm256_setr_epi8(
+    e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
+    e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8,
+    e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8,
+    e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8,
+) -> i8x32 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    i8x32::new(
+        e00, e01, e02, e03, e04, e05, e06, e07,
+        e08, e09, e10, e11, e12, e13, e14, e15,
+        e16, e17, e18, e19, e20, e21, e22, e23,
+        e24, e25, e26, e27, e28, e29, e30, e31,
+    )
 }
 
 /// Set packed 16-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_setr_epi16(e00: i16, e01: i16, e02: i16, e03: i16,
-                                e04: i16, e05: i16, e06: i16, e07: i16,
-                                e08: i16, e09: i16, e10: i16, e11: i16,
-                                e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 {
-    i16x16::new(e00, e01, e02, e03,
-                e04, e05, e06, e07,
-                e08, e09, e10, e11,
-                e12, e13, e14, e15)
+pub unsafe fn _mm256_setr_epi16(
+    e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
+    e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16,
+    e14: i16, e15: i16,
+) -> i16x16 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    i16x16::new(
+        e00, e01, e02, e03,
+        e04, e05, e06, e07,
+        e08, e09, e10, e11,
+        e12, e13, e14, e15,
+    )
 }
 
 /// Set packed 32-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
 #[target_feature = "+avx"]
-pub unsafe fn _mm256_setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32,
-                               e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 {
-    i32x8::new(e0, e1, e2, e3,
-               e4, e5, e6, e7)
+pub unsafe fn _mm256_setr_epi32(
+    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
+) -> i32x8 {
+    i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)
 }
 
 /// Set packed 64-bit integers in returned vector with the supplied values in
@@ -1798,10 +1822,13 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 {
 #[cfg_attr(test, assert_instr(vpshufb))]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 {
-    i8x32::new(a, a, a, a, a, a, a, a,
-               a, a, a, a, a, a, a, a,
-               a, a, a, a, a, a, a, a,
-               a, a, a, a, a, a, a, a)
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    i8x32::new(
+        a, a, a, a, a, a, a, a,
+        a, a, a, a, a, a, a, a,
+        a, a, a, a, a, a, a, a,
+        a, a, a, a, a, a, a, a,
+    )
 }
 
 /// Broadcast 16-bit integer `a` to all all elements of returned vector.
@@ -1811,8 +1838,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 {
 //#[cfg_attr(test, assert_instr(vpshufb))]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 {
-    i16x16::new(a, a, a, a, a, a, a, a,
-               a, a, a, a, a, a, a, a)
+    i16x16::new(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
 }
 
 /// Broadcast 32-bit integer `a` to all elements of returned vector.
@@ -1954,7 +1980,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
 #[target_feature = "+avx,+sse2"]
 pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
     use x86::sse2::_mm_setzero_si128;
-    let b =  mem::transmute(_mm_setzero_si128());
+    let b = mem::transmute(_mm_setzero_si128());
     let dst: i64x4 = simd_shuffle4(i64x2::from(a), b, [0, 1, 2, 3]);
     __m256i::from(dst)
 }
@@ -2044,22 +2070,28 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
 }
 
 /// Load two 128-bit values (composed of 4 packed single-precision (32-bit)
-/// floating-point elements) from memory, and combine them into a 256-bit value.
+/// floating-point elements) from memory, and combine them into a 256-bit
+/// value.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse"]
-pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> f32x8 {
+pub unsafe fn _mm256_loadu2_m128(
+    hiaddr: *const f32, loaddr: *const f32
+) -> f32x8 {
     use x86::sse::_mm_loadu_ps;
     let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
     _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
 }
 
 /// Load two 128-bit values (composed of 2 packed double-precision (64-bit)
-/// floating-point elements) from memory, and combine them into a 256-bit value.
+/// floating-point elements) from memory, and combine them into a 256-bit
+/// value.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse2"]
-pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> f64x4 {
+pub unsafe fn _mm256_loadu2_m128d(
+    hiaddr: *const f64, loaddr: *const f64
+) -> f64x4 {
     use x86::sse2::_mm_loadu_pd;
     let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
     _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
@@ -2070,7 +2102,9 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> f64
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse2"]
-pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
+pub unsafe fn _mm256_loadu2_m128i(
+    hiaddr: *const __m128i, loaddr: *const __m128i
+) -> __m256i {
     use x86::sse2::_mm_loadu_si128;
     let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
     _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
@@ -2082,7 +2116,9 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse"]
-pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: f32x8) {
+pub unsafe fn _mm256_storeu2_m128(
+    hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
+) {
     use x86::sse::_mm_storeu_ps;
     let lo = _mm256_castps256_ps128(a);
     _mm_storeu_ps(loaddr, lo);
@@ -2096,7 +2132,9 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: f32x8)
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse2"]
-pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: f64x4) {
+pub unsafe fn _mm256_storeu2_m128d(
+    hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
+) {
     use x86::sse2::_mm_storeu_pd;
     let lo = _mm256_castpd256_pd128(a);
     _mm_storeu_pd(loaddr, lo);
@@ -2109,7 +2147,9 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: f64x4)
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+avx,+sse2"]
-pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
+pub unsafe fn _mm256_storeu2_m128i(
+    hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
+) {
     use x86::sse2::_mm_storeu_si128;
     let lo = _mm256_castsi256_si128(a);
     _mm_storeu_si128(loaddr, lo);
@@ -2265,9 +2305,9 @@ extern "C" {
 #[cfg(test)]
 mod tests {
     use stdsimd_test::simd_test;
-    use test::black_box;  // Used to inhibit constant-folding.
+    use test::black_box; // Used to inhibit constant-folding.
 
-    use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
+    use v128::{f32x4, f64x2, i32x4, i64x2, i8x16};
     use v256::*;
     use x86::avx;
     use x86::{__m128i, __m256i};
@@ -2428,7 +2468,7 @@ mod tests {
         let a = f64x4::new(1., 2., 3., 4.);
         let b = f64x4::new(5., 6., 7., 8.);
         let r = avx::_mm256_sub_pd(a, b);
-        let e = f64x4::new(-4.,-4.,-4.,-4.);
+        let e = f64x4::new(-4., -4., -4., -4.);
         assert_eq!(r, e);
     }
 
@@ -2504,7 +2544,7 @@ mod tests {
     #[simd_test = "avx"]
     unsafe fn _mm256_sqrt_pd() {
         let a = f64x4::new(4., 9., 16., 25.);
-        let r = avx::_mm256_sqrt_pd(a, );
+        let r = avx::_mm256_sqrt_pd(a);
         let e = f64x4::new(2., 3., 4., 5.);
         assert_eq!(r, e);
     }
@@ -2561,7 +2601,10 @@ mod tests {
     unsafe fn _mm256_blendv_ps() {
         let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.0);
-        let c = f32x8::new(0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let c = f32x8::new(
+            0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
+        );
         let r = avx::_mm256_blendv_ps(a, b, c);
         let e = f32x8::new(4., 9., 16., 25., 8., 9., 64., 50.0);
         assert_eq!(r, e);
@@ -2572,7 +2615,8 @@ mod tests {
         let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.0);
         let r = avx::_mm256_dp_ps(a, b, 0xFF);
-        let e = f32x8::new(200.0, 200.0, 200.0, 200.0, 2387., 2387., 2387., 2387.);
+        let e =
+            f32x8::new(200.0, 200.0, 200.0, 200.0, 2387., 2387., 2387., 2387.);
         assert_eq!(r, e);
     }
 
@@ -2801,20 +2845,21 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_extract_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         let r = avx::_mm256_extract_epi8(a, 0);
         assert_eq!(r, 1);
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_extract_epi16() {
-        let a = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = avx::_mm256_extract_epi16(a, 0);
         assert_eq!(r, 0);
     }
@@ -3004,29 +3049,31 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_insert_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         let r = avx::_mm256_insert_epi8(a, 0, 31);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 0);
+            25, 26, 27, 28, 29, 30, 31, 0,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_insert_epi16() {
-        let a = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = avx::_mm256_insert_epi16(a, 0, 15);
-        let e = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 0);
+        let e =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0);
         assert_eq!(r, e);
     }
 
@@ -3203,18 +3250,22 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_lddqu_si256() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         let p = &a as *const _;
         let r = avx::_mm256_lddqu_si256(black_box(p));
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         assert_eq!(r, e);
     }
 
@@ -3222,8 +3273,11 @@ mod tests {
     unsafe fn _mm256_rcp_ps() {
         let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = avx::_mm256_rcp_ps(a);
-        let e = f32x8::new(0.99975586, 0.49987793, 0.33325195, 0.24993896,
-                           0.19995117, 0.16662598, 0.14282227, 0.12496948);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = f32x8::new(
+            0.99975586, 0.49987793, 0.33325195, 0.24993896,
+            0.19995117, 0.16662598, 0.14282227, 0.12496948,
+        );
         let rel_err = 0.00048828125;
         for i in 0..8 {
             assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err);
@@ -3234,8 +3288,11 @@ mod tests {
     unsafe fn _mm256_rsqrt_ps() {
         let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = avx::_mm256_rsqrt_ps(a);
-        let e = f32x8::new(0.99975586, 0.7069092, 0.5772705, 0.49987793,
-                           0.44714355, 0.40820313, 0.3779297, 0.3534546);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = f32x8::new(
+            0.99975586, 0.7069092, 0.5772705, 0.49987793,
+            0.44714355, 0.40820313, 0.3779297, 0.3534546,
+        );
         let rel_err = 0.00048828125;
         for i in 0..8 {
             assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err);
@@ -3478,30 +3535,39 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_set_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = avx::_mm256_set_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
-        assert_eq!(r, i8x32::new(32, 31, 30, 29, 28, 27, 26, 25,
-                                 24, 23, 22, 21, 20, 19, 18, 17,
-                                 16, 15, 14, 13, 12, 11, 10, 9,
-                                 8, 7, 6, 5, 4, 3, 2, 1));
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x32::new(
+            32, 31, 30, 29, 28, 27, 26, 25,
+            24, 23, 22, 21, 20, 19, 18, 17,
+            16, 15, 14, 13, 12, 11, 10, 9,
+            8, 7, 6, 5, 4, 3, 2, 1,
+        );
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_set_epi16() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = avx::_mm256_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
-        assert_eq!(r, i16x16::new(16, 15, 14, 13, 12, 11, 10, 9,
-                                  8, 7, 6, 5, 4, 3, 2, 1));
+            9, 10, 11, 12, 13, 14, 15, 16,
+        );
+        assert_eq!(
+            r,
+            i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
+        );
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_set_epi32() {
-        let r = avx::_mm256_set_epi32(
-            1, 2, 3, 4, 5, 6, 7, 8);
+        let r = avx::_mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         assert_eq!(r, i32x8::new(8, 7, 6, 5, 4, 3, 2, 1));
     }
 
@@ -3525,30 +3591,40 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_setr_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = avx::_mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
-        assert_eq!(r, i8x32::new(1, 2, 3, 4, 5, 6, 7, 8,
-                                 9, 10, 11, 12, 13, 14, 15, 16,
-                                 17, 18, 19, 20, 21, 22, 23, 24,
-                                 25, 26, 27, 28, 29, 30, 31, 32));
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x32::new(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_setr_epi16() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = avx::_mm256_setr_epi16(
             1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
-        assert_eq!(r, i16x16::new(1, 2, 3, 4, 5, 6, 7, 8,
-                                  9, 10, 11, 12, 13, 14, 15, 16));
+            9, 10, 11, 12, 13, 14, 15, 16,
+        );
+        assert_eq!(
+            r,
+            i16x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+        );
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_setr_epi32() {
-        let r = avx::_mm256_setr_epi32(
-            1, 2, 3, 4, 5, 6, 7, 8);
+        let r = avx::_mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         assert_eq!(r, i32x8::new(1, 2, 3, 4, 5, 6, 7, 8));
     }
 
@@ -3614,19 +3690,25 @@ mod tests {
     unsafe fn _mm256_castps_si256() {
         let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = avx::_mm256_castps_si256(a);
-        let e = i8x32::new(0, 0, -128, 63, 0, 0, 0, 64,
-                      0, 0, 64, 64, 0, 0, -128, 64,
-                      0, 0, -96, 64, 0, 0, -64, 64,
-                      0, 0, -32, 64, 0, 0, 0, 65);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x32::new(
+            0, 0, -128, 63, 0, 0, 0, 64,
+            0, 0, 64, 64, 0, 0, -128, 64,
+            0, 0, -96, 64, 0, 0, -64, 64,
+            0, 0, -32, 64, 0, 0, 0, 65,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx"]
     unsafe fn _mm256_castsi256_ps() {
-        let a = i8x32::new(0, 0, -128, 63, 0, 0, 0, 64,
-                      0, 0, 64, 64, 0, 0, -128, 64,
-                      0, 0, -96, 64, 0, 0, -64, 64,
-                      0, 0, -32, 64, 0, 0, 0, 65);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a = i8x32::new(
+            0, 0, -128, 63, 0, 0, 0, 64,
+            0, 0, 64, 64, 0, 0, -128, 64,
+            0, 0, -96, 64, 0, 0, -64, 64,
+            0, 0, -32, 64, 0, 0, 0, 65,
+        );
         let r = avx::_mm256_castsi256_ps(a);
         let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
         assert_eq!(r, e);
@@ -3711,16 +3793,23 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_set_m128i() {
-        let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
-        let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let hi = i8x16::new(
+            17, 18, 19, 20,
+            21, 22, 23, 24,
+            25, 26, 27, 28,
+            29, 30, 31, 32,
+        );
+        let lo =
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = avx::_mm256_set_m128i(hi, lo);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         assert_eq!(r, e);
     }
 
@@ -3744,16 +3833,21 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_setr_m128i() {
-        let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
-        let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+        let lo =
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let hi = i8x16::new(
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         let r = avx::_mm256_setr_m128i(lo, hi);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         assert_eq!(r, e);
     }
 
@@ -3781,17 +3875,24 @@ mod tests {
 
     #[simd_test = "avx"]
     unsafe fn _mm256_loadu2_m128i() {
-        let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
-        let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
-        let r = avx::_mm256_loadu2_m128i(&hi as *const _ as *const _,
-                                         &lo as *const _ as *const _);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let hi = i8x16::new(
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        let lo =
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = avx::_mm256_loadu2_m128i(
+            &hi as *const _ as *const _,
+            &lo as *const _ as *const _,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         assert_eq!(r, e);
     }
 
@@ -3801,9 +3902,11 @@ mod tests {
         let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
         let mut hi = _mm_undefined_ps();
         let mut lo = _mm_undefined_ps();
-        avx::_mm256_storeu2_m128(&mut hi as *mut _ as *mut f32,
-                              &mut lo as *mut _ as *mut f32,
-                              a);
+        avx::_mm256_storeu2_m128(
+            &mut hi as *mut _ as *mut f32,
+            &mut lo as *mut _ as *mut f32,
+            a,
+        );
         assert_eq!(hi, f32x4::new(5., 6., 7., 8.));
         assert_eq!(lo, f32x4::new(1., 2., 3., 4.));
     }
@@ -3814,9 +3917,11 @@ mod tests {
         let a = f64x4::new(1., 2., 3., 4.);
         let mut hi = _mm_undefined_pd();
         let mut lo = _mm_undefined_pd();
-        avx::_mm256_storeu2_m128d(&mut hi as *mut _ as *mut f64,
-                              &mut lo as *mut _ as *mut f64,
-                              a);
+        avx::_mm256_storeu2_m128d(
+            &mut hi as *mut _ as *mut f64,
+            &mut lo as *mut _ as *mut f64,
+            a,
+        );
         assert_eq!(hi, f64x2::new(3., 4.));
         assert_eq!(lo, f64x2::new(1., 2.));
     }
@@ -3824,17 +3929,26 @@ mod tests {
     #[simd_test = "avx"]
     unsafe fn _mm256_storeu2_m128i() {
         use x86::sse2::_mm_undefined_si128;
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
         let mut hi = _mm_undefined_si128();
         let mut lo = _mm_undefined_si128();
         avx::_mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a);
-        assert_eq!(hi, i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32));
-        assert_eq!(lo, i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16));
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x16::new(
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+
+        assert_eq!(hi, e);
+        assert_eq!(
+            lo,
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+        );
     }
 }
diff --git a/library/stdarch/src/x86/avx2.rs b/library/stdarch/src/x86/avx2.rs
index 194ca8f16305..edbf72064329 100644
--- a/library/stdarch/src/x86/avx2.rs
+++ b/library/stdarch/src/x86/avx2.rs
@@ -96,8 +96,8 @@ pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
     paddusw(a, b)
 }
 
-/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary result,
-/// shift the result right by `n` bytes, and return the low 16 bytes.
+/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
+/// result, shift the result right by `n` bytes, and return the low 16 bytes.
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpalignr, n = 15))]
@@ -116,7 +116,9 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
         (a, b, n)
     };
 
-    const fn add(a: u32, b: u32) -> u32 { a + b }
+    const fn add(a: u32, b: u32) -> u32 {
+        a + b
+    }
     macro_rules! shuffle {
         ($shift:expr) => {
             simd_shuffle32(b, a, [
@@ -140,14 +142,22 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
         }
     }
     match n {
-        0 => shuffle!(0), 1 => shuffle!(1),
-        2 => shuffle!(2), 3 => shuffle!(3),
-        4 => shuffle!(4), 5 => shuffle!(5),
-        6 => shuffle!(6), 7 => shuffle!(7),
-        8 => shuffle!(8), 9 => shuffle!(9),
-        10 => shuffle!(10), 11 => shuffle!(11),
-        12 => shuffle!(12), 13 => shuffle!(13),
-        14 => shuffle!(14), 15 => shuffle!(15),
+        0 => shuffle!(0),
+        1 => shuffle!(1),
+        2 => shuffle!(2),
+        3 => shuffle!(3),
+        4 => shuffle!(4),
+        5 => shuffle!(5),
+        6 => shuffle!(6),
+        7 => shuffle!(7),
+        8 => shuffle!(8),
+        9 => shuffle!(9),
+        10 => shuffle!(10),
+        11 => shuffle!(11),
+        12 => shuffle!(12),
+        13 => shuffle!(13),
+        14 => shuffle!(14),
+        15 => shuffle!(15),
         _ => shuffle!(16),
     }
 }
@@ -174,7 +184,7 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub unsafe fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
+pub unsafe fn _mm256_avg_epu16(a: u16x16, b: u16x16) -> u16x16 {
     pavgw(a, b)
 }
 
@@ -182,7 +192,7 @@ pub unsafe fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub unsafe fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
+pub unsafe fn _mm256_avg_epu8(a: u8x32, b: u8x32) -> u8x32 {
     pavgb(a, b)
 }
 
@@ -320,8 +330,8 @@ pub unsafe fn _mm256_blend_epi16(a: i16x16, b: i16x16, imm8: i32) -> i16x16 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpblendvb))]
-pub unsafe fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
-    pblendvb(a,b,mask)
+pub unsafe fn _mm256_blendv_epi8(a: i8x32, b: i8x32, mask: __m256i) -> i8x32 {
+    pblendvb(a, b, mask)
 }
 
 /// Broadcast the low packed 8-bit integer from `a` to all elements of
@@ -628,37 +638,83 @@ pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
 }
 
 
-// TODO _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-// TODO _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-// TODO _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
-// TODO _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-// TODO _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-// TODO _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
-// TODO _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-// TODO _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-// TODO _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale)
-// TODO _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-// TODO _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale)
-// TODO _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale)
-// TODO _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-// TODO _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-// TODO _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
-// TODO _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-// TODO _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
-// TODO _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
-// TODO _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-// TODO _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale)
-// TODO _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale)
-// TODO _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale)
-// TODO _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-// TODO _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale)
+// TODO _mm_i32gather_epi32 (int const* base_addr, __m128i vindex,
+//                           const int scale)
+// TODO _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr,
+//                                __m128i vindex, __m128i mask,
+//                                const int scale)
+// TODO _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex,
+//                              const int scale)
+// TODO _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr,
+//                                   __m256i vindex, __m256i mask,
+//                                   const int scale)
+// TODO _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex,
+//                           const int scale)
+// TODO _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr,
+//                                __m128i vindex, __m128i mask,
+//                                const int scale)
+// TODO _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex,
+//                              const int scale)
+// TODO _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr,
+//                                   __m128i vindex, __m256i mask,
+//                                   const int scale)
+// TODO _mm_i32gather_pd (double const* base_addr, __m128i vindex,
+//                        const int scale)
+// TODO _mm_mask_i32gather_pd (__m128d src, double const* base_addr,
+//                             __m128i vindex, __m128d mask,
+//                             const int scale)
+// TODO _mm256_i32gather_pd (double const* base_addr, __m128i vindex,
+//                           const int scale)
+// TODO _mm256_mask_i32gather_pd (__m256d src, double const* base_addr,
+//                                __m128i vindex, __m256d mask,
+//                                const int scale)
+// TODO _mm_i32gather_ps (float const* base_addr, __m128i vindex,
+//                        const int scale)
+// TODO _mm_mask_i32gather_ps (__m128 src, float const* base_addr,
+//                             __m128i vindex, __m128 mask,
+//                             const int scale)
+// TODO _mm256_i32gather_ps (float const* base_addr, __m256i vindex,
+//                           const int scale)
+// TODO _mm256_mask_i32gather_ps (__m256 src, float const* base_addr,
+//                                __m256i vindex, __m256 mask,
+//                                const int scale)
+// TODO _mm_i64gather_epi32 (int const* base_addr, __m128i vindex,
+//                           const int scale)
+// TODO _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr,
+//                                __m128i vindex, __m128i mask,
+//                                const int scale)
+// TODO _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex,
+//                              const int scale)
+// TODO _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr,
+//                                   __m256i vindex, __m128i mask,
+//                                   const int scale)
+// TODO _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex,
+//                           const int scale)
+// TODO _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr,
+//                                __m128i vindex, __m128i mask,
+//                                const int scale)
+// TODO _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex,
+//                              const int scale)
+// TODO _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr,
+//                                   __m256i vindex, __m256i mask,
+//                                   const int scale)
+// TODO _mm_i64gather_pd (double const* base_addr, __m128i vindex,
+//                        const int scale)
+// TODO _mm_mask_i64gather_pd (__m128d src, double const* base_addr,
+//                             __m128i vindex, __m128d mask,
+//                             const int scale)
+// TODO _mm256_i64gather_pd (double const* base_addr, __m256i vindex,
+//                           const int scale)
+// TODO _mm256_mask_i64gather_pd (__m256d src, double const* base_addr,
+//                                __m256i vindex, __m256d mask,
+//                                const int scale)
+// TODO _mm_i64gather_ps (float const* base_addr, __m128i vindex,
+//                        const int scale)
+// TODO _mm_mask_i64gather_ps (__m128 src, float const* base_addr,
+//                             __m128i vindex, __m128 mask,
+//                             const int scale)
+// TODO _mm256_i64gather_ps (float const* base_addr, __m256i vindex,
+//                           const int scale)
 // TODO _mm256_mask_i64gather_ps
 // TODO _mm256_inserti128_si256
 
@@ -946,7 +1002,7 @@ pub unsafe fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub unsafe fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
+pub unsafe fn _mm256_mullo_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a * b
 }
 
@@ -957,7 +1013,7 @@ pub unsafe fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub unsafe fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
+pub unsafe fn _mm256_mullo_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a * b
 }
 
@@ -968,7 +1024,7 @@ pub unsafe fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpmulhrsw))]
-pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
+pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     pmulhrsw(a, b)
 }
 
@@ -1088,7 +1144,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: i64x4, imm8: i32) -> i64x4 {
 #[inline(always)]
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpsadbw))]
-pub unsafe fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
+pub unsafe fn _mm256_sad_epu8(a: u8x32, b: u8x32) -> u64x4 {
     psadbw(a, b)
 }
 
@@ -1580,15 +1636,19 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
 /// use stdsimd::simd::i8x32;
 /// use stdsimd::vendor::_mm256_unpackhi_epi8;
 ///
-/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
+/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
+///     -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
 ///
 /// let c: i8x32;
 /// unsafe {
 ///     c = _mm256_unpackhi_epi8(a, b);
 /// }
 ///
-/// let expected = i8x32::new(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30, 31,-31);
+/// let expected = i8x32::new(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13,
+///     14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29,
+///     30,-30, 31,-31);
 /// assert_eq!(c, expected);
 ///
 /// #         }
@@ -1600,7 +1660,13 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
 pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
-    simd_shuffle32(a, b, [8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63])
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    simd_shuffle32(a, b, [
+        8, 40, 9, 41, 10, 42, 11, 43,
+        12, 44, 13, 45, 14, 46, 15, 47,
+        24, 56, 25, 57, 26, 58, 27, 59,
+        28, 60, 29, 61, 30, 62, 31, 63,
+    ])
 }
 
 /// Unpack and interleave 8-bit integers from the low half of each
@@ -1619,15 +1685,18 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
 /// use stdsimd::simd::i8x32;
 /// use stdsimd::vendor::_mm256_unpacklo_epi8;
 ///
-/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
+/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
+///     -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
 ///
 /// let c: i8x32;
 /// unsafe {
 ///     c = _mm256_unpacklo_epi8(a, b);
 /// }
 ///
-/// let expected = i8x32::new(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7, 16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
+/// let expected = i8x32::new(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7,
+///     16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
 /// assert_eq!(c, expected);
 ///
 /// #         }
@@ -1639,7 +1708,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
 pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
-    simd_shuffle32(a, b, [0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55])
+    #[cfg_attr(rustfmt, rustfmt_skip)]
+    simd_shuffle32(a, b, [
+        0, 32, 1, 33, 2, 34, 3, 35,
+        4, 36, 5, 37, 6, 38, 7, 39,
+        16, 48, 17, 49, 18, 50, 19, 51,
+        20, 52, 21, 53, 22, 54, 23, 55,
+    ])
 }
 
 /// Unpack and interleave 16-bit integers from the high half of each
@@ -1666,7 +1741,8 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
 ///     c = _mm256_unpackhi_epi16(a, b);
 /// }
 ///
-/// let expected = i16x16::new(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14, 15,-15);
+/// let expected = i16x16::new(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14,
+///     15,-15);
 /// assert_eq!(c, expected);
 ///
 /// #         }
@@ -1678,7 +1754,11 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
 pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
-    simd_shuffle16(a, b, [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31])
+    simd_shuffle16(
+        a,
+        b,
+        [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
+    )
 }
 
 /// Unpack and interleave 16-bit integers from the low half of each
@@ -1705,7 +1785,8 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
 ///     c = _mm256_unpacklo_epi16(a, b);
 /// }
 ///
-/// let expected = i16x16::new(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10, 11,-11);
+/// let expected = i16x16::new(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10,
+///     11,-11);
 /// assert_eq!(c, expected);
 ///
 /// #         }
@@ -1717,7 +1798,11 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
 pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 {
-    simd_shuffle16(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27])
+    simd_shuffle16(
+        a,
+        b,
+        [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
+    )
 }
 
 /// Unpack and interleave 32-bit integers from the high half of each
@@ -1972,9 +2057,9 @@ extern "C" {
     #[link_name = "llvm.x86.avx2.pmulh.w"]
     fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
     #[link_name = "llvm.x86.avx2.pmul.dq"]
-    fn pmuldq(a: i32x8, b:i32x8) -> i64x4;
+    fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
     #[link_name = "llvm.x86.avx2.pmulu.dq"]
-    fn pmuludq(a: u32x8, b:u32x8) -> u64x4;
+    fn pmuludq(a: u32x8, b: u32x8) -> u64x4;
     #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
     fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
     #[link_name = "llvm.x86.avx2.packsswb"]
@@ -2006,17 +2091,17 @@ extern "C" {
     #[link_name = "llvm.x86.avx2.pslli.q"]
     fn pslliq(a: i64x4, imm8: i32) -> i64x4;
     #[link_name = "llvm.x86.avx2.psllv.d"]
-    fn psllvd(a:i32x4, count:i32x4) -> i32x4;
+    fn psllvd(a: i32x4, count: i32x4) -> i32x4;
     #[link_name = "llvm.x86.avx2.psllv.d.256"]
-    fn psllvd256(a:i32x8, count:i32x8) -> i32x8;
+    fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
     #[link_name = "llvm.x86.avx2.psllv.q"]
-    fn psllvq(a:i64x2, count:i64x2) -> i64x2;
+    fn psllvq(a: i64x2, count: i64x2) -> i64x2;
     #[link_name = "llvm.x86.avx2.psllv.q.256"]
-    fn psllvq256(a:i64x4, count:i64x4) -> i64x4;
+    fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
     #[link_name = "llvm.x86.avx2.psra.w"]
-    fn psraw(a: i16x16, count:i16x8) -> i16x16;
+    fn psraw(a: i16x16, count: i16x8) -> i16x16;
     #[link_name = "llvm.x86.avx2.psra.d"]
-    fn psrad(a: i32x8, count:i32x4) -> i32x8;
+    fn psrad(a: i32x8, count: i32x4) -> i32x8;
     #[link_name = "llvm.x86.avx2.psrai.w"]
     fn psraiw(a: i16x16, imm8: i32) -> i16x16;
     #[link_name = "llvm.x86.avx2.psrai.d"]
@@ -2026,11 +2111,11 @@ extern "C" {
     #[link_name = "llvm.x86.avx2.psrav.d.256"]
     fn psravd256(a: i32x8, count: i32x8) -> i32x8;
     #[link_name = "llvm.x86.avx2.psrl.w"]
-    fn psrlw(a: i16x16, count:i16x8) -> i16x16;
+    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
     #[link_name = "llvm.x86.avx2.psrl.d"]
-    fn psrld(a: i32x8, count:i32x4) -> i32x8;
+    fn psrld(a: i32x8, count: i32x4) -> i32x8;
     #[link_name = "llvm.x86.avx2.psrl.q"]
-    fn psrlq(a: i64x4, count:i64x2) -> i64x4;
+    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
     #[link_name = "llvm.x86.avx2.psrli.w"]
     fn psrliw(a: i16x16, imm8: i32) -> i16x16;
     #[link_name = "llvm.x86.avx2.psrli.d"]
@@ -2071,49 +2156,53 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_abs_epi32() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i32x8::new(
             0, 1, -1, std::i32::MAX,
-            std::i32::MIN + 1, 100, -100, -32);
+            std::i32::MIN + 1, 100, -100, -32,
+        );
         let r = avx2::_mm256_abs_epi32(a);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i32x8::new(
             0, 1, 1, std::i32::MAX,
-            (std::i32::MIN + 1).abs(), 100, 100, 32);
+            (std::i32::MIN + 1).abs(), 100, 100, 32,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_abs_epi16() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i16x16::new(
-            0, 1, -1, 2,
-            -2, 3, -3, 4,
-            -4, 5, -5, std::i16::MAX,
-            std::i16::MIN + 1, 100, -100, -32);
+            0,  1, -1, 2, -2, 3, -3, 4,
+            -4, 5, -5, std::i16::MAX, std::i16::MIN + 1, 100, -100, -32,
+        );
         let r = avx2::_mm256_abs_epi16(a);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i16x16::new(
-            0, 1, 1, 2,
-            2, 3, 3, 4,
-            4, 5, 5, std::i16::MAX,
-            (std::i16::MIN + 1).abs(), 100, 100, 32);
+            0, 1, 1, 2, 2, 3, 3, 4,
+            4, 5, 5, std::i16::MAX, (std::i16::MIN + 1).abs(), 100, 100, 32,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_abs_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
-            0, 1, -1, 2,
-            -2, 3, -3, 4,
-            -4, 5, -5, std::i8::MAX,
-            std::i8::MIN + 1, 100, -100, -32,
-            0, 1, -1, 2,
-            -2, 3, -3, 4,
-            -4, 5, -5, std::i8::MAX,
-            std::i8::MIN + 1, 100, -100, -32);
+            0, 1, -1, 2, -2, 3, -3, 4,
+            -4, 5, -5, std::i8::MAX, std::i8::MIN + 1, 100, -100, -32,
+            0, 1, -1, 2, -2, 3, -3, 4,
+            -4, 5, -5, std::i8::MAX, std::i8::MIN + 1, 100, -100, -32,
+        );
         let r = avx2::_mm256_abs_epi8(a);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             0, 1, 1, 2, 2, 3, 3, 4,
             4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32,
             0, 1, 1, 2, 2, 3, 3, 4,
-            4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32);
+            4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32,
+        );
         assert_eq!(r, e);
     }
 
@@ -2137,52 +2226,70 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_add_epi16() {
-        let a = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 15);
-        let b = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = avx2::_mm256_add_epi16(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i16x16::new(
             0, 2, 4, 6, 8, 10, 12, 14,
-            16, 18, 20, 22, 24, 26, 28, 30);
+            16, 18, 20, 22, 24, 26, 28, 30,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_add_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             0, 1, 2, 3, 4, 5, 6, 7,
             8, 9, 10, 11, 12, 13, 14, 15,
             16, 17, 18, 19, 20, 21, 22, 23,
-            24, 25, 26, 27, 28, 29, 30, 31);
+            24, 25, 26, 27, 28, 29, 30, 31,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x32::new(
             0, 1, 2, 3, 4, 5, 6, 7,
             8, 9, 10, 11, 12, 13, 14, 15,
             16, 17, 18, 19, 20, 21, 22, 23,
-            24, 25, 26, 27, 28, 29, 30, 31);
+            24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = avx2::_mm256_add_epi8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
-            0, 2, 4, 6, 8, 10, 12, 14, 16,
-            18, 20, 22, 24, 26, 28, 30, 32,
-            34, 36, 38, 40, 42, 44, 46, 48,
-            50, 52, 54, 56, 58, 60, 62);
+            0, 2, 4, 6, 8, 10, 12, 14,
+            16, 18, 20, 22, 24, 26, 28, 30,
+            32, 34, 36, 38, 40, 42, 44, 46,
+            48, 50, 52, 54, 56, 58, 60, 62,
+        );
         assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_adds_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            0, 1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 11, 12, 13, 14, 15,
+            16, 17, 18, 19, 20, 21, 22, 23,
+            24, 25, 26, 27, 28, 29, 30, 31,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x32::new(
-            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
+            32, 33, 34, 35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44, 45, 46, 47,
+            48, 49, 50, 51, 52, 53, 54, 55,
+            56, 57, 58, 59, 60, 61, 62, 63,
+        );
         let r = avx2::_mm256_adds_epi8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
-            32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
-            64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94);
+            32, 34, 36, 38, 40, 42, 44, 46,
+            48, 50, 52, 54, 56, 58, 60, 62,
+            64, 66, 68, 70, 72, 74, 76, 78,
+            80, 82, 84, 86, 88, 90, 92, 94,
+        );
         assert_eq!(r, e);
     }
 
@@ -2204,13 +2311,19 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_adds_epi16() {
-        let a = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i16x16::new(
-            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47);
-        let r = avx2::_mm256_adds_epi16(a,  b);
+            32, 33, 34, 35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44, 45, 46, 47,
+        );
+        let r = avx2::_mm256_adds_epi16(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i16x16::new(
-            32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62);
+            32, 34, 36, 38, 40, 42, 44, 46,
+            48, 50, 52, 54, 56, 58, 60, 62,
+        );
 
         assert_eq!(r, e);
     }
@@ -2233,16 +2346,28 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_adds_epu8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = u8x32::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            0, 1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 11, 12, 13, 14, 15,
+            16, 17, 18, 19, 20, 21, 22, 23,
+            24, 25, 26, 27, 28, 29, 30, 31,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = u8x32::new(
-            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
+            32, 33, 34, 35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44, 45, 46, 47,
+            48, 49, 50, 51, 52, 53, 54, 55,
+            56, 57, 58, 59, 60, 61, 62, 63,
+        );
         let r = avx2::_mm256_adds_epu8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = u8x32::new(
-            32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
-            64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94);
+            32, 34, 36, 38, 40, 42, 44, 46,
+            48, 50, 52, 54, 56, 58, 60, 62,
+            64, 66, 68, 70, 72, 74, 76, 78,
+            80, 82, 84, 86, 88, 90, 92, 94,
+        );
         assert_eq!(r, e);
     }
 
@@ -2257,13 +2382,19 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_adds_epu16() {
-        let a = u16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            u16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = u16x16::new(
-            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47);
+            32, 33, 34, 35, 36, 37, 38, 39,
+            40, 41, 42, 43, 44, 45, 46, 47,
+        );
         let r = avx2::_mm256_adds_epu16(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = u16x16::new(
-            32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62);
+            32, 34, 36, 38, 40, 42, 44, 46,
+            48, 50, 52, 54, 56, 58, 60, 62,
+        );
 
         assert_eq!(r, e);
     }
@@ -2346,11 +2477,11 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_blendv_epi8() {
-        let (a,b) = (i8x32::splat(4),i8x32::splat(2));
-        let mask = i8x32::splat(0).replace(2,-1);
-        let e = i8x32::splat(4).replace(2,2);
-        let r= avx2::_mm256_blendv_epi8(a,b,mask);
-        assert_eq!(r,e);
+        let (a, b) = (i8x32::splat(4), i8x32::splat(2));
+        let mask = i8x32::splat(0).replace(2, -1);
+        let e = i8x32::splat(4).replace(2, 2);
+        let r = avx2::_mm256_blendv_epi8(a, b, mask);
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
@@ -2413,8 +2544,12 @@ mod tests {
     unsafe fn _mm256_broadcastsi128_si256() {
         let a = i64x2::new(0x0987654321012334, 0x5678909876543210);
         let res = avx2::_mm256_broadcastsi128_si256(a);
-        let retval = i64x4::new(0x0987654321012334, 0x5678909876543210,
-                                0x0987654321012334, 0x5678909876543210);
+        let retval = i64x4::new(
+            0x0987654321012334,
+            0x5678909876543210,
+            0x0987654321012334,
+            0x5678909876543210,
+        );
         assert_eq!(res, retval);
     }
 
@@ -2448,30 +2583,38 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cmpeq_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            0, 1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 11, 12, 13, 14, 15,
+            16, 17, 18, 19, 20, 21, 22, 23,
+            24, 25, 26, 27, 28, 29, 30, 31,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x32::new(
-            31, 30, 2, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
-            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+            31, 30, 2, 28, 27, 26, 25, 24,
+            23, 22, 21, 20, 19, 18, 17, 16,
+            15, 14, 13, 12, 11, 10, 9, 8,
+            7, 6, 5, 4, 3, 2, 1, 0,
+        );
         let r = avx2::_mm256_cmpeq_epi8(a, b);
-        assert_eq!(r, i8x32::splat(0).replace(2,0xFFu8 as i8));
+        assert_eq!(r, i8x32::splat(0).replace(2, 0xFFu8 as i8));
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cmpeq_epi16() {
-        let a = i16x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b = i16x16::new(
-            15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let a =
+            i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b =
+            i16x16::new(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = avx2::_mm256_cmpeq_epi16(a, b);
         assert_eq!(r, i16x16::splat(0).replace(2, 0xFFFFu16 as i16));
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cmpeq_epi32() {
-        let a = i32x8::new(0, 1, 2, 3,4,5,6,7);
-        let b = i32x8::new(7,6,2,4,3, 2, 1, 0);
+        let a = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = i32x8::new(7, 6, 2, 4, 3, 2, 1, 0);
         let r = avx2::_mm256_cmpeq_epi32(a, b);
         assert_eq!(r, i32x8::splat(0).replace(2, 0xFFFFFFFFu32 as i32));
     }
@@ -2481,8 +2624,10 @@ mod tests {
         let a = i64x4::new(0, 1, 2, 3);
         let b = i64x4::new(3, 2, 2, 0);
         let r = avx2::_mm256_cmpeq_epi64(a, b);
-        assert_eq!(r, i64x4::splat(0).replace(
-            2, 0xFFFFFFFFFFFFFFFFu64 as i64));
+        assert_eq!(
+            r,
+            i64x4::splat(0).replace(2, 0xFFFFFFFFFFFFFFFFu64 as i64)
+        );
     }
 
     #[simd_test = "avx2"]
@@ -2514,27 +2659,33 @@ mod tests {
         let a = i64x4::splat(0).replace(0, 5);
         let b = i64x4::splat(0);
         let r = avx2::_mm256_cmpgt_epi64(a, b);
-        assert_eq!(r, i64x4::splat(0).replace(
-            0, 0xFFFFFFFFFFFFFFFFu64 as i64));
+        assert_eq!(
+            r,
+            i64x4::splat(0).replace(0, 0xFFFFFFFFFFFFFFFFu64 as i64)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cvtepi8_epi16() {
-        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
-        let r = i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
+        let a =
+            i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
+        let r =
+            i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
         assert_eq!(r, avx2::_mm256_cvtepi8_epi16(a));
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cvtepi8_epi32() {
-        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
+        let a =
+            i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
         let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
         assert_eq!(r, avx2::_mm256_cvtepi8_epi32(a));
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_cvtepi8_epi64() {
-        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
+        let a =
+            i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
         let r = i64x4::new(0, 0, -1, 1);
         assert_eq!(r, avx2::_mm256_cvtepi8_epi64(a));
     }
@@ -2580,11 +2731,11 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_hadds_epi16() {
-        let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,1);
+        let a = i16x16::splat(2).replace(0, 0x7FFF).replace(1, 1);
         let b = i16x16::splat(4);
         let r = avx2::_mm256_hadds_epi16(a, b);
-        let e = i16x16::new(
-            0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
+        let e =
+            i16x16::new(0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
         assert_eq!(r, e);
     }
 
@@ -2608,10 +2759,10 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_hsubs_epi16() {
-        let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,-1);
+        let a = i16x16::splat(2).replace(0, 0x7FFF).replace(1, -1);
         let b = i16x16::splat(4);
         let r = avx2::_mm256_hsubs_epi16(a, b);
-        let e = i16x16::splat(0).replace(0,0x7FFF);
+        let e = i16x16::splat(0).replace(0, 0x7FFF);
         assert_eq!(r, e);
     }
 
@@ -2902,11 +3053,13 @@ mod tests {
         let a = i16x16::splat(2);
         let b = i16x16::splat(4);
         let r = avx2::_mm256_packs_epi16(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x32::new(
             2, 2, 2, 2, 2, 2, 2, 2,
             4, 4, 4, 4, 4, 4, 4, 4,
             2, 2, 2, 2, 2, 2, 2, 2,
-            4, 4, 4, 4, 4, 4, 4, 4);
+            4, 4, 4, 4, 4, 4, 4, 4,
+        );
 
         assert_eq!(r, e);
     }
@@ -2916,11 +3069,7 @@ mod tests {
         let a = i32x8::splat(2);
         let b = i32x8::splat(4);
         let r = avx2::_mm256_packs_epi32(a, b);
-        let e = i16x16::new(
-            2, 2, 2, 2,
-            4, 4, 4, 4,
-            2, 2, 2, 2,
-            4, 4, 4, 4);
+        let e = i16x16::new(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
 
         assert_eq!(r, e);
     }
@@ -2930,11 +3079,13 @@ mod tests {
         let a = i16x16::splat(2);
         let b = i16x16::splat(4);
         let r = avx2::_mm256_packus_epi16(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = u8x32::new(
             2, 2, 2, 2, 2, 2, 2, 2,
             4, 4, 4, 4, 4, 4, 4, 4,
             2, 2, 2, 2, 2, 2, 2, 2,
-            4, 4, 4, 4, 4, 4, 4, 4);
+            4, 4, 4, 4, 4, 4, 4, 4,
+        );
 
         assert_eq!(r, e);
     }
@@ -2944,11 +3095,7 @@ mod tests {
         let a = i32x8::splat(2);
         let b = i32x8::splat(4);
         let r = avx2::_mm256_packus_epi32(a, b);
-        let e = u16x16::new(
-            2, 2, 2, 2,
-            4, 4, 4, 4,
-            2, 2, 2, 2,
-            4, 4, 4, 4);
+        let e = u16x16::new(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
 
         assert_eq!(r, e);
     }
@@ -3017,21 +3164,24 @@ mod tests {
     unsafe fn _mm256_slli_epi16() {
         assert_eq!(
             avx2::_mm256_slli_epi16(i16x16::splat(0xFF), 4),
-            i16x16::splat(0xFF0));
+            i16x16::splat(0xFF0)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_slli_epi32() {
         assert_eq!(
             avx2::_mm256_slli_epi32(i32x8::splat(0xFFFF), 4),
-            i32x8::splat(0xFFFF0));
+            i32x8::splat(0xFFFF0)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_slli_epi64() {
         assert_eq!(
             avx2::_mm256_slli_epi64(i64x4::splat(0xFFFFFFFF), 4),
-            i64x4::splat(0xFFFFFFFF0));
+            i64x4::splat(0xFFFFFFFF0)
+        );
     }
 
     #[simd_test = "avx2"]
@@ -3090,14 +3240,16 @@ mod tests {
     unsafe fn _mm256_srai_epi16() {
         assert_eq!(
             avx2::_mm256_srai_epi16(i16x16::splat(-1), 1),
-            i16x16::splat(-1));
+            i16x16::splat(-1)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_srai_epi32() {
         assert_eq!(
             avx2::_mm256_srai_epi32(i32x8::splat(-1), 1),
-            i32x8::splat(-1));
+            i32x8::splat(-1)
+        );
     }
 
     #[simd_test = "avx2"]
@@ -3106,7 +3258,7 @@ mod tests {
         let count = i32x4::splat(1);
         let r = avx2::_mm_srav_epi32(a, count);
         let e = i32x4::splat(2);
-        assert_eq!(r, e );
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
@@ -3115,7 +3267,7 @@ mod tests {
         let count = i32x8::splat(1);
         let r = avx2::_mm256_srav_epi32(a, count);
         let e = i32x8::splat(2);
-        assert_eq!(r, e );
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
@@ -3146,21 +3298,24 @@ mod tests {
     unsafe fn _mm256_srli_epi16() {
         assert_eq!(
             avx2::_mm256_srli_epi16(i16x16::splat(0xFF), 4),
-            i16x16::splat(0xF));
+            i16x16::splat(0xF)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_srli_epi32() {
         assert_eq!(
             avx2::_mm256_srli_epi32(i32x8::splat(0xFFFF), 4),
-            i32x8::splat(0xFFF));
+            i32x8::splat(0xFFF)
+        );
     }
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_srli_epi64() {
         assert_eq!(
             avx2::_mm256_srli_epi64(i64x4::splat(0xFFFFFFFF), 4),
-            i64x4::splat(0xFFFFFFF));
+            i64x4::splat(0xFFFFFFF)
+        );
     }
 
     #[simd_test = "avx2"]
@@ -3274,41 +3429,51 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_alignr_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32);
+            25, 26, 27, 28, 29, 30, 31, 32,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x32::new(
             -1, -2, -3, -4, -5, -6, -7, -8,
             -9, -10, -11, -12, -13, -14, -15, -16,
             -17, -18, -19, -20, -21, -22, -23, -24,
-            -25, -26, -27, -28, -29, -30, -31, -32);
+            -25, -26, -27, -28, -29, -30, -31, -32,
+        );
         let r = avx2::_mm256_alignr_epi8(a, b, 33);
         assert_eq!(r, i8x32::splat(0));
 
         let r = avx2::_mm256_alignr_epi8(a, b, 17);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let expected = i8x32::new(
             2, 3, 4, 5, 6, 7, 8, 9,
             10, 11, 12, 13, 14, 15, 16, 17,
             18, 19, 20, 21, 22, 23, 24, 25,
-            26, 27, 28, 29, 30, 31, 32, 0);
+            26, 27, 28, 29, 30, 31, 32, 0,
+        );
         assert_eq!(r, expected);
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let expected = i8x32::new(
             -17, -18, -19, -20, -21, -22, -23, -24,
             -25, -26, -27, -28, -29, -30, -31, -32,
             1, 2, 3, 4, 5, 6, 7, 8,
-            9, 10, 11, 12, 13, 14, 15, 16);
+            9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = avx2::_mm256_alignr_epi8(a, b, 16);
         assert_eq!(r, expected);
 
         let r = avx2::_mm256_alignr_epi8(a, b, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let expected = i8x32::new(
             -16, -17, -18, -19, -20, -21, -22, -23,
             -24, -25, -26, -27, -28, -29, -30, -31,
             -32, 1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 11, 12, 13, 14, 15);
+            8, 9, 10, 11, 12, 13, 14, 15,
+        );
         assert_eq!(r, expected);
 
         let r = avx2::_mm256_alignr_epi8(a, b, 0);
@@ -3317,18 +3482,21 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn _mm256_shuffle_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = u8x32::new(
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24,
-            25, 26, 27, 28, 29, 30, 31, 32
+            25, 26, 27, 28, 29, 30, 31, 32,
         );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = u8x32::new(
             4, 128, 4, 3, 24, 12, 6, 19,
             12, 5, 5, 10, 4, 1, 8, 0,
             4, 128, 4, 3, 24, 12, 6, 19,
             12, 5, 5, 10, 4, 1, 8, 0,
         );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let expected = u8x32::new(
             5, 0, 5, 4, 9, 13, 7, 4,
             13, 6, 6, 11, 5, 2, 9, 1,
diff --git a/library/stdarch/src/x86/bmi.rs b/library/stdarch/src/x86/bmi.rs
index 039318991551..2cc86d800923 100644
--- a/library/stdarch/src/x86/bmi.rs
+++ b/library/stdarch/src/x86/bmi.rs
@@ -1,11 +1,16 @@
 //! Bit Manipulation Instruction (BMI) Set 1.0.
 //!
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
-//! Manual Volume 2: Instruction Set Reference,
-//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29)
-//! provides a quick overview of the available instructions.
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29
+//! "Bit Manipulation Instruction Sets: BMI1 (Wikipedia)"
+
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -32,8 +37,8 @@ pub unsafe fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
 /// Extracts bits of `a` specified by `control` into
 /// the least significant bits of the result.
 ///
-/// Bits [7,0] of `control` specify the index to the first bit in the range to be
-/// extracted, and bits [15,8] specify the length of the range.
+/// Bits [7,0] of `control` specify the index to the first bit in the range to
+/// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
 #[target_feature = "+bmi"]
 #[cfg_attr(test, assert_instr(bextr))]
@@ -44,8 +49,8 @@ pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
 /// Extracts bits of `a` specified by `control` into
 /// the least significant bits of the result.
 ///
-/// Bits [7,0] of `control` specify the index to the first bit in the range to be
-/// extracted, and bits [15,8] specify the length of the range.
+/// Bits [7,0] of `control` specify the index to the first bit in the range to
+/// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
 #[target_feature = "+bmi"]
 #[cfg_attr(test, assert_instr(bextr))]
@@ -177,9 +182,9 @@ pub unsafe fn _mm_tzcnt_u64(x: u64) -> u64 {
 
 #[allow(dead_code)]
 extern "C" {
-    #[link_name="llvm.x86.bmi.bextr.32"]
+    #[link_name = "llvm.x86.bmi.bextr.32"]
     fn x86_bmi_bextr_32(x: u32, y: u32) -> u32;
-    #[link_name="llvm.x86.bmi.bextr.64"]
+    #[link_name = "llvm.x86.bmi.bextr.64"]
     fn x86_bmi_bextr_64(x: u64, y: u64) -> u64;
 }
 
diff --git a/library/stdarch/src/x86/bmi2.rs b/library/stdarch/src/x86/bmi2.rs
index 22cd3b4dc14e..700c6d0e436e 100644
--- a/library/stdarch/src/x86/bmi2.rs
+++ b/library/stdarch/src/x86/bmi2.rs
@@ -1,11 +1,15 @@
 //! Bit Manipulation Instruction (BMI) Set 2.0.
 //!
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
-//! Manual Volume 2: Instruction Set Reference,
-//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectu res-software-developer-instruction-set-reference-manual-325383.pdf).
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29)
-//! provides a quick overview of the available instructions.
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29
+//! "Bit Manipulation Instruction Sets: BMI2 (Wikipedia)"
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -96,17 +100,17 @@ pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
 
 #[allow(dead_code)]
 extern "C" {
-    #[link_name="llvm.x86.bmi.bzhi.32"]
+    #[link_name = "llvm.x86.bmi.bzhi.32"]
     fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32;
-    #[link_name="llvm.x86.bmi.bzhi.64"]
+    #[link_name = "llvm.x86.bmi.bzhi.64"]
     fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
-    #[link_name="llvm.x86.bmi.pdep.32"]
+    #[link_name = "llvm.x86.bmi.pdep.32"]
     fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32;
-    #[link_name="llvm.x86.bmi.pdep.64"]
+    #[link_name = "llvm.x86.bmi.pdep.64"]
     fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
-    #[link_name="llvm.x86.bmi.pext.32"]
+    #[link_name = "llvm.x86.bmi.pext.32"]
     fn x86_bmi2_pext_32(x: u32, y: u32) -> u32;
-    #[link_name="llvm.x86.bmi.pext.64"]
+    #[link_name = "llvm.x86.bmi.pext.64"]
     fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
 }
 
@@ -118,7 +122,7 @@ mod tests {
 
     #[simd_test = "bmi2"]
     unsafe fn _pext_u32() {
-        let n  = 0b1011_1110_1001_0011u32;
+        let n = 0b1011_1110_1001_0011u32;
 
         let m0 = 0b0110_0011_1000_0101u32;
         let s0 = 0b0000_0000_0011_0101u32;
@@ -133,7 +137,7 @@ mod tests {
     #[simd_test = "bmi2"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _pext_u64() {
-        let n  = 0b1011_1110_1001_0011u64;
+        let n = 0b1011_1110_1001_0011u64;
 
         let m0 = 0b0110_0011_1000_0101u64;
         let s0 = 0b0000_0000_0011_0101u64;
@@ -147,7 +151,7 @@ mod tests {
 
     #[simd_test = "bmi2"]
     unsafe fn _pdep_u32() {
-        let n  = 0b1011_1110_1001_0011u32;
+        let n = 0b1011_1110_1001_0011u32;
 
         let m0 = 0b0110_0011_1000_0101u32;
         let s0 = 0b0000_0010_0000_0101u32;
@@ -162,7 +166,7 @@ mod tests {
     #[simd_test = "bmi2"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _pdep_u64() {
-        let n  = 0b1011_1110_1001_0011u64;
+        let n = 0b1011_1110_1001_0011u64;
 
         let m0 = 0b0110_0011_1000_0101u64;
         let s0 = 0b0000_0010_0000_0101u64;
@@ -194,23 +198,31 @@ mod tests {
         let a: u32 = 4_294_967_200;
         let b: u32 = 2;
         let (lo, hi): (u32, u32) = bmi2::_mulx_u32(a, b);
-        // result = 8589934400
-        //        = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64
-        //            ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        /*
+result = 8589934400
+       = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64
+           ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        */
         assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32);
         assert_eq!(hi, 0b0001u32);
     }
 
     #[simd_test = "bmi2"]
     #[cfg(not(target_arch = "x86"))]
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     unsafe fn _mulx_u64() {
         let a: u64 = 9_223_372_036_854_775_800;
         let b: u64 = 100;
         let (lo, hi): (u64, u64) = bmi2::_mulx_u64(a, b);
-        // result = 922337203685477580000
-        //        = 0b00110001_11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u128
-        //            ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-        assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64);
+        /*
+result = 922337203685477580000 =
+0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
+  ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        */
+        assert_eq!(
+            lo,
+            0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
+        );
         assert_eq!(hi, 0b00110001u64);
     }
 }
diff --git a/library/stdarch/src/x86/macros.rs b/library/stdarch/src/x86/macros.rs
index 46b3f97d8a60..e835fe210709 100644
--- a/library/stdarch/src/x86/macros.rs
+++ b/library/stdarch/src/x86/macros.rs
@@ -348,5 +348,3 @@ macro_rules! assert_approx_eq {
                  *a, *b, $eps, (*a - *b).abs());
     })
 }
-
-
diff --git a/library/stdarch/src/x86/mod.rs b/library/stdarch/src/x86/mod.rs
index 6dbdb3e247e4..a046c453c766 100644
--- a/library/stdarch/src/x86/mod.rs
+++ b/library/stdarch/src/x86/mod.rs
@@ -12,7 +12,7 @@ pub use self::bmi::*;
 pub use self::bmi2::*;
 pub use self::tbm::*;
 
-pub use self::runtime::{__Feature, __unstable_detect_feature};
+pub use self::runtime::{__unstable_detect_feature, __Feature};
 
 #[allow(non_camel_case_types)]
 pub type __m128i = ::v128::i8x16;
diff --git a/library/stdarch/src/x86/runtime.rs b/library/stdarch/src/x86/runtime.rs
index 4071a1ce587d..5d44c8793435 100644
--- a/library/stdarch/src/x86/runtime.rs
+++ b/library/stdarch/src/x86/runtime.rs
@@ -1,9 +1,9 @@
 //! This module implements minimal run-time feature detection for x86.
 //!
-//! The features are detected using the `detect_features` function below. This function
-//! uses the CPUID instruction to read the feature flags from the CPU and encodes them in
-//! an `usize` where each bit position represents whether a feature is available (bit is set)
-//! or unavaiable (bit is cleared).
+//! The features are detected using the `detect_features` function below.
+//! This function uses the CPUID instruction to read the feature flags from the
+//! CPU and encodes them in a `usize` where each bit position represents
+//! whether a feature is available (bit is set) or unavailable (bit is cleared).
 //!
 //! The enum `__Feature` is used to map bit positions to feature names, and the
 //! the `__unstable_detect_feature!` macro is used to map string literals (e.g.
@@ -12,10 +12,10 @@
 //!
 //! The run-time feature detection is performed by the
 //! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
-//! this functions queries the CPU for the available features and stores them in
-//! a global `AtomicUsize` variable. The query is performed by just checking whether the
-//! feature bit in this global variable is set or cleared.
-use ::std::sync::atomic::{AtomicUsize, Ordering};
+//! this function queries the CPU for the available features and stores them
+//! in a global `AtomicUsize` variable. The query is performed by just checking
+//! whether the feature bit in this global variable is set or cleared.
+use std::sync::atomic::{AtomicUsize, Ordering};
 
 /// This macro maps the string-literal feature names to values of the
 /// `__Feature` enum at compile-time. The feature names used are the same as
@@ -26,22 +26,68 @@ use ::std::sync::atomic::{AtomicUsize, Ordering};
 #[macro_export]
 #[doc(hidden)]
 macro_rules! __unstable_detect_feature {
-    ("sse") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse{}) };
-    ("sse2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse2{}) };
-    ("sse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse3{}) };
-    ("ssse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::ssse3{}) };
-    ("sse4.1") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_1{}) };
-    ("sse4.2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_2{}) };
-    ("avx") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx{}) };
-    ("avx2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx2{}) };
-    ("fma") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::fma{}) };
-    ("bmi") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi{}) };
-    ("bmi2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi2{}) };
-    ("abm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
-    ("lzcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
-    ("tbm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::tbm{}) };
-    ("popcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::popcnt{}) };
-    ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
+    ("sse") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::sse{})  };
+    ("sse2") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::sse2{})
+    };
+    ("sse3") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::sse3{})
+    };
+    ("ssse3") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::ssse3{})
+    };
+    ("sse4.1") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::sse4_1{})
+    };
+    ("sse4.2") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::sse4_2{})
+    };
+    ("avx") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::avx{})
+    };
+    ("avx2") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::avx2{})
+    };
+    ("fma") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::fma{})
+    };
+    ("bmi") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::bmi{})
+    };
+    ("bmi2") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::bmi2{})
+    };
+    ("abm") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::abm{})
+    };
+    ("lzcnt") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::abm{})
+    };
+    ("tbm") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::tbm{})
+    };
+    ("popcnt") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::popcnt{})
+    };
+    ($t:tt) => {
+        compile_error!(concat!("unknown target feature: ", $t))
+    };
 }
 
 /// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
@@ -74,15 +120,15 @@ pub enum __Feature {
     bmi,
     /// BMI1 (Bit Manipulation Instructions 2)
     bmi2,
-    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero Count) on Intel
+    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
+    /// Count) on Intel
     abm,
     /// TBM (Trailing Bit Manipulation)
     tbm,
     /// POPCNT (Population Count)
     popcnt,
 
-    #[doc(hidden)]
-    __NonExhaustive
+    #[doc(hidden)] __NonExhaustive,
 }
 
 fn set_bit(x: usize, bit: u32) -> usize {
@@ -102,14 +148,19 @@ fn inv_test_bit(v: usize, idx: u32) -> bool {
 
 /// Run-time feature detection on x86 works by using the CPUID instruction.
 ///
-/// The [CPUID Wikipedia page](https://en.wikipedia.org/wiki/CPUID) contains all
-/// the information about which flags to set to query which values, and in which
-/// registers these are reported.
+/// The [CPUID Wikipedia page][wiki_cpuid] contains
+/// all the information about which flags to set to query which values, and in
+/// which registers these are reported.
 ///
 /// The definitive references are:
-/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
-/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
+/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+///   Instruction Set Reference, A-Z][intel64_ref].
+/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+///   System Instructions][amd64_ref].
 ///
+/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
+/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
 fn detect_features() -> usize {
     let ebx;
     let ecx;
@@ -119,14 +170,16 @@ fn detect_features() -> usize {
         /// To obtain all feature flags we need two CPUID queries:
 
         /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
-        /// This gives us most of the CPU features in ECX and EDX (see below),
+        /// This gives us most of the CPU features in ECX and EDX (see
+        /// below),
         asm!("cpuid"
              : "={ecx}"(ecx), "={edx}"(edx)
              : "{eax}"(0x00000001u32), "{ecx}"(0 as u32)
              : :);
 
         /// 2. EAX=7, ECX=0: Queries "Extended Features"
-        /// This gives us information about bmi,bmi2, and avx2 support (see below).
+        /// This gives us information about bmi,bmi2, and avx2 support
+        /// (see below).
         asm!("cpuid"
              : "={ebx}"(ebx)
              : "{eax}"(0x00000007u32), "{ecx}"(0 as u32)
@@ -135,36 +188,65 @@ fn detect_features() -> usize {
 
     let mut value: usize = 0;
 
-    // CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX (unneeded):
-    if inv_test_bit(ebx, 3) { value = set_bit(value, __Feature::bmi as u32); }
-    if inv_test_bit(ebx, 5) { value = set_bit(value, __Feature::avx2 as u32); }
-    if inv_test_bit(ebx, 8) { value = set_bit(value, __Feature::bmi2 as u32); }
+    // CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX
+    // (the result in ECX is not currently needed):
+    if inv_test_bit(ebx, 3) {
+        value = set_bit(value, __Feature::bmi as u32);
+    }
+    if inv_test_bit(ebx, 5) {
+        value = set_bit(value, __Feature::avx2 as u32);
+    }
+    if inv_test_bit(ebx, 8) {
+        value = set_bit(value, __Feature::bmi2 as u32);
+    }
 
     // CPUID call with EAX=1 => feature bits in ECX and EDX:
-    if inv_test_bit(ecx, 0) { value = set_bit(value, __Feature::sse3 as u32); }
-    if inv_test_bit(ecx, 5) { value = set_bit(value, __Feature::abm as u32); }
-    if inv_test_bit(ecx, 9) { value = set_bit(value, __Feature::ssse3 as u32); }
-    if inv_test_bit(ecx, 12) { value = set_bit(value, __Feature::fma as u32); }
-    if inv_test_bit(ecx, 19) { value = set_bit(value, __Feature::sse4_1 as u32); }
-    if inv_test_bit(ecx, 20) { value = set_bit(value, __Feature::sse4_2 as u32); }
-    if inv_test_bit(ecx, 21) { value = set_bit(value, __Feature::tbm as u32); }
-    if inv_test_bit(ecx, 23) { value = set_bit(value, __Feature::popcnt as u32); }
-    if inv_test_bit(ecx, 28) { value = set_bit(value, __Feature::avx as u32); }
+    if inv_test_bit(ecx, 0) {
+        value = set_bit(value, __Feature::sse3 as u32);
+    }
+    if inv_test_bit(ecx, 5) {
+        value = set_bit(value, __Feature::abm as u32);
+    }
+    if inv_test_bit(ecx, 9) {
+        value = set_bit(value, __Feature::ssse3 as u32);
+    }
+    if inv_test_bit(ecx, 12) {
+        value = set_bit(value, __Feature::fma as u32);
+    }
+    if inv_test_bit(ecx, 19) {
+        value = set_bit(value, __Feature::sse4_1 as u32);
+    }
+    if inv_test_bit(ecx, 20) {
+        value = set_bit(value, __Feature::sse4_2 as u32);
+    }
+    if inv_test_bit(ecx, 21) {
+        value = set_bit(value, __Feature::tbm as u32);
+    }
+    if inv_test_bit(ecx, 23) {
+        value = set_bit(value, __Feature::popcnt as u32);
+    }
+    if inv_test_bit(ecx, 28) {
+        value = set_bit(value, __Feature::avx as u32);
+    }
 
-    if inv_test_bit(edx, 25) { value = set_bit(value, __Feature::sse as u32); }
-    if inv_test_bit(edx, 26) { value = set_bit(value, __Feature::sse2 as u32); }
+    if inv_test_bit(edx, 25) {
+        value = set_bit(value, __Feature::sse as u32);
+    }
+    if inv_test_bit(edx, 26) {
+        value = set_bit(value, __Feature::sse2 as u32);
+    }
 
     value
 }
 
-/// This global variable is a bitset used to cache the features supported by the
-/// CPU.
+/// This global variable is a bitset used to cache the features supported by
+/// the CPU.
 static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
 
 /// Performs run-time feature detection.
 ///
-/// On its first invocation, it detects the CPU features and caches them in the
-/// `FEATURES` global variable as an `AtomicUsize`.
+/// On its first invocation, it detects the CPU features and caches them
+/// in the `FEATURES` global variable as an `AtomicUsize`.
 ///
 /// It uses the `__Feature` variant to index into this variable as a bitset. If
 /// the bit is set, the feature is enabled, and otherwise it is disabled.
@@ -172,7 +254,7 @@ static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
 /// PLEASE: do not use this, it is an implementation detail subject to change.
 #[doc(hidden)]
 pub fn __unstable_detect_feature(x: __Feature) -> bool {
-    if FEATURES.load(Ordering::Relaxed)  == ::std::usize::MAX {
+    if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX {
         FEATURES.store(detect_features(), Ordering::Relaxed);
     }
     test_bit(FEATURES.load(Ordering::Relaxed), x as u32)
diff --git a/library/stdarch/src/x86/sse.rs b/library/stdarch/src/x86/sse.rs
index f6f45a34de5f..5633d39c0e48 100644
--- a/library/stdarch/src/x86/sse.rs
+++ b/library/stdarch/src/x86/sse.rs
@@ -173,21 +173,23 @@ pub unsafe fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
 #[target_feature = "+sse"]
 // i586 only seems to generate plain `and` instructions, so ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
-    assert_instr(andps))]
+           assert_instr(andps))]
 pub unsafe fn _mm_and_ps(a: f32x4, b: f32x4) -> f32x4 {
     let aa: i32x4 = mem::transmute(a);
     let bb: i32x4 = mem::transmute(b);
     mem::transmute(aa & bb)
 }
 
-/// Bitwise AND-NOT of packed single-precision (32-bit) floating-point elements.
+/// Bitwise AND-NOT of packed single-precision (32-bit) floating-point
+/// elements.
 ///
 /// Computes `!a & b` for each bit in `a` and `b`.
 #[inline(always)]
 #[target_feature = "+sse"]
-// i586 only seems to generate plain `not` and `and` instructions, so ignore it.
+// i586 only seems to generate plain `not` and `and` instructions, so ignore
+// it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
-    assert_instr(andnps))]
+           assert_instr(andnps))]
 pub unsafe fn _mm_andnot_ps(a: f32x4, b: f32x4) -> f32x4 {
     let aa: i32x4 = mem::transmute(a);
     let bb: i32x4 = mem::transmute(b);
@@ -199,7 +201,7 @@ pub unsafe fn _mm_andnot_ps(a: f32x4, b: f32x4) -> f32x4 {
 #[target_feature = "+sse"]
 // i586 only seems to generate plain `or` instructions, so we ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
-    assert_instr(orps))]
+           assert_instr(orps))]
 pub unsafe fn _mm_or_ps(a: f32x4, b: f32x4) -> f32x4 {
     let aa: i32x4 = mem::transmute(a);
     let bb: i32x4 = mem::transmute(b);
@@ -212,7 +214,7 @@ pub unsafe fn _mm_or_ps(a: f32x4, b: f32x4) -> f32x4 {
 #[target_feature = "+sse"]
 // i586 only seems to generate plain `xor` instructions, so we ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
-    assert_instr(xorps))]
+           assert_instr(xorps))]
 pub unsafe fn _mm_xor_ps(a: f32x4, b: f32x4) -> f32x4 {
     let aa: i32x4 = mem::transmute(a);
     let bb: i32x4 = mem::transmute(b);
@@ -229,8 +231,8 @@ pub unsafe fn _mm_cmpeq_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 0)
 }
 
-/// Compare the lowest `f32` of both inputs for less than. The lowest 32 bits of
-/// the result will be `0xffffffff` if `a.extract(0)` is less than
+/// Compare the lowest `f32` of both inputs for less than. The lowest 32 bits
+/// of the result will be `0xffffffff` if `a.extract(0)` is less than
 /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
 /// upper 96 bits of `a`.
 #[inline(always)]
@@ -241,9 +243,9 @@ pub unsafe fn _mm_cmplt_ss(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare the lowest `f32` of both inputs for less than or equal. The lowest
-/// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than or
-/// equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are
-/// the upper 96 bits of `a`.
+/// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than
+/// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
+/// are the upper 96 bits of `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpless))]
@@ -251,10 +253,10 @@ pub unsafe fn _mm_cmple_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 2)
 }
 
-/// Compare the lowest `f32` of both inputs for greater than. The lowest 32 bits of
-/// the result will be `0xffffffff` if `a.extract(0)` is greater than
-/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
-/// upper 96 bits of `a`.
+/// Compare the lowest `f32` of both inputs for greater than. The lowest 32
+/// bits of the result will be `0xffffffff` if `a.extract(0)` is greater
+/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
+/// are the upper 96 bits of `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpltss))]
@@ -264,8 +266,8 @@ pub unsafe fn _mm_cmpgt_ss(a: f32x4, b: f32x4) -> f32x4 {
 
 /// Compare the lowest `f32` of both inputs for greater than or equal. The
 /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is
-/// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of
-/// the result are the upper 96 bits of `a`.
+/// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits
+/// of the result are the upper 96 bits of `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpless))]
@@ -297,8 +299,8 @@ pub unsafe fn _mm_cmpnlt_ss(a: f32x4, b: f32x4) -> f32x4 {
 
 /// Compare the lowest `f32` of both inputs for not-less-than-or-equal. The
 /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not
-/// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits of
-/// the result are the upper 96 bits of `a`.
+/// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
+/// of the result are the upper 96 bits of `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpnless))]
@@ -319,8 +321,8 @@ pub unsafe fn _mm_cmpngt_ss(a: f32x4, b: f32x4) -> f32x4 {
 
 /// Compare the lowest `f32` of both inputs for not-greater-than-or-equal. The
 /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not
-/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
-/// of the result are the upper 96 bits of `a`.
+/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96
+/// bits of the result are the upper 96 bits of `a`.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpnless))]
@@ -361,8 +363,8 @@ pub unsafe fn _mm_cmpeq_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is less than the corresponding element in `b`, or `0` otherwise.
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is less than the corresponding element in `b`, or `0` otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpltps))]
@@ -371,8 +373,8 @@ pub unsafe fn _mm_cmplt_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is less than or equal to the corresponding element in `b`, or `0`
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is less than or equal to the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -382,8 +384,8 @@ pub unsafe fn _mm_cmple_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is greater than the corresponding element in `b`, or `0` otherwise.
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is greater than the corresponding element in `b`, or `0` otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpltps))]
@@ -392,8 +394,8 @@ pub unsafe fn _mm_cmpgt_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is greater than or equal to the corresponding element in `b`, or `0`
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is greater than or equal to the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -413,8 +415,9 @@ pub unsafe fn _mm_cmpneq_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is *not* less than the corresponding element in `b`, or `0` otherwise.
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is *not* less than the corresponding element in `b`, or `0`
+/// otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpnltps))]
@@ -423,9 +426,9 @@ pub unsafe fn _mm_cmpnlt_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is *not* less than or equal to the corresponding element in `b`, or `0`
-/// otherwise.
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is *not* less than or equal to the corresponding element in `b`, or
+/// `0` otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpnleps))]
@@ -434,8 +437,8 @@ pub unsafe fn _mm_cmpnle_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is *not* greater than the corresponding element in `b`, or `0`
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is *not* greater than the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
@@ -445,9 +448,9 @@ pub unsafe fn _mm_cmpngt_ps(a: f32x4, b: f32x4) -> f32x4 {
 }
 
 /// Compare each of the four floats in `a` to the corresponding element in `b`.
-/// The result in the output vector will be `0xffffffff` if the input element in
-/// `a` is *not* greater than or equal to the corresponding element in `b`, or
-/// `0` otherwise.
+/// The result in the output vector will be `0xffffffff` if the input element
+/// in `a` is *not* greater than or equal to the corresponding element in `b`,
+/// or `0` otherwise.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(test, assert_instr(cmpnleps))]
@@ -642,7 +645,8 @@ pub unsafe fn _mm_cvtss_si64(a: f32x4) -> i64 {
 // pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
 // pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
 
-/// Convert the lowest 32 bit float in the input vector to a 32 bit integer with
+/// Convert the lowest 32 bit float in the input vector to a 32 bit integer
+/// with
 /// truncation.
 ///
 /// The result is rounded always using truncation (round towards zero). If the
@@ -666,8 +670,8 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
     _mm_cvttss_si32(a)
 }
 
-/// Convert the lowest 32 bit float in the input vector to a 64 bit integer with
-/// truncation.
+/// Convert the lowest 32 bit float in the input vector to a 64 bit integer
+/// with truncation.
 ///
 /// The result is rounded always using truncation (round towards zero). If the
 /// result cannot be represented as a 64 bit integer the result will be
@@ -765,8 +769,8 @@ pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
 
 /// Construct a `f32x4` from four floating point values highest to lowest.
 ///
-/// Note that `a` will be the highest 32 bits of the result, and `d` the lowest.
-/// This matches the standard way of writing bit patterns on x86:
+/// Note that `a` will be the highest 32 bits of the result, and `d` the
+/// lowest. This matches the standard way of writing bit patterns on x86:
 ///
 /// ```text
 ///  bit    127 .. 96  95 .. 64  63 .. 32  31 .. 0
@@ -884,8 +888,8 @@ pub unsafe fn _mm_unpacklo_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [0, 4, 1, 5])
 }
 
-/// Combine higher half of `a` and `b`. The highwe half of `b` occupies the lower
-/// half of result.
+/// Combine higher half of `a` and `b`. The higher half of `b` occupies the
+/// lower half of result.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(all(test, not(windows)), assert_instr(movhlps))]
@@ -895,8 +899,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [6, 7, 2, 3])
 }
 
-/// Combine lower half of `a` and `b`. The lower half of `b` occupies the higher
-/// half of result.
+/// Combine lower half of `a` and `b`. The lower half of `b` occupies the
+/// higher half of result.
 #[inline(always)]
 #[target_feature = "+sse"]
 #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
@@ -957,9 +961,9 @@ pub unsafe fn _mm_movemask_ps(a: f32x4) -> i32 {
 // 32-bit codegen does not generate `movhps` or `movhpd`, but instead
 // `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2).
 #[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
-    assert_instr(unpcklpd))]
+           assert_instr(unpcklpd))]
 #[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
-    assert_instr(unpcklps))]
+           assert_instr(unpcklps))]
 // TODO: This function is actually not limited to floats, but that's what
 // what matches the C type most closely: (__m128, *const __m64) -> __m128
 pub unsafe fn _mm_loadh_pi(a: f32x4, p: *const f32) -> f32x4 {
@@ -1008,10 +1012,10 @@ pub unsafe fn _mm_loadh_pi(a: f32x4, p: *const f32) -> f32x4 {
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
 // On 32-bit targets with SSE2, it just generates two `movsd`.
 #[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
-    assert_instr(movsd))]
+           assert_instr(movsd))]
 // It should really generate "movlps", but oh well...
 #[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
-    assert_instr(movss))]
+           assert_instr(movss))]
 // TODO: Like _mm_loadh_pi, this also isn't limited to floats.
 pub unsafe fn _mm_loadl_pi(a: f32x4, p: *const f32) -> f32x4 {
     let q = p as *const f32x2;
@@ -1056,7 +1060,8 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> f32x4 {
 /// is not aligned to a 128-bit boundary (16 bytes) a general protection fault
 /// will be triggered (fatal program crash).
 ///
-/// Use [`_mm_loadu_ps`](fn._mm_loadu_ps.html) for potentially unaligned memory.
+/// Use [`_mm_loadu_ps`](fn._mm_loadu_ps.html) for potentially unaligned
+/// memory.
 ///
 /// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
 #[inline(always)]
@@ -1066,8 +1071,10 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> f32x4 {
     *(p as *const f32x4)
 }
 
-/// Load four `f32` values from memory into a `f32x4`. There are no restrictions
-/// on memory alignment. For aligned memory [`_mm_load_ps`](fn._mm_load_ps.html)
+/// Load four `f32` values from memory into a `f32x4`.
+/// There are no restrictions on memory alignment.
+/// For aligned memory
+/// [`_mm_load_ps`](fn._mm_load_ps.html)
 /// may be faster.
 ///
 /// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
@@ -1081,7 +1088,8 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> f32x4 {
     ptr::copy_nonoverlapping(
         p as *const u8,
         &mut dst as *mut f32x4 as *mut u8,
-        mem::size_of::<f32x4>());
+        mem::size_of::<f32x4>(),
+    );
     dst
 }
 
@@ -1117,10 +1125,11 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> f32x4 {
 /// choose to generate an equivalent sequence of other instructions.
 #[inline(always)]
 #[target_feature = "+sse"]
-// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's fine.
+// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
+// fine.
 // On i586 (no SSE2) it just generates plain MOV instructions.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
-    assert_instr(movhpd))]
+           assert_instr(movhpd))]
 pub unsafe fn _mm_storeh_pi(p: *mut u64, a: f32x4) {
     if cfg!(target_arch = "x86") {
         // If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which
@@ -1128,7 +1137,8 @@ pub unsafe fn _mm_storeh_pi(p: *mut u64, a: f32x4) {
         let a64: u64x2 = mem::transmute(a);
         let a_hi = a64.extract(1);
         *p = mem::transmute(a_hi);
-    } else { // target_arch = "x86_64"
+    } else {
+        // target_arch = "x86_64"
         // If this is a `u64x2` LLVM generates a pshufd + movq, but we really
         // want a a MOVHPD or MOVHPS here.
         let a64: f64x2 = mem::transmute(a);
@@ -1146,11 +1156,11 @@ pub unsafe fn _mm_storeh_pi(p: *mut u64, a: f32x4) {
 // On i586 the codegen just generates plane MOVs. No need to test for that.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
                not(target_family = "windows")),
-    assert_instr(movlps))]
+           assert_instr(movlps))]
 // Win64 passes `a` by reference, which causes it to generate two 64 bit moves.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
                target_family = "windows"),
-    assert_instr(movsd))]
+           assert_instr(movsd))]
 pub unsafe fn _mm_storel_pi(p: *mut u64, a: f32x4) {
     if cfg!(target_arch = "x86") {
         // Same as for _mm_storeh_pi: i586 code gen would use floating point
@@ -1158,7 +1168,8 @@ pub unsafe fn _mm_storel_pi(p: *mut u64, a: f32x4) {
         let a64: u64x2 = mem::transmute(a);
         let a_hi = a64.extract(0);
         *p = mem::transmute(a_hi);
-    } else { // target_arch = "x86_64"
+    } else {
+        // target_arch = "x86_64"
         let a64: f64x2 = mem::transmute(a);
         let a_hi = a64.extract(0);
         *p = mem::transmute(a_hi);
@@ -1235,7 +1246,8 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: f32x4) {
     ptr::copy_nonoverlapping(
         &a as *const f32x4 as *const u8,
         p as *mut u8,
-        mem::size_of::<f32x4>());
+        mem::size_of::<f32x4>(),
+    );
 }
 
 /// Store four 32-bit floats into *aligned* memory in reverse order.
@@ -1309,7 +1321,8 @@ pub unsafe fn _mm_getcsr() -> u32 {
 /// * *Exception flags* report which exceptions occurred since last they were
 /// reset.
 ///
-/// * *Masking flags* can be used to mask (ignore) certain exceptions. By default
+/// * *Masking flags* can be used to mask (ignore) certain exceptions.
+/// By default
 /// these flags are all set to 1, so all exceptions are masked. When an
 /// an exception is masked, the processor simply sets the exception flag and
 /// continues the operation. If the exception is unmasked, the flag is also set
@@ -1332,11 +1345,13 @@ pub unsafe fn _mm_getcsr() -> u32 {
 /// * `_MM_EXCEPT_DIV_ZERO`: Division by zero occured.
 ///
 /// * `_MM_EXCEPT_OVERFLOW`: A numeric overflow exception occured, i.e., a
-///   result was too large to be represented (e.g., an `f32` with absolute value
+///   result was too large to be represented (e.g., an `f32` with
+///   absolute value
 ///   greater than `2^128`).
 ///
 /// * `_MM_EXCEPT_UNDERFLOW`: A numeric underflow exception occured, i.e., a
-///   result was too small to be represented in a normalized way (e.g., an `f32`
+///   result was too small to be represented in a normalized way (e.g.,
+///   an `f32`
 ///   with absulte value smaller than `2^-126`.)
 ///
 /// * `_MM_EXCEPT_INEXACT`: An inexact-result exception occured (a.k.a.
@@ -1374,10 +1389,12 @@ pub unsafe fn _mm_getcsr() -> u32 {
 /// exception, use:
 ///
 /// ```rust,ignore
-/// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW);  // unmask underflow exception
+/// // unmask underflow exception
+/// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW);
 /// ```
 ///
-/// Warning: an unmasked exception will cause an exception handler to be called.
+/// Warning: an unmasked exception will cause an exception handler to be
+/// called.
 /// The standard handler will simply terminate the process. So, in this case
 /// any underflow exception would terminate the current process with something
 /// like `signal: 8, SIGFPE: erroneous arithmetic operation`.
@@ -1427,48 +1444,48 @@ pub unsafe fn _mm_setcsr(val: u32) {
 }
 
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_INVALID: u32    = 0x0001;
+pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_DENORM: u32     = 0x0002;
+pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_DIV_ZERO: u32   = 0x0004;
+pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_OVERFLOW: u32   = 0x0008;
+pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_UNDERFLOW: u32  = 0x0010;
+pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_EXCEPT_INEXACT: u32    = 0x0020;
-pub const _MM_EXCEPT_MASK: u32       = 0x003f;
+pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
+pub const _MM_EXCEPT_MASK: u32 = 0x003f;
 
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_INVALID: u32      = 0x0080;
+pub const _MM_MASK_INVALID: u32 = 0x0080;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_DENORM: u32       = 0x0100;
+pub const _MM_MASK_DENORM: u32 = 0x0100;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_DIV_ZERO: u32     = 0x0200;
+pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_OVERFLOW: u32     = 0x0400;
+pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_UNDERFLOW: u32    = 0x0800;
+pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_MASK_INEXACT: u32      = 0x1000;
-pub const _MM_MASK_MASK: u32         = 0x1f80;
+pub const _MM_MASK_INEXACT: u32 = 0x1000;
+pub const _MM_MASK_MASK: u32 = 0x1f80;
 
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_ROUND_NEAREST: u32     = 0x0000;
+pub const _MM_ROUND_NEAREST: u32 = 0x0000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_ROUND_DOWN: u32        = 0x2000;
+pub const _MM_ROUND_DOWN: u32 = 0x2000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_ROUND_UP: u32          = 0x4000;
+pub const _MM_ROUND_UP: u32 = 0x4000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
-pub const _MM_ROUND_MASK: u32        = 0x6000;
+pub const _MM_ROUND_MASK: u32 = 0x6000;
 
-pub const _MM_FLUSH_ZERO_MASK: u32   = 0x8000;
+pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_FLUSH_ZERO_ON: u32     = 0x8000;
+pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
-pub const _MM_FLUSH_ZERO_OFF: u32    = 0x0000;
+pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
 
 #[inline(always)]
 #[allow(non_snake_case)]
@@ -1517,7 +1534,7 @@ pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
 #[target_feature = "+sse"]
 pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
     let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
-    //println!("setting csr={:x}", val);
+    // println!("setting csr={:x}", val);
     _mm_setcsr(val)
 }
 
@@ -1584,8 +1601,8 @@ pub const _MM_HINT_NTA: i8 = 0;
 #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
 #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))]
 pub unsafe fn _mm_prefetch(p: *const c_void, strategy: i8) {
-    // The `strategy` must be a compile-time constant, so we use a short form of
-    // `constify_imm8!` for now.
+    // The `strategy` must be a compile-time constant, so we use a short form
+    // of `constify_imm8!` for now.
     // We use the `llvm.prefetch` instrinsic with `rw` = 0 (read), and
     // `cache type` = 1 (data cache). `locality` is based on our `strategy`.
     macro_rules! pref {
@@ -1612,7 +1629,9 @@ pub unsafe fn _mm_undefined_ps() -> f32x4 {
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
-pub unsafe fn _MM_TRANSPOSE4_PS(row0: &mut f32x4, row1: &mut f32x4, row2: &mut f32x4, row3: &mut f32x4) {
+pub unsafe fn _MM_TRANSPOSE4_PS(
+    row0: &mut f32x4, row1: &mut f32x4, row2: &mut f32x4, row3: &mut f32x4
+) {
     let tmp0 = _mm_unpacklo_ps(*row0, *row1);
     let tmp2 = _mm_unpacklo_ps(*row2, *row3);
     let tmp1 = _mm_unpackhi_ps(*row0, *row1);
@@ -1625,7 +1644,7 @@ pub unsafe fn _MM_TRANSPOSE4_PS(row0: &mut f32x4, row1: &mut f32x4, row2: &mut f
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     #[link_name = "llvm.x86.sse.add.ss"]
     fn addss(a: f32x4, b: f32x4) -> f32x4;
     #[link_name = "llvm.x86.sse.sub.ss"]
@@ -1709,7 +1728,7 @@ mod tests {
     use v128::*;
     use x86::sse;
     use stdsimd_test::simd_test;
-    use test::black_box;  // Used to inhibit constant-folding.
+    use test::black_box; // Used to inhibit constant-folding.
 
     #[simd_test = "sse"]
     unsafe fn _mm_add_ps() {
@@ -1934,9 +1953,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = 0u32;  // a.extract(0) < b.extract(0)
-        let c1 = 0u32;  // a.extract(0) < c.extract(0)
-        let d1 = !0u32;  // a.extract(0) < d.extract(0)
+        let b1 = 0u32; // a.extract(0) < b.extract(0)
+        let c1 = 0u32; // a.extract(0) < c.extract(0)
+        let d1 = !0u32; // a.extract(0) < d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmplt_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -1960,9 +1979,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = 0u32;  // a.extract(0) <= b.extract(0)
-        let c1 = !0u32;  // a.extract(0) <= c.extract(0)
-        let d1 = !0u32;  // a.extract(0) <= d.extract(0)
+        let b1 = 0u32; // a.extract(0) <= b.extract(0)
+        let c1 = !0u32; // a.extract(0) <= c.extract(0)
+        let d1 = !0u32; // a.extract(0) <= d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmple_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -1986,9 +2005,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) > b.extract(0)
-        let c1 = 0u32;  // a.extract(0) > c.extract(0)
-        let d1 = 0u32;  // a.extract(0) > d.extract(0)
+        let b1 = !0u32; // a.extract(0) > b.extract(0)
+        let c1 = 0u32; // a.extract(0) > c.extract(0)
+        let d1 = 0u32; // a.extract(0) > d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpgt_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2012,9 +2031,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) >= b.extract(0)
-        let c1 = !0u32;  // a.extract(0) >= c.extract(0)
-        let d1 = 0u32;  // a.extract(0) >= d.extract(0)
+        let b1 = !0u32; // a.extract(0) >= b.extract(0)
+        let c1 = !0u32; // a.extract(0) >= c.extract(0)
+        let d1 = 0u32; // a.extract(0) >= d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpge_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2038,9 +2057,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) != b.extract(0)
-        let c1 = 0u32;  // a.extract(0) != c.extract(0)
-        let d1 = !0u32;  // a.extract(0) != d.extract(0)
+        let b1 = !0u32; // a.extract(0) != b.extract(0)
+        let c1 = 0u32; // a.extract(0) != c.extract(0)
+        let d1 = !0u32; // a.extract(0) != d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpneq_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2058,8 +2077,9 @@ mod tests {
     #[simd_test = "sse"]
     unsafe fn _mm_cmpnlt_ss() {
         // TODO: This test is exactly the same as for _mm_cmpge_ss, but there
-        // must be a difference. It may have to do with behavior in the presence
-        // of NaNs (signaling or quiet). If so, we should add tests for those.
+        // must be a difference. It may have to do with behavior in the
+        // presence of NaNs (signaling or quiet). If so, we should add tests
+        // for those.
         use std::mem::transmute;
 
         let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
@@ -2067,9 +2087,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) >= b.extract(0)
-        let c1 = !0u32;  // a.extract(0) >= c.extract(0)
-        let d1 = 0u32;  // a.extract(0) >= d.extract(0)
+        let b1 = !0u32; // a.extract(0) >= b.extract(0)
+        let c1 = !0u32; // a.extract(0) >= c.extract(0)
+        let d1 = 0u32; // a.extract(0) >= d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpnlt_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2087,7 +2107,8 @@ mod tests {
     #[simd_test = "sse"]
     unsafe fn _mm_cmpnle_ss() {
         // TODO: This test is exactly the same as for _mm_cmpgt_ss, but there
-        // must be a difference. It may have to do with behavior in the presence
+        // must be a difference.
+        // It may have to do with behavior in the presence
         // of NaNs (signaling or quiet). If so, we should add tests for those.
         use std::mem::transmute;
 
@@ -2096,9 +2117,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) > b.extract(0)
-        let c1 = 0u32;  // a.extract(0) > c.extract(0)
-        let d1 = 0u32;  // a.extract(0) > d.extract(0)
+        let b1 = !0u32; // a.extract(0) > b.extract(0)
+        let c1 = 0u32; // a.extract(0) > c.extract(0)
+        let d1 = 0u32; // a.extract(0) > d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpnle_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2116,8 +2137,9 @@ mod tests {
     #[simd_test = "sse"]
     unsafe fn _mm_cmpngt_ss() {
         // TODO: This test is exactly the same as for _mm_cmple_ss, but there
-        // must be a difference. It may have to do with behavior in the presence
-        // of NaNs (signaling or quiet). If so, we should add tests for those.
+        // must be a difference. It may have to do with behavior in the
+        // presence of NaNs (signaling or quiet). If so, we should add tests
+        // for those.
         use std::mem::transmute;
 
         let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
@@ -2125,9 +2147,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = 0u32;  // a.extract(0) <= b.extract(0)
-        let c1 = !0u32;  // a.extract(0) <= c.extract(0)
-        let d1 = !0u32;  // a.extract(0) <= d.extract(0)
+        let b1 = 0u32; // a.extract(0) <= b.extract(0)
+        let c1 = !0u32; // a.extract(0) <= c.extract(0)
+        let d1 = !0u32; // a.extract(0) <= d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpngt_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2145,8 +2167,9 @@ mod tests {
     #[simd_test = "sse"]
     unsafe fn _mm_cmpnge_ss() {
         // TODO: This test is exactly the same as for _mm_cmplt_ss, but there
-        // must be a difference. It may have to do with behavior in the presence
-        // of NaNs (signaling or quiet). If so, we should add tests for those.
+        // must be a difference. It may have to do with behavior in the
+        // presence of NaNs (signaling or quiet). If so, we should add tests
+        // for those.
         use std::mem::transmute;
 
         let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
@@ -2154,9 +2177,9 @@ mod tests {
         let c = f32x4::new(1.0, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = 0u32;  // a.extract(0) < b.extract(0)
-        let c1 = 0u32;  // a.extract(0) < c.extract(0)
-        let d1 = !0u32;  // a.extract(0) < d.extract(0)
+        let b1 = 0u32; // a.extract(0) < b.extract(0)
+        let c1 = 0u32; // a.extract(0) < c.extract(0)
+        let d1 = !0u32; // a.extract(0) < d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpnge_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2181,9 +2204,9 @@ mod tests {
         let c = f32x4::new(NAN, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = !0u32;  // a.extract(0) ord b.extract(0)
-        let c1 = 0u32;  // a.extract(0) ord c.extract(0)
-        let d1 = !0u32;  // a.extract(0) ord d.extract(0)
+        let b1 = !0u32; // a.extract(0) ord b.extract(0)
+        let c1 = 0u32; // a.extract(0) ord c.extract(0)
+        let d1 = !0u32; // a.extract(0) ord d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpord_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2208,9 +2231,9 @@ mod tests {
         let c = f32x4::new(NAN, 5.0, 6.0, 7.0);
         let d = f32x4::new(2.0, 5.0, 6.0, 7.0);
 
-        let b1 = 0u32;  // a.extract(0) unord b.extract(0)
-        let c1 = !0u32;  // a.extract(0) unord c.extract(0)
-        let d1 = 0u32;  // a.extract(0) unord d.extract(0)
+        let b1 = 0u32; // a.extract(0) unord b.extract(0)
+        let c1 = !0u32; // a.extract(0) unord c.extract(0)
+        let d1 = 0u32; // a.extract(0) unord d.extract(0)
 
         let rb: u32x4 = transmute(sse::_mm_cmpunord_ss(a, b));
         let eb: u32x4 = transmute(f32x4::new(transmute(b1), 2.0, 3.0, 4.0));
@@ -2418,9 +2441,16 @@ mod tests {
 
             let r = sse::_mm_comieq_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2439,9 +2469,16 @@ mod tests {
 
             let r = sse::_mm_comilt_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2460,9 +2497,16 @@ mod tests {
 
             let r = sse::_mm_comile_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2481,9 +2525,16 @@ mod tests {
 
             let r = sse::_mm_comigt_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2502,9 +2553,16 @@ mod tests {
 
             let r = sse::_mm_comige_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2523,9 +2581,16 @@ mod tests {
 
             let r = sse::_mm_comineq_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2544,9 +2609,16 @@ mod tests {
 
             let r = sse::_mm_ucomieq_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2565,9 +2637,16 @@ mod tests {
 
             let r = sse::_mm_ucomilt_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2586,9 +2665,16 @@ mod tests {
 
             let r = sse::_mm_ucomile_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2607,9 +2693,16 @@ mod tests {
 
             let r = sse::_mm_ucomigt_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2628,9 +2721,16 @@ mod tests {
 
             let r = sse::_mm_ucomige_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2649,9 +2749,16 @@ mod tests {
 
             let r = sse::_mm_ucomineq_ss(a, b);
 
-            assert_eq!(ee[i], r,
+            assert_eq!(
+                ee[i],
+                r,
                 "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r, ee[i], i);
+                a,
+                b,
+                r,
+                ee[i],
+                i
+            );
         }
     }
 
@@ -2659,12 +2766,12 @@ mod tests {
     unsafe fn _mm_comieq_ss_vs_ucomieq_ss() {
         // If one of the arguments is a quiet NaN `comieq_ss` should signal an
         // Invalid Operation Exception while `ucomieq_ss` should not.
-        use std::f32::NAN;  // This is a quiet NaN.
+        use std::f32::NAN; // This is a quiet NaN.
         let aa = &[3.0f32, NAN, 23.0, NAN];
         let bb = &[3.0f32, 47.5, NAN, NAN];
 
         let ee = &[1i32, 0, 0, 0];
-        let exc = &[0u32, 1, 1, 1];  // Should comieq_ss signal an exception?
+        let exc = &[0u32, 1, 1, 1]; // Should comieq_ss signal an exception?
 
         for i in 0..4 {
             let a = f32x4::new(aa[i], 1.0, 2.0, 3.0);
@@ -2678,16 +2785,40 @@ mod tests {
             let r2 = sse::_mm_ucomieq_ss(*black_box(&a), b);
             let s2 = sse::_MM_GET_EXCEPTION_STATE();
 
-            assert_eq!(ee[i], r1,
+            assert_eq!(
+                ee[i],
+                r1,
                 "_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r1, ee[i], i);
-            assert_eq!(ee[i], r2,
+                a,
+                b,
+                r1,
+                ee[i],
+                i
+            );
+            assert_eq!(
+                ee[i],
+                r2,
                 "_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                a, b, r2, ee[i], i);
-            assert_eq!(s1, exc[i] * sse::_MM_EXCEPT_INVALID,
-                "_mm_comieq_ss() set exception flags: {} (i={})", s1, i);
-            assert_eq!(s2, 0,  // ucomieq_ss should not signal an exception
-                "_mm_ucomieq_ss() set exception flags: {} (i={})", s2, i);
+                a,
+                b,
+                r2,
+                ee[i],
+                i
+            );
+            assert_eq!(
+                s1,
+                exc[i] * sse::_MM_EXCEPT_INVALID,
+                "_mm_comieq_ss() set exception flags: {} (i={})",
+                s1,
+                i
+            );
+            assert_eq!(
+                s2,
+                0, // ucomieq_ss should not signal an exception
+                "_mm_ucomieq_ss() set exception flags: {} (i={})",
+                s2,
+                i
+            );
         }
     }
 
@@ -2696,14 +2827,20 @@ mod tests {
         use std::f32::NAN;
         use std::i32::MIN;
         let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
-        let result = &[42i32,   -3,   MIN,    0,       MIN, 2147483520];
+        let result = &[42i32, -3, MIN, 0, MIN, 2147483520];
         for i in 0..inputs.len() {
             let x = f32x4::new(inputs[i], 1.0, 3.0, 4.0);
             let e = result[i];
             let r = sse::_mm_cvtss_si32(x);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
-                i, x, r, e);
+                i,
+                x,
+                r,
+                e
+            );
         }
     }
 
@@ -2713,23 +2850,29 @@ mod tests {
         use std::f32::NAN;
         use std::i64::MIN;
         let inputs = &[
-            (42.0f32,  42i64),
-            (-31.4,   -31),
-            (-33.5,   -34),
-            (-34.5,   -34),
-            (4.0e10,  40_000_000_000),
+            (42.0f32, 42i64),
+            (-31.4, -31),
+            (-33.5, -34),
+            (-34.5, -34),
+            (4.0e10, 40_000_000_000),
             (4.0e-10, 0),
             (NAN, MIN),
             (2147483500.1, 2147483520),
-            (9.223371e18, 9223370937343148032)
+            (9.223371e18, 9223370937343148032),
         ];
         for i in 0..inputs.len() {
             let (xi, e) = inputs[i];
             let x = f32x4::new(xi, 1.0, 3.0, 4.0);
             let r = sse::_mm_cvtss_si64(x);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
-                i, x, r, e);
+                i,
+                x,
+                r,
+                e
+            );
         }
     }
 
@@ -2738,13 +2881,13 @@ mod tests {
         use std::f32::NAN;
         use std::i32::MIN;
         let inputs = &[
-            (42.0f32,  42i32),
-            (-31.4,   -31),
-            (-33.5,   -33),
-            (-34.5,   -34),
-            (10.999,   10),
-            (-5.99,    -5),
-            (4.0e10,  MIN),
+            (42.0f32, 42i32),
+            (-31.4, -31),
+            (-33.5, -33),
+            (-34.5, -34),
+            (10.999, 10),
+            (-5.99, -5),
+            (4.0e10, MIN),
             (4.0e-10, 0),
             (NAN, MIN),
             (2147483500.1, 2147483520),
@@ -2753,9 +2896,15 @@ mod tests {
             let (xi, e) = inputs[i];
             let x = f32x4::new(xi, 1.0, 3.0, 4.0);
             let r = sse::_mm_cvttss_si32(x);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
-                i, x, r, e);
+                i,
+                x,
+                r,
+                e
+            );
         }
     }
 
@@ -2765,13 +2914,13 @@ mod tests {
         use std::f32::NAN;
         use std::i64::MIN;
         let inputs = &[
-            (42.0f32,  42i64),
-            (-31.4,   -31),
-            (-33.5,   -33),
-            (-34.5,   -34),
-            (10.999,   10),
-            (-5.99,    -5),
-            (4.0e10,  40_000_000_000),
+            (42.0f32, 42i64),
+            (-31.4, -31),
+            (-33.5, -33),
+            (-34.5, -34),
+            (10.999, 10),
+            (-5.99, -5),
+            (4.0e10, 40_000_000_000),
             (4.0e-10, 0),
             (NAN, MIN),
             (2147483500.1, 2147483520),
@@ -2782,19 +2931,25 @@ mod tests {
             let (xi, e) = inputs[i];
             let x = f32x4::new(xi, 1.0, 3.0, 4.0);
             let r = sse::_mm_cvttss_si64(x);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
-                i, x, r, e);
+                i,
+                x,
+                r,
+                e
+            );
         }
     }
 
     #[simd_test = "sse"]
     pub unsafe fn _mm_cvtsi32_ss() {
         let inputs = &[
-            (4555i32,   4555.0f32),
+            (4555i32, 4555.0f32),
             (322223333, 322223330.0),
-            (-432,      -432.0),
-            (-322223333, -322223330.0)
+            (-432, -432.0),
+            (-322223333, -322223330.0),
         ];
 
         for i in 0..inputs.len() {
@@ -2802,9 +2957,16 @@ mod tests {
             let a = f32x4::new(5.0, 6.0, 7.0, 8.0);
             let r = sse::_mm_cvtsi32_ss(a, x);
             let e = a.replace(0, f);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvtsi32_ss({:?}, {}) = {:?}, expected: {:?}",
-                i, a, x, r, e);
+                i,
+                a,
+                x,
+                r,
+                e
+            );
         }
     }
 
@@ -2812,12 +2974,12 @@ mod tests {
     #[cfg(target_arch = "x86_64")]
     pub unsafe fn _mm_cvtsi64_ss() {
         let inputs = &[
-            (4555i64,   4555.0f32),
+            (4555i64, 4555.0f32),
             (322223333, 322223330.0),
-            (-432,      -432.0),
+            (-432, -432.0),
             (-322223333, -322223330.0),
             (9223372036854775807, 9.223372e18),
-            (-9223372036854775808, -9.223372e18)
+            (-9223372036854775808, -9.223372e18),
         ];
 
         for i in 0..inputs.len() {
@@ -2825,9 +2987,16 @@ mod tests {
             let a = f32x4::new(5.0, 6.0, 7.0, 8.0);
             let r = sse::_mm_cvtsi64_ss(a, x);
             let e = a.replace(0, f);
-            assert_eq!(e, r,
+            assert_eq!(
+                e,
+                r,
                 "TestCase #{} _mm_cvtsi64_ss({:?}, {}) = {:?}, expected: {:?}",
-                i, a, x, r, e);
+                i,
+                a,
+                x,
+                r,
+                e
+            );
         }
     }
 
@@ -2854,14 +3023,22 @@ mod tests {
     #[simd_test = "sse"]
     unsafe fn _mm_set_ps() {
         let r = sse::_mm_set_ps(
-            black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0));
+            black_box(1.0),
+            black_box(2.0),
+            black_box(3.0),
+            black_box(4.0),
+        );
         assert_eq!(r, f32x4::new(4.0, 3.0, 2.0, 1.0));
     }
 
     #[simd_test = "sse"]
     unsafe fn _mm_setr_ps() {
         let r = sse::_mm_setr_ps(
-            black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0));
+            black_box(1.0),
+            black_box(2.0),
+            black_box(3.0),
+            black_box(4.0),
+        );
         assert_eq!(r, f32x4::new(1.0, 2.0, 3.0, 4.0));
     }
 
@@ -3170,7 +3347,7 @@ mod tests {
         sse::_mm_setcsr(saved_csr);
 
         let exp = f32x4::new(0.0, 0.0, 0.0, 1.0);
-        assert_eq!(r, exp);  // first component is a denormalized f32
+        assert_eq!(r, exp); // first component is a denormalized f32
     }
 
     #[simd_test = "sse"]
@@ -3188,7 +3365,7 @@ mod tests {
         sse::_mm_setcsr(saved_csr);
 
         let exp = f32x4::new(1.1e-39, 0.0, 0.0, 1.0);
-        assert_eq!(r, exp);  // first component is a denormalized f32
+        assert_eq!(r, exp); // first component is a denormalized f32
     }
 
     #[simd_test = "sse"]
@@ -3198,7 +3375,7 @@ mod tests {
         let a = f32x4::new(1.1e-36, 0.0, 0.0, 1.0);
         let b = f32x4::new(1e-5, 0.0, 0.0, 1.0);
 
-        assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0);  // just to be sure
+        assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0); // just to be sure
 
         let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs
index 8b992896d5ea..c08fab33b69b 100644
--- a/library/stdarch/src/x86/sse2.rs
+++ b/library/stdarch/src/x86/sse2.rs
@@ -5,9 +5,8 @@ use std::mem;
 use std::os::raw::c_void;
 use std::ptr;
 
-use simd_llvm::{
-    simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
-};
+use simd_llvm::{simd_cast, simd_shuffle16, simd_shuffle2, simd_shuffle4,
+                simd_shuffle8};
 use x86::__m128i;
 use v128::*;
 use v64::*;
@@ -317,7 +316,9 @@ pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 {
 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
 pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
     let (zero, imm8) = (__m128i::splat(0), imm8 as u32);
-    const fn sub(a: u32, b: u32) -> u32 { a - b }
+    const fn sub(a: u32, b: u32) -> u32 {
+        a - b
+    }
     macro_rules! shuffle {
         ($shift:expr) => {
             simd_shuffle16::<__m128i, __m128i>(zero, a, [
@@ -333,14 +334,22 @@ pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
         }
     }
     match imm8 {
-        0 => shuffle!(0), 1 => shuffle!(1),
-        2 => shuffle!(2), 3 => shuffle!(3),
-        4 => shuffle!(4), 5 => shuffle!(5),
-        6 => shuffle!(6), 7 => shuffle!(7),
-        8 => shuffle!(8), 9 => shuffle!(9),
-        10 => shuffle!(10), 11 => shuffle!(11),
-        12 => shuffle!(12), 13 => shuffle!(13),
-        14 => shuffle!(14), 15 => shuffle!(15),
+        0 => shuffle!(0),
+        1 => shuffle!(1),
+        2 => shuffle!(2),
+        3 => shuffle!(3),
+        4 => shuffle!(4),
+        5 => shuffle!(5),
+        6 => shuffle!(6),
+        7 => shuffle!(7),
+        8 => shuffle!(8),
+        9 => shuffle!(9),
+        10 => shuffle!(10),
+        11 => shuffle!(11),
+        12 => shuffle!(12),
+        13 => shuffle!(13),
+        14 => shuffle!(14),
+        15 => shuffle!(15),
         _ => shuffle!(16),
     }
 }
@@ -365,7 +374,7 @@ pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(psllw))]
-pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8  {
+pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 {
     pslliw(a, imm8)
 }
 
@@ -454,7 +463,9 @@ pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 {
 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
 pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
     let (zero, imm8) = (__m128i::splat(0), imm8 as u32);
-    const fn add(a: u32, b: u32) -> u32 { a + b }
+    const fn add(a: u32, b: u32) -> u32 {
+        a + b
+    }
     macro_rules! shuffle {
         ($shift:expr) => {
             simd_shuffle16::<__m128i, __m128i>(a, zero, [
@@ -470,14 +481,22 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
         }
     }
     match imm8 {
-        0 => shuffle!(0), 1 => shuffle!(1),
-        2 => shuffle!(2), 3 => shuffle!(3),
-        4 => shuffle!(4), 5 => shuffle!(5),
-        6 => shuffle!(6), 7 => shuffle!(7),
-        8 => shuffle!(8), 9 => shuffle!(9),
-        10 => shuffle!(10), 11 => shuffle!(11),
-        12 => shuffle!(12), 13 => shuffle!(13),
-        14 => shuffle!(14), 15 => shuffle!(15),
+        0 => shuffle!(0),
+        1 => shuffle!(1),
+        2 => shuffle!(2),
+        3 => shuffle!(3),
+        4 => shuffle!(4),
+        5 => shuffle!(5),
+        6 => shuffle!(6),
+        7 => shuffle!(7),
+        8 => shuffle!(8),
+        9 => shuffle!(9),
+        10 => shuffle!(10),
+        11 => shuffle!(11),
+        12 => shuffle!(12),
+        13 => shuffle!(13),
+        14 => shuffle!(14),
+        15 => shuffle!(15),
         _ => shuffle!(16),
     }
 }
@@ -487,7 +506,7 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(psrlw))]
-pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8  {
+pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 {
     psrliw(a, imm8)
 }
 
@@ -649,7 +668,7 @@ pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtdq2pd))]
-pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2  {
+pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 {
     simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1]))
 }
 
@@ -777,7 +796,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
 #[target_feature = "+sse2"]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi16(
-    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
+    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
 ) -> i16x8 {
     i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)
 }
@@ -790,6 +809,7 @@ pub unsafe fn _mm_set_epi8(
     e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
     e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
 ) -> i8x16 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     i8x16::new(
         e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
     )
@@ -840,7 +860,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
 #[target_feature = "+sse2"]
 // no particular instruction to test
 pub unsafe fn _mm_setr_epi16(
-    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
+    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
 ) -> i16x8 {
     i16x8::new(e7, e6, e5, e4, e3, e2, e1, e0)
 }
@@ -853,6 +873,7 @@ pub unsafe fn _mm_setr_epi8(
     e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
     e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
 ) -> i8x16 {
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     i8x16::new(
         e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
     )
@@ -895,7 +916,8 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut __m128i as *mut u8,
-        mem::size_of::<__m128i>());
+        mem::size_of::<__m128i>(),
+    );
     dst
 }
 
@@ -934,7 +956,8 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
     ptr::copy_nonoverlapping(
         &a as *const _ as *const u8,
         mem_addr as *mut u8,
-        mem::size_of::<__m128i>());
+        mem::size_of::<__m128i>(),
+    );
 }
 
 /// Store the lower 64-bit integer `a` to a memory location.
@@ -945,7 +968,10 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
 // no particular instruction to test
 pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
     ptr::copy_nonoverlapping(
-        &a as *const _ as *const u8, mem_addr as *mut u8, 8);
+        &a as *const _ as *const u8,
+        mem_addr as *mut u8,
+        8,
+    );
 }
 
 /// Return a vector where the low element is extracted from `a` and its upper
@@ -1076,7 +1102,9 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 {
 pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 {
     // See _mm_shuffle_epi32.
     let imm8 = (imm8 & 0xFF) as u8;
-    const fn add4(x: u32) -> u32 { x + 4 }
+    const fn add4(x: u32) -> u32 {
+        x + 4
+    }
 
     macro_rules! shuffle_done {
         ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
@@ -1183,10 +1211,11 @@ pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 {
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(punpckhbw))]
 pub unsafe fn _mm_unpackhi_epi8(a: i8x16, b: i8x16) -> i8x16 {
-    simd_shuffle16(a, b, [
-        8, 24, 9, 25, 10, 26, 11, 27,
-        12, 28, 13, 29, 14, 30, 15, 31,
-    ])
+    simd_shuffle16(
+        a,
+        b,
+        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
+    )
 }
 
 /// Unpack and interleave 16-bit integers from the high half of `a` and `b`.
@@ -1218,10 +1247,11 @@ pub unsafe fn _mm_unpackhi_epi64(a: i64x2, b: i64x2) -> i64x2 {
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(punpcklbw))]
 pub unsafe fn _mm_unpacklo_epi8(a: i8x16, b: i8x16) -> i8x16 {
-    simd_shuffle16(a, b, [
-        0, 16, 1, 17, 2, 18, 3, 19,
-        4, 20, 5, 21, 6, 22, 7, 23,
-    ])
+    simd_shuffle16(
+        a,
+        b,
+        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
+    )
 }
 
 /// Unpack and interleave 16-bit integers from the low half of `a` and `b`.
@@ -1718,7 +1748,8 @@ pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
     mem::transmute(ucomineqsd(a, b) as u8)
 }
 
-/// Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements
+/// Convert packed double-precision (64-bit) floating-point elements in `a` to
+/// packed single-precision (32-bit) floating-point elements.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtpd2ps))]
@@ -1726,8 +1757,8 @@ pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
     cvtpd2ps(a)
 }
 
-
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to packed
+/// Convert packed single-precision (32-bit) floating-point elements in `a` to
+/// packed
 /// double-precision (64-bit) floating-point elements.
 #[inline(always)]
 #[target_feature = "+sse2"]
@@ -1736,7 +1767,8 @@ pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
     cvtps2pd(a)
 }
 
-/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed 32-bit integers.
+/// Convert packed double-precision (64-bit) floating-point elements in `a` to
+/// packed 32-bit integers.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtpd2dq))]
@@ -1744,7 +1776,8 @@ pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 {
     cvtpd2dq(a)
 }
 
-/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer.
+/// Convert the lower double-precision (64-bit) floating-point element in `a`
+/// to a 32-bit integer.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
@@ -1752,7 +1785,8 @@ pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 {
     cvtsd2si(a)
 }
 
-/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer.
+/// Convert the lower double-precision (64-bit) floating-point element in `a`
+/// to a 64-bit integer.
 #[cfg(target_arch = "x86_64")]
 #[inline(always)]
 #[target_feature = "+sse2"]
@@ -1761,9 +1795,10 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
     cvtsd2si64(a)
 }
 
-/// Convert the lower double-precision (64-bit) floating-point element in `b` to a
-/// single-precision (32-bit) floating-point element, store the result in the lower element
-/// of the return value, and copy the upper element from `a` to the upper element the return value.
+/// Convert the lower double-precision (64-bit) floating-point element in `b`
+/// to a single-precision (32-bit) floating-point element, store the result in
+/// the lower element of the return value, and copy the upper element from `a`
+/// to the upper element of the return value.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtsd2ss))]
@@ -1771,18 +1806,19 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
     cvtsd2ss(a, b)
 }
 
-/// Convert the lower single-precision (32-bit) floating-point element in `b` to a
-/// double-precision (64-bit) floating-point element, store the result in the lower element
-/// of the return value, and copy the upper element from `a` to the upper element the return value.
+/// Convert the lower single-precision (32-bit) floating-point element in `b`
+/// to a double-precision (64-bit) floating-point element, store the result in
+/// the lower element of the return value, and copy the upper element from `a`
+/// to the upper element of the return value.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtss2sd))]
-pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4 ) -> f64x2 {
+pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 {
     cvtss2sd(a, b)
 }
 
-/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed
-/// 32-bit integers with truncation.
+/// Convert packed double-precision (64-bit) floating-point elements in `a` to
+/// packed 32-bit integers with truncation.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttpd2dq))]
@@ -1790,8 +1826,8 @@ pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 {
     cvttpd2dq(a)
 }
 
-/// Convert the lower double-precision (64-bit) floating-point element in `a` to a 32-bit integer
-/// with truncation.
+/// Convert the lower double-precision (64-bit) floating-point element in `a`
+/// to a 32-bit integer with truncation.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
@@ -1799,8 +1835,8 @@ pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 {
     cvttsd2si(a)
 }
 
-/// Convert the lower double-precision (64-bit) floating-point element in `a` to a 64-bit integer
-/// with truncation.
+/// Convert the lower double-precision (64-bit) floating-point element in `a`
+/// to a 64-bit integer with truncation.
 #[cfg(target_arch = "x86_64")]
 #[inline(always)]
 #[target_feature = "+sse2"]
@@ -1809,8 +1845,8 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
     cvttsd2si64(a)
 }
 
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to packed 32-bit
-/// integers with truncation
+/// Convert packed single-precision (32-bit) floating-point elements in `a` to
+/// packed 32-bit integers with truncation.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttps2dq))]
@@ -1818,45 +1854,48 @@ pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
     cvttps2dq(a)
 }
 
-/// Copy double-precision (64-bit) floating-point element `a` to the lower element of the
-/// packed 64-bit return value
+/// Copy double-precision (64-bit) floating-point element `a` to the lower
+/// element of the packed 64-bit return value.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
     f64x2::new(a, 0_f64)
 }
 
-/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
+/// Broadcast double-precision (64-bit) floating-point value `a` to all
+/// elements of the return value.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
 
-/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
+/// Broadcast double-precision (64-bit) floating-point value `a` to all
+/// elements of the return value.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
 
-/// Set packed double-precision (64-bit) floating-point elements in the return value with the
-/// supplied values.
+/// Set packed double-precision (64-bit) floating-point elements in the return
+/// value with the supplied values.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(b, a)
 }
 
-/// Set packed double-precision (64-bit) floating-point elements in the return value with the
-/// supplied values in reverse order.
+/// Set packed double-precision (64-bit) floating-point elements in the return
+/// value with the supplied values in reverse order.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(a, b)
 }
 
-/// returns packed double-precision (64-bit) floating-point elements with all zeros.
+/// Returns packed double-precision (64-bit) floating-point elements with all
+/// zeros.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_setzero_pd() -> f64x2 {
@@ -1876,9 +1915,10 @@ pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 {
 
 
 
-/// Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements)
-/// from memory into the returned vector. mem_addr must be aligned on a 16-byte boundary or
-/// a general-protection exception may be generated.
+/// Load 128-bits (composed of 2 packed double-precision (64-bit)
+/// floating-point elements) from memory into the returned vector.
+/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movaps))]
@@ -1886,9 +1926,9 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
     *(mem_addr as *const f64x2)
 }
 
-/// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from `a`
-/// into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
-/// may be generated.
+/// Store 128-bits (composed of 2 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
+/// on a 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movaps))]
@@ -1906,12 +1946,13 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) {
     ptr::copy_nonoverlapping(
         &a as *const f64x2 as *const u8,
         mem_addr as *mut u8,
-        mem::size_of::<f64x2>());
+        mem::size_of::<f64x2>(),
+    );
 }
 
-/// Store the lower double-precision (64-bit) floating-point element from `a` into 2 contiguous
-/// elements in memory. `mem_addr` must be aligned on a 16-byte boundary or a general-protection
-/// exception may be generated.
+/// Store the lower double-precision (64-bit) floating-point element from `a`
+/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
+/// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
@@ -1919,9 +1960,9 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = b;
 }
 
-/// Store the lower double-precision (64-bit) floating-point element from `a` into 2 contiguous
-/// elements in memory. `mem_addr` must be aligned on a 16-byte boundary or a general-protection
-/// exception may be generated.
+/// Store the lower double-precision (64-bit) floating-point element from `a`
+/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
+/// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
@@ -1929,8 +1970,10 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = b;
 }
 
-/// Store 2 double-precision (64-bit) floating-point elements from `a` into memory in reverse order.
-/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+/// Store 2 double-precision (64-bit) floating-point elements from `a` into
+/// memory in reverse order.
+/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
@@ -1956,9 +1999,9 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 {
     f64x2::new(d, d)
 }
 
-/// Load 2 double-precision (64-bit) floating-point elements from memory into the returned vector
-/// in reverse order. mem_addr must be aligned on a 16-byte boundary or a general-protection
-/// exception may be generated.
+/// Load 2 double-precision (64-bit) floating-point elements from memory into
+/// the returned vector in reverse order. `mem_addr` must be aligned on a
+/// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movapd))]
@@ -1967,9 +2010,9 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 {
     simd_shuffle2(a, a, [1, 0])
 }
 
-/// Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements)
-/// from memory into the returned vector. mem_addr does not need to be aligned on any particular
-/// oundary.
+/// Load 128-bits (composed of 2 packed double-precision (64-bit)
+/// floating-point elements) from memory into the returned vector.
+/// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movups))]
@@ -1978,7 +2021,8 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
         &mut dst as *mut f64x2 as *mut u8,
-        mem::size_of::<f64x2>());
+        mem::size_of::<f64x2>(),
+    );
     dst
 }
 
@@ -1997,7 +2041,7 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     #[link_name = "llvm.x86.sse2.pause"]
     fn pause();
     #[link_name = "llvm.x86.sse2.clflush"]
@@ -2145,7 +2189,7 @@ extern {
     #[link_name = "llvm.x86.sse2.cvtsd2ss"]
     fn cvtsd2ss(a: f32x4, b: f64x2) -> f32x4;
     #[link_name = "llvm.x86.sse2.cvtss2sd"]
-    fn cvtss2sd(a: f64x2, b: f32x4 ) -> f64x2;
+    fn cvtss2sd(a: f64x2, b: f32x4) -> f64x2;
     #[link_name = "llvm.x86.sse2.cvttpd2dq"]
     fn cvttpd2dq(a: f64x2) -> i32x4;
     #[link_name = "llvm.x86.sse2.cvttsd2si"]
@@ -2160,7 +2204,7 @@ extern {
 mod tests {
     use std::os::raw::c_void;
     use stdsimd_test::simd_test;
-    use test::black_box;  // Used to inhibit constant-folding.
+    use test::black_box; // Used to inhibit constant-folding.
 
     use v128::*;
     use x86::{__m128i, sse2};
@@ -2188,13 +2232,17 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_add_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = sse2::_mm_add_epi8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x16::new(
-            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
+            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
+        );
         assert_eq!(r, e);
     }
 
@@ -2235,13 +2283,17 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_adds_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = sse2::_mm_adds_epi8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x16::new(
-            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
+            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
+        );
         assert_eq!(r, e);
     }
 
@@ -2288,13 +2340,17 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_adds_epu8() {
-        let a = u8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = u8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = sse2::_mm_adds_epu8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = u8x16::new(
-            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
+            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
+        );
         assert_eq!(r, e);
     }
 
@@ -2410,12 +2466,11 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_sad_epu8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = u8x16::new(
-            255, 254, 253, 252, 1, 2, 3, 4,
-            155, 154, 153, 152, 1, 2, 3, 4);
-        let b = u8x16::new(
-            0, 0, 0, 0, 2, 1, 2, 1,
-            1, 1, 1, 1, 1, 2, 1, 2);
+            255, 254, 253, 252, 1, 2, 3, 4, 155, 154, 153, 152, 1, 2, 3, 4,
+        );
+        let b = u8x16::new(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
         let r = sse2::_mm_sad_epu8(a, b);
         let e = u64x2::new(1020, 614);
         assert_eq!(r, e);
@@ -2527,44 +2582,58 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_slli_si128() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_slli_si128(a, 1);
-        let e = __m128i::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e =
+            __m128i::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq!(r, e);
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_slli_si128(a, 15);
-        let e = __m128i::new(
-            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+        let e = __m128i::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         assert_eq!(r, e);
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_slli_si128(a, 16);
         assert_eq!(r, __m128i::splat(0));
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_slli_si128(a, -1);
         assert_eq!(r, __m128i::splat(0));
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_slli_si128(a, -0x80000000);
         assert_eq!(r, __m128i::splat(0));
     }
 
     #[simd_test = "sse2"]
     unsafe fn _mm_slli_epi16() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i16x8::new(
-            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0);
+            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
+        );
         let r = sse2::_mm_slli_epi16(a, 4);
+
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i16x8::new(
-            0xFFF0 as u16 as i16,
-            0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, 0, 0, 0, 0);
+            0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
+            0, 0, 0, 0,
+        );
         assert_eq!(r, e);
     }
 
@@ -2635,44 +2704,58 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_srli_si128() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_srli_si128(a, 1);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = __m128i::new(
-            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
+            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
+        );
         assert_eq!(r, e);
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_srli_si128(a, 15);
-        let e = __m128i::new(
-            16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let e = __m128i::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         assert_eq!(r, e);
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_srli_si128(a, 16);
         assert_eq!(r, __m128i::splat(0));
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_srli_si128(a, -1);
         assert_eq!(r, __m128i::splat(0));
 
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = __m128i::new(
-            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+        );
         let r = sse2::_mm_srli_si128(a, -0x80000000);
         assert_eq!(r, __m128i::splat(0));
     }
 
     #[simd_test = "sse2"]
     unsafe fn _mm_srli_epi16() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let a = i16x8::new(
-            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0);
+            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
+        );
         let r = sse2::_mm_srli_epi16(a, 4);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i16x8::new(
-            0xFFF as u16 as i16,
-            0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0);
+            0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
+        );
         assert_eq!(r, e);
     }
 
@@ -2747,13 +2830,18 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_cmpeq_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b = i8x16::new(
-            15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b =
+            i8x16::new(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = sse2::_mm_cmpeq_epi8(a, b);
-        assert_eq!(r, i8x16::new(
-            0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+        assert_eq!(
+            r,
+            #[cfg_attr(rustfmt, rustfmt_skip)]
+            i8x16::new(
+                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            )
+        );
     }
 
     #[simd_test = "sse2"]
@@ -2902,18 +2990,12 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_set_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = sse2::_mm_set_epi8(
-            0, 1, 2, 3,
-            4, 5, 6, 7,
-            8, 9, 10, 11,
-            12, 13, 14, 15,
-        );
-        let e = i8x16::new(
-            15, 14, 13, 12,
-            11, 10, 9, 8,
-            7, 6, 5, 4,
-            3, 2, 1, 0,
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         );
+        let e =
+            i8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         assert_eq!(r, e);
     }
 
@@ -2955,18 +3037,12 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_setr_epi8() {
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let r = sse2::_mm_setr_epi8(
-            0, 1, 2, 3,
-            4, 5, 6, 7,
-            8, 9, 10, 11,
-            12, 13, 14, 15,
-        );
-        let e = i8x16::new(
-            0, 1, 2, 3,
-            4, 5, 6, 7,
-            8, 9, 10, 11,
-            12, 13, 14, 15,
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         );
+        let e =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq!(r, e);
     }
 
@@ -3042,9 +3118,13 @@ mod tests {
         let a = i16x8::new(0x80, -0x81, 0, 0, 0, 0, 0, 0);
         let b = i16x8::new(0, 0, 0, 0, 0, 0, -0x81, 0x80);
         let r = sse2::_mm_packs_epi16(a, b);
-        assert_eq!(r, i8x16::new(
-            0x7F, -0x80, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, -0x80, 0x7F));
+        assert_eq!(
+            r,
+            #[cfg_attr(rustfmt, rustfmt_skip)]
+            i8x16::new(
+                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
+            )
+        );
     }
 
     #[simd_test = "sse2"]
@@ -3053,7 +3133,9 @@ mod tests {
         let b = i32x4::new(0, 0, -0x8001, 0x8000);
         let r = sse2::_mm_packs_epi32(a, b);
         assert_eq!(
-            r, i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
+            r,
+            i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF)
+        );
     }
 
     #[simd_test = "sse2"]
@@ -3061,9 +3143,10 @@ mod tests {
         let a = i16x8::new(0x100, -1, 0, 0, 0, 0, 0, 0);
         let b = i16x8::new(0, 0, 0, 0, 0, 0, -1, 0x100);
         let r = sse2::_mm_packus_epi16(a, b);
-        assert_eq!(r, u8x16::new(
-            0xFF, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0xFF));
+        assert_eq!(
+            r,
+            u8x16::new(0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF)
+        );
     }
 
     #[simd_test = "sse2"]
@@ -3082,9 +3165,9 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_movemask_epi8() {
-        let a = i8x16::from(u8x16::new(
+        let a = i8x16::from(#[cfg_attr(rustfmt, rustfmt_skip)] u8x16::new(
             0b1000_0000, 0b0, 0b1000_0000, 0b01, 0b0101, 0b1111_0000, 0, 0,
-            0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000));
+                0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000, ));
         let r = sse2::_mm_movemask_epi8(a);
         assert_eq!(r, 0b10100100_00100101);
     }
@@ -3115,13 +3198,17 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_unpackhi_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = sse2::_mm_unpackhi_epi8(a, b);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x16::new(
-            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
+            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
+        );
         assert_eq!(r, e);
     }
 
@@ -3154,13 +3241,15 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_unpacklo_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
         let r = sse2::_mm_unpacklo_epi8(a, b);
-        let e = i8x16::new(
-            0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
+        let e =
+            i8x16::new(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
         assert_eq!(r, e);
     }
 
@@ -3825,7 +3914,7 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtpd_ps() {
-        use std::{f64,f32};
+        use std::{f32, f64};
 
         let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, 5.0));
         assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, 0.0));
@@ -3834,20 +3923,23 @@ mod tests {
         assert_eq!(r, f32x4::new(-1.0, -5.0, 0.0, 0.0));
 
         let r = sse2::_mm_cvtpd_ps(f64x2::new(f64::MAX, f64::MIN));
-        assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0,0.0));
+        assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
 
-        let r = sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
-        assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
+        let r =
+            sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
+        assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0, 0.0));
     }
 
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtps_pd() {
-        use std::{f64, f32};
+        use std::{f32, f64};
 
         let r = sse2::_mm_cvtps_pd(f32x4::new(-1.0, 2.0, -3.0, 5.0));
         assert_eq!(r, f64x2::new(-1.0, 2.0));
 
-        let r = sse2::_mm_cvtps_pd(f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
+        let r = sse2::_mm_cvtps_pd(
+            f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN),
+        );
         assert_eq!(r, f64x2::new(f32::MAX as f64, f64::INFINITY));
     }
 
@@ -3864,7 +3956,9 @@ mod tests {
         let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::MAX, f64::MIN));
         assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
 
-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::INFINITY, f64::NEG_INFINITY));
+        let r = sse2::_mm_cvtpd_epi32(
+            f64x2::new(f64::INFINITY, f64::NEG_INFINITY),
+        );
         assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
 
         let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::NAN, f64::NAN));
@@ -3902,7 +3996,7 @@ mod tests {
 
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtsd_ss() {
-        use std::{f64, f32};
+        use std::{f32, f64};
 
         let a = f32x4::new(-1.1, -2.2, 3.3, 4.4);
         let b = f64x2::new(2.0, -5.0);
@@ -3911,17 +4005,26 @@ mod tests {
 
         assert_eq!(r, f32x4::new(2.0, -2.2, 3.3, 4.4));
 
-        let a = f32x4::new(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
+        let a =
+            f32x4::new(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
         let b = f64x2::new(f64::INFINITY, -5.0);
 
         let r = sse2::_mm_cvtsd_ss(a, b);
 
-        assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY));
+        assert_eq!(
+            r,
+            f32x4::new(
+                f32::INFINITY,
+                f32::NEG_INFINITY,
+                f32::MAX,
+                f32::NEG_INFINITY
+            )
+        );
     }
 
     #[simd_test = "sse2"]
     unsafe fn _mm_cvtss_sd() {
-        use std::{f64, f32};
+        use std::{f32, f64};
 
         let a = f64x2::new(-1.1, 2.2);
         let b = f32x4::new(1.0, 2.0, 3.0, 4.0);
@@ -3984,7 +4087,8 @@ mod tests {
         let r = sse2::_mm_cvttps_epi32(a);
         assert_eq!(r, i32x4::new(-1, 2, -3, 6));
 
-        let a = f32x4::new(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
+        let a =
+            f32x4::new(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
         let r = sse2::_mm_cvttps_epi32(a);
         assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
     }
diff --git a/library/stdarch/src/x86/sse3.rs b/library/stdarch/src/x86/sse3.rs
index 21164507f5d4..cba3129d1bcb 100644
--- a/library/stdarch/src/x86/sse3.rs
+++ b/library/stdarch/src/x86/sse3.rs
@@ -106,7 +106,7 @@ pub unsafe fn _mm_moveldup_ps(a: f32x4) -> f32x4 {
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     #[link_name = "llvm.x86.sse3.addsub.ps"]
     fn addsubps(a: f32x4, b: f32x4) -> f32x4;
     #[link_name = "llvm.x86.sse3.addsub.pd"]
@@ -129,7 +129,7 @@ mod tests {
     use stdsimd_test::simd_test;
 
     use v128::*;
-    use x86::sse3 as sse3;
+    use x86::sse3;
 
     #[simd_test = "sse3"]
     unsafe fn _mm_addsub_ps() {
@@ -181,7 +181,8 @@ mod tests {
 
     #[simd_test = "sse3"]
     unsafe fn _mm_lddqu_si128() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let a =
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = sse3::_mm_lddqu_si128(&a);
         assert_eq!(a, r);
     }
@@ -213,4 +214,4 @@ mod tests {
         let r = sse3::_mm_loaddup_pd(&d);
         assert_eq!(r, f64x2::new(d, d));
     }
-}
\ No newline at end of file
+}
diff --git a/library/stdarch/src/x86/sse41.rs b/library/stdarch/src/x86/sse41.rs
index a804ed2e9edc..e3cf9154bd1c 100644
--- a/library/stdarch/src/x86/sse41.rs
+++ b/library/stdarch/src/x86/sse41.rs
@@ -15,7 +15,7 @@ pub unsafe fn _mm_blendv_epi8(a: i8x16, b: i8x16, mask: i8x16) -> i8x16 {
 
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pblendw, imm8=0xF0))]
+#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
 pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: u8) -> i16x8 {
     macro_rules! call {
         ($imm8:expr) => { pblendw(a, b, $imm8) }
@@ -23,7 +23,8 @@ pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: u8) -> i16x8 {
     constify_imm8!(imm8, call)
 }
 
-/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using `mask`
+/// Blend packed double-precision (64-bit) floating-point elements from `a`
+/// and `b` using `mask`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 #[cfg_attr(test, assert_instr(blendvpd))]
@@ -31,7 +32,8 @@ pub unsafe fn _mm_blendv_pd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2 {
     blendvpd(a, b, mask)
 }
 
-/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using `mask`
+/// Blend packed single-precision (32-bit) floating-point elements from `a`
+/// and `b` using `mask`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 #[cfg_attr(test, assert_instr(blendvps))]
@@ -39,10 +41,11 @@ pub unsafe fn _mm_blendv_ps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4 {
     blendvps(a, b, mask)
 }
 
-/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using control mask `imm2`
+/// Blend packed double-precision (64-bit) floating-point elements from `a`
+/// and `b` using control mask `imm2`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(blendpd, imm2=0b10))]
+#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
 pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: u8) -> f64x2 {
     macro_rules! call {
         ($imm2:expr) => { blendpd(a, b, $imm2) }
@@ -50,10 +53,11 @@ pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: u8) -> f64x2 {
     constify_imm2!(imm2, call)
 }
 
-/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using mask `imm4`
+/// Blend packed single-precision (32-bit) floating-point elements from `a`
+/// and `b` using mask `imm4`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(blendps, imm4=0b0101))]
+#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
 pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: u8) -> f32x4 {
     macro_rules! call {
         ($imm4:expr) => { blendps(a, b, $imm4) }
@@ -61,11 +65,12 @@ pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: u8) -> f32x4 {
     constify_imm4!(imm4, call)
 }
 
-/// Extract a single-precision (32-bit) floating-point element from `a`, selected with `imm8`
+/// Extract a single-precision (32-bit) floating-point element from `a`,
+/// selected with `imm8`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 // TODO: Add test for Windows
-#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8=0))]
+#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
 pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
     mem::transmute(a.extract(imm8 as u32 & 0b11))
 }
@@ -73,7 +78,7 @@ pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
 /// Extract an 8-bit integer from `a` selected with `imm8`
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pextrb, imm8=0))]
+#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
 pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
     a.extract((imm8 & 0b1111) as u32)
 }
@@ -82,7 +87,7 @@ pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 // TODO: Add test for Windows
-#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8=1))]
+#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
 pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
     a.extract((imm8 & 0b11) as u32)
 }
@@ -92,15 +97,16 @@ pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
 #[inline(always)]
 #[target_feature = "+sse4.1"]
 // TODO: Add test for Windows
-#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8=1))]
+#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
 pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
     a.extract((imm8 & 0b1) as u32)
 }
 
-/// Select a single value in `a` to store at some position in `b`, 
+/// Select a single value in `a` to store at some position in `b`,
 /// Then zero elements according to `imm8`.
-/// 
-/// `imm8` specifies which bits from operand `a` will be copied, which bits in the 
+///
+/// `imm8` specifies which bits from operand `a` will be copied, which bits in
+/// the
 /// result they will be copied to, and which bits in the result will be
 /// cleared. The following assignments are made:
 ///
@@ -121,7 +127,7 @@ pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
 /// element is cleared.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(insertps, imm8=0b1010))]
+#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
 pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
     macro_rules! call {
         ($imm8:expr) => { insertps(a, b, $imm8) }
@@ -129,59 +135,66 @@ pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
     constify_imm8!(imm8, call)
 }
 
-/// Return a copy of `a` with the 8-bit integer from `i` inserted at a location specified by `imm8`. 
+/// Return a copy of `a` with the 8-bit integer from `i` inserted at a
+/// location specified by `imm8`.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pinsrb, imm8=0))]
+#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
 pub unsafe fn _mm_insert_epi8(a: i8x16, i: i8, imm8: u8) -> i8x16 {
     a.replace((imm8 & 0b1111) as u32, i)
 }
 
-/// Return a copy of `a` with the 32-bit integer from `i` inserted at a location specified by `imm8`. 
+/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
+/// location specified by `imm8`.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pinsrd, imm8=0))]
+#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
 pub unsafe fn _mm_insert_epi32(a: i32x4, i: i32, imm8: u8) -> i32x4 {
     a.replace((imm8 & 0b11) as u32, i)
 }
 
-/// Return a copy of `a` with the 64-bit integer from `i` inserted at a location specified by `imm8`. 
+/// Return a copy of `a` with the 64-bit integer from `i` inserted at a
+/// location specified by `imm8`.
 #[cfg(target_arch = "x86_64")]
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pinsrq, imm8=0))]
+#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
 pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
     a.replace((imm8 & 0b1) as u32, i)
 }
 
-/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum values in dst. 
+/// Compare packed 8-bit integers in `a` and `b`, and return packed maximum
+/// values in dst.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pmaxsb, imm8=0))]
+#[cfg_attr(test, assert_instr(pmaxsb, imm8 = 0))]
 pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
     pmaxsb(a, b)
 }
 
-/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed maximum.
+/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
+/// maximum.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pmaxuw, imm8=0))]
+#[cfg_attr(test, assert_instr(pmaxuw, imm8 = 0))]
 pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
     pmaxuw(a, b)
 }
 
-// Compare packed 32-bit integers in `a` and `b`, and return packed maximum values.
+/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
+/// values.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pmaxsd, imm8=0))]
+#[cfg_attr(test, assert_instr(pmaxsd, imm8 = 0))]
 pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
     pmaxsd(a, b)
 }
 
-// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed maximum values.
+/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
+/// maximum values.
 #[inline(always)]
 #[target_feature = "+sse4.1"]
-#[cfg_attr(test, assert_instr(pmaxud, imm8=0))]
+#[cfg_attr(test, assert_instr(pmaxud, imm8 = 0))]
 pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
     pmaxud(a, b)
 }
@@ -221,7 +234,7 @@ pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     #[link_name = "llvm.x86.sse41.pblendvb"]
     fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
     #[link_name = "llvm.x86.sse41.blendvpd"]
@@ -261,14 +274,18 @@ mod tests {
 
     #[simd_test = "sse4.1"]
     unsafe fn _mm_blendv_epi8() {
-        let a = i8x16::new(
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let b = i8x16::new(
-            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-        let mask = i8x16::new(
-            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
+            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        );
+        let mask =
+            i8x16::new(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = i8x16::new(
-            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31);
+            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
+        );
         assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e);
     }
 
@@ -286,7 +303,7 @@ mod tests {
     unsafe fn _mm_blendv_ps() {
         let a = f32x4::splat(0.0);
         let b = f32x4::splat(1.0);
-        let mask = mem::transmute(i32x4::new(0,-1, 0, -1));
+        let mask = mem::transmute(i32x4::new(0, -1, 0, -1));
         let r = sse41::_mm_blendv_ps(a, b, mask);
         let e = f32x4::new(0.0, 1.0, 0.0, 1.0);
         assert_eq!(r, e);
@@ -330,7 +347,8 @@ mod tests {
 
     #[simd_test = "sse4.1"]
     unsafe fn _mm_extract_epi8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let a =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = sse41::_mm_extract_epi8(a, 1);
         assert_eq!(r, 1);
         let r = sse41::_mm_extract_epi8(a, 17);
@@ -398,10 +416,22 @@ mod tests {
 
     #[simd_test = "sse4.1"]
     unsafe fn _mm_max_epi8() {
-        let a = i8x16::new(1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32);
-        let b = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a = i8x16::new(
+            1, 4, 5, 8, 9, 12, 13, 16,
+            17, 20, 21, 24, 25, 28, 29, 32,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let b = i8x16::new(
+            2, 3, 6, 7, 10, 11, 14, 15,
+            18, 19, 22, 23, 26, 27, 30, 31,
+        );
         let r = sse41::_mm_max_epi8(a, b);
-        let e = i8x16::new(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let e = i8x16::new(
+            2, 4, 6, 8, 10, 12, 14, 16,
+            18, 20, 22, 24, 26, 28, 30, 32,
+        );
         assert_eq!(r, e);
     }
 
diff --git a/library/stdarch/src/x86/sse42.rs b/library/stdarch/src/x86/sse42.rs
index cc5a827d35f6..eb551684c9a2 100644
--- a/library/stdarch/src/x86/sse42.rs
+++ b/library/stdarch/src/x86/sse42.rs
@@ -15,7 +15,8 @@ pub const _SIDD_SWORD_OPS: i8 = 0b00000011;
 
 /// For each character in `a`, find if it is in `b` *(Default)*
 pub const _SIDD_CMP_EQUAL_ANY: i8 = 0b00000000;
-/// For each character in `a`, determine if `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
+/// For each character in `a`, determine if `b[0] <= c <= b[1] or b[1] <= c <=
+/// b[2]...`
 pub const _SIDD_CMP_RANGES: i8 = 0b00000100;
 /// The strings defined by `a` and `b` are equal
 pub const _SIDD_CMP_EQUAL_EACH: i8 = 0b00001000;
@@ -46,11 +47,7 @@ pub const _SIDD_UNIT_MASK: i8 = 0b01000000;
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
-pub unsafe fn _mm_cmpistrm(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> u8x16 {
+pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistrm128(a, b, $imm8) }
     }
@@ -58,9 +55,9 @@ pub unsafe fn _mm_cmpistrm(
 }
 
 /// Compare packed strings with implicit lengths in `a` and `b` using the
-/// control in `imm8`, and return the generated index. Similar to [`_mm_cmpestri`]
-/// with the excception that [`_mm_cmpestri`] requires the lengths of `a` and
-/// `b` to be explicitly specified.
+/// control in `imm8`, and return the generated index. Similar to
+/// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the
+/// lengths of `a` and `b` to be explicitly specified.
 ///
 /// # Control modes
 ///
@@ -105,7 +102,8 @@ pub unsafe fn _mm_cmpistrm(
 /// use stdsimd::simd::u8x16;
 /// use stdsimd::vendor::{__m128i, _mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED};
 ///
-/// let haystack = b"This is a long string of text data\r\n\tthat extends multiple lines";
+/// let haystack = b"This is a long string of text data\r\n\tthat \
+/// extends multiple lines";
 /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0";
 ///
 /// let a = __m128i::from(u8x16::load(needle, 0));
@@ -171,8 +169,8 @@ pub unsafe fn _mm_cmpistrm(
 /// # }
 /// ```
 ///
-/// Find the index of the first character in the haystack that is within a range
-/// of characters.
+/// Find the index of the first character in the haystack that is within a
+/// range of characters.
 ///
 /// ```
 /// # #![feature(cfg_target_feature)]
@@ -269,11 +267,7 @@ pub unsafe fn _mm_cmpistrm(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistri(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistri128(a, b, $imm8) }
     }
@@ -286,11 +280,7 @@ pub unsafe fn _mm_cmpistri(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistrz(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistriz128(a, b, $imm8) }
     }
@@ -303,11 +293,7 @@ pub unsafe fn _mm_cmpistrz(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistrc(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistric128(a, b, $imm8) }
     }
@@ -320,11 +306,7 @@ pub unsafe fn _mm_cmpistrc(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistrs(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistris128(a, b, $imm8) }
     }
@@ -336,11 +318,7 @@ pub unsafe fn _mm_cmpistrs(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistro(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistrio128(a, b, $imm8) }
     }
@@ -353,11 +331,7 @@ pub unsafe fn _mm_cmpistro(
 #[inline(always)]
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
-pub unsafe fn _mm_cmpistra(
-    a: __m128i,
-    b: __m128i,
-    imm8: i8,
-) -> i32 {
+pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i8) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpistria128(a, b, $imm8) }
     }
@@ -370,11 +344,7 @@ pub unsafe fn _mm_cmpistra(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
 pub unsafe fn _mm_cmpestrm(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> u8x16 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestrm128(a, la, b, lb, $imm8) }
@@ -383,9 +353,9 @@ pub unsafe fn _mm_cmpestrm(
 }
 
 /// Compare packed strings `a` and `b` with lengths `la` and `lb` using the
-/// control in `imm8`, and return the generated index. Similar to [`_mm_cmpistri`]
-/// with the excception that [`_mm_cmpistri`] implicityly determines the length of
-/// `a` and `b`.
+/// control in `imm8`, and return the generated index. Similar to
+/// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly
+/// determines the length of `a` and `b`.
 ///
 /// # Control modes
 ///
@@ -468,11 +438,7 @@ pub unsafe fn _mm_cmpestrm(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestri(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestri128(a, la, b, lb, $imm8) }
@@ -487,11 +453,7 @@ pub unsafe fn _mm_cmpestri(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrz(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestriz128(a, la, b, lb, $imm8) }
@@ -506,11 +468,7 @@ pub unsafe fn _mm_cmpestrz(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrc(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestric128(a, la, b, lb, $imm8) }
@@ -525,11 +483,7 @@ pub unsafe fn _mm_cmpestrc(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrs(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestris128(a, la, b, lb, $imm8) }
@@ -544,11 +498,7 @@ pub unsafe fn _mm_cmpestrs(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestro(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestrio128(a, la, b, lb, $imm8) }
@@ -564,11 +514,7 @@ pub unsafe fn _mm_cmpestro(
 #[target_feature = "+sse4.2"]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestra(
-    a: __m128i,
-    la: i32,
-    b: __m128i,
-    lb: i32,
-    imm8: i8,
+    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
 ) -> i32 {
     macro_rules! call {
         ($imm8:expr) => { pcmpestria128(a, la, b, lb, $imm8) }
@@ -624,22 +570,35 @@ pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     // SSE 4.2 string and text comparison ops
     #[link_name = "llvm.x86.sse42.pcmpestrm128"]
-    fn pcmpestrm128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> u8x16;
+    fn pcmpestrm128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> u8x16;
     #[link_name = "llvm.x86.sse42.pcmpestri128"]
-    fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8)
+        -> i32;
     #[link_name = "llvm.x86.sse42.pcmpestriz128"]
-    fn pcmpestriz128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestriz128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> i32;
     #[link_name = "llvm.x86.sse42.pcmpestric128"]
-    fn pcmpestric128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestric128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> i32;
     #[link_name = "llvm.x86.sse42.pcmpestris128"]
-    fn pcmpestris128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestris128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> i32;
     #[link_name = "llvm.x86.sse42.pcmpestrio128"]
-    fn pcmpestrio128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestrio128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> i32;
     #[link_name = "llvm.x86.sse42.pcmpestria128"]
-    fn pcmpestria128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
+    fn pcmpestria128(
+        a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
+    ) -> i32;
     #[link_name = "llvm.x86.sse42.pcmpistrm128"]
     fn pcmpistrm128(a: __m128i, b: __m128i, imm8: i8) -> u8x16;
     #[link_name = "llvm.x86.sse42.pcmpistri128"]
@@ -685,7 +644,8 @@ mod tests {
         ptr::copy_nonoverlapping(
             s.get_unchecked(0) as *const u8 as *const u8,
             slice.get_unchecked_mut(0) as *mut u8 as *mut u8,
-            s.len());
+            s.len(),
+        );
         __m128i::from(u8x16::load(slice, 0))
     }
 
@@ -694,8 +654,11 @@ mod tests {
         let a = str_to_m128i(b"Hello! Good-Bye!");
         let b = str_to_m128i(b"hello! good-bye!");
         let i = sse42::_mm_cmpistrm(a, b, sse42::_SIDD_UNIT_MASK);
-        let res = u8x16::new(0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-                             0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let res = u8x16::new(
+            0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+            0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff,
+        );
         assert_eq!(i, res);
     }
 
@@ -733,14 +696,23 @@ mod tests {
 
     #[simd_test = "sse4.2"]
     unsafe fn _mm_cmpistro() {
-        let a_bytes = u8x16::new(0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
-                                 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let b_bytes = u8x16::new(0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
-                                 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a_bytes = u8x16::new(
+            0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let b_bytes = u8x16::new(
+            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        );
         let a = __m128i::from(a_bytes);
         let b = __m128i::from(b_bytes);
         let i = sse42::_mm_cmpistro(
-                a, b, sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK);
+            a,
+            b,
+            sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK,
+        );
         assert_eq!(0, i);
     }
 
@@ -757,15 +729,20 @@ mod tests {
         let a = str_to_m128i(b"Hello!");
         let b = str_to_m128i(b"Hello.");
         let i = sse42::_mm_cmpestrm(a, 5, b, 5, sse42::_SIDD_UNIT_MASK);
-        assert_eq!(i, u8x16::new(0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
-                                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let r = u8x16::new(
+            0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+        );
+        assert_eq!(i, r);
     }
 
     #[simd_test = "sse4.2"]
     unsafe fn _mm_cmpestri() {
         let a = str_to_m128i(b"bar - garbage");
         let b = str_to_m128i(b"foobar");
-        let i = sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
+        let i =
+            sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
         assert_eq!(3, i);
     }
 
@@ -773,8 +750,8 @@ mod tests {
     unsafe fn _mm_cmpestrz() {
         let a = str_to_m128i(b"");
         let b = str_to_m128i(b"Hello");
-        let i = sse42::_mm_cmpestrz(
-                a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
+        let i =
+            sse42::_mm_cmpestrz(a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
         assert_eq!(1, i);
     }
 
@@ -782,19 +759,20 @@ mod tests {
     unsafe fn _mm_cmpestrc() {
         let va = str_to_m128i(b"!!!!!!!!");
         let vb = str_to_m128i(b"        ");
-        let i = sse42::_mm_cmpestrc(
-                va, 7, vb, 7, sse42::_SIDD_UNIT_MASK);
+        let i = sse42::_mm_cmpestrc(va, 7, vb, 7, sse42::_SIDD_UNIT_MASK);
         assert_eq!(0, i);
     }
 
     #[simd_test = "sse4.2"]
     unsafe fn _mm_cmpestrs() {
-        let a_bytes = u8x16::new(0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
-                                 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a_bytes = u8x16::new(
+            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        );
         let a = __m128i::from(a_bytes);
         let b = __m128i::from(u8x16::splat(0x00));
-        let i = sse42::_mm_cmpestrs(
-                a, 8, b, 0, sse42::_SIDD_UWORD_OPS);
+        let i = sse42::_mm_cmpestrs(a, 8, b, 0, sse42::_SIDD_UWORD_OPS);
         assert_eq!(0, i);
     }
 
@@ -802,8 +780,7 @@ mod tests {
     unsafe fn _mm_cmpestro() {
         let a = str_to_m128i(b"Hello");
         let b = str_to_m128i(b"World");
-        let i = sse42::_mm_cmpestro(
-                a, 5, b, 5, sse42::_SIDD_UBYTE_OPS);
+        let i = sse42::_mm_cmpestro(a, 5, b, 5, sse42::_SIDD_UBYTE_OPS);
         assert_eq!(0, i);
     }
 
@@ -812,7 +789,12 @@ mod tests {
         let a = str_to_m128i(b"Cannot match a");
         let b = str_to_m128i(b"Null after 14");
         let i = sse42::_mm_cmpestra(
-                a, 14, b, 16, sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK);
+            a,
+            14,
+            b,
+            16,
+            sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK,
+        );
         assert_eq!(1, i);
     }
 
diff --git a/library/stdarch/src/x86/ssse3.rs b/library/stdarch/src/x86/ssse3.rs
index 111bc8ff99fd..b5c9d3ae9ef6 100644
--- a/library/stdarch/src/x86/ssse3.rs
+++ b/library/stdarch/src/x86/ssse3.rs
@@ -13,7 +13,8 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
     pabsb128(a)
 }
 
-/// Compute the absolute value of each of the packed 16-bit signed integers in `a` and
+/// Compute the absolute value of each of the packed 16-bit signed integers in
+/// `a` and
 /// return the 16-bit unsigned integer
 #[inline(always)]
 #[target_feature = "+ssse3"]
@@ -22,7 +23,8 @@ pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
     pabsw128(a)
 }
 
-/// Compute the absolute value of each of the packed 32-bit signed integers in `a` and
+/// Compute the absolute value of each of the packed 32-bit signed integers in
+/// `a` and
 /// return the 32-bit unsigned integer
 #[inline(always)]
 #[target_feature = "+ssse3"]
@@ -82,7 +84,9 @@ pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
         (a, b, n)
     };
 
-    const fn add(a: u32, b: u32) -> u32 { a + b }
+    const fn add(a: u32, b: u32) -> u32 {
+        a + b
+    }
     macro_rules! shuffle {
         ($shift:expr) => {
             simd_shuffle16(b, a, [
@@ -98,14 +102,22 @@ pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
         }
     }
     match n {
-        0 => shuffle!(0), 1 => shuffle!(1),
-        2 => shuffle!(2), 3 => shuffle!(3),
-        4 => shuffle!(4), 5 => shuffle!(5),
-        6 => shuffle!(6), 7 => shuffle!(7),
-        8 => shuffle!(8), 9 => shuffle!(9),
-        10 => shuffle!(10), 11 => shuffle!(11),
-        12 => shuffle!(12), 13 => shuffle!(13),
-        14 => shuffle!(14), 15 => shuffle!(15),
+        0 => shuffle!(0),
+        1 => shuffle!(1),
+        2 => shuffle!(2),
+        3 => shuffle!(3),
+        4 => shuffle!(4),
+        5 => shuffle!(5),
+        6 => shuffle!(6),
+        7 => shuffle!(7),
+        8 => shuffle!(8),
+        9 => shuffle!(9),
+        10 => shuffle!(10),
+        11 => shuffle!(11),
+        12 => shuffle!(12),
+        13 => shuffle!(13),
+        14 => shuffle!(14),
+        15 => shuffle!(15),
         _ => shuffle!(16),
     }
 }
@@ -223,7 +235,7 @@ pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
 }
 
 #[allow(improper_ctypes)]
-extern {
+extern "C" {
     #[link_name = "llvm.x86.ssse3.pabs.b.128"]
     fn pabsb128(a: i8x16) -> u8x16;
 
@@ -275,7 +287,7 @@ mod tests {
     use stdsimd_test::simd_test;
 
     use v128::*;
-    use x86::ssse3 as ssse3;
+    use x86::ssse3;
 
     #[simd_test = "ssse3"]
     unsafe fn _mm_abs_epi8() {
@@ -297,44 +309,36 @@ mod tests {
 
     #[simd_test = "ssse3"]
     unsafe fn _mm_shuffle_epi8() {
-        let a = u8x16::new(
-            1, 2, 3, 4,
-            5, 6, 7, 8,
-            9, 10, 11, 12,
-            13, 14, 15, 16,
-        );
-        let b = u8x16::new(
-            4, 128, 4, 3,
-            24, 12, 6, 19,
-            12, 5, 5, 10,
-            4, 1, 8, 0,
-        );
-        let expected = u8x16::new(
-            5, 0, 5, 4,
-            9, 13, 7, 4,
-            13, 6, 6, 11,
-            5, 2, 9, 1,
-        );
+        let a =
+            u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b =
+            u8x16::new(4, 128, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let expected =
+            u8x16::new(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
         let r = ssse3::_mm_shuffle_epi8(a, b);
         assert_eq!(r, expected);
     }
 
     #[simd_test = "ssse3"]
     unsafe fn _mm_alignr_epi8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let a =
+            i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b =
+            i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
         let r = ssse3::_mm_alignr_epi8(a, b, 33);
         assert_eq!(r, i8x16::splat(0));
 
         let r = ssse3::_mm_alignr_epi8(a, b, 17);
-        let expected = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
+        let expected =
+            i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
         assert_eq!(r, expected);
 
         let r = ssse3::_mm_alignr_epi8(a, b, 16);
         assert_eq!(r, a);
 
         let r = ssse3::_mm_alignr_epi8(a, b, 15);
-        let expected = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let expected =
+            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq!(r, expected);
 
         let r = ssse3::_mm_alignr_epi8(a, b, 0);
@@ -397,8 +401,10 @@ mod tests {
 
     #[simd_test = "ssse3"]
     unsafe fn _mm_maddubs_epi16() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let a =
+            u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b =
+            i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
         let expected = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
         let r = ssse3::_mm_maddubs_epi16(a, b);
         assert_eq!(r, expected);
@@ -415,9 +421,21 @@ mod tests {
 
     #[simd_test = "ssse3"]
     unsafe fn _mm_sign_epi8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
-        let b = i8x16::new(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
-        let expected = i8x16::new(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let a = i8x16::new(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, -14, -15, 16,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let b = i8x16::new(
+            4, 63, -4, 3, 24, 12, -6, -19,
+            12, 5, -5, 10, 4, 1, -8, 0,
+        );
+        #[cfg_attr(rustfmt, rustfmt_skip)]
+        let expected = i8x16::new(
+            1, 2, -3, 4, 5, 6, -7, -8,
+            9, 10, -11, 12, 13, -14, 15, 0,
+        );
         let r = ssse3::_mm_sign_epi8(a, b);
         assert_eq!(r, expected);
     }
diff --git a/library/stdarch/src/x86/tbm.rs b/library/stdarch/src/x86/tbm.rs
index d38fc3fa10f4..c6be38adc8d8 100644
--- a/library/stdarch/src/x86/tbm.rs
+++ b/library/stdarch/src/x86/tbm.rs
@@ -1,16 +1,21 @@
 //! Trailing Bit Manipulation (TBM) instruction set.
 //!
 //! The reference is [AMD64 Architecture Programmer's Manual, Volume 3:
-//! General-Purpose and System
-//! Instructions](http://support.amd.com/TechDocs/24594.pdf).
+//! General-Purpose and System Instructions][amd64_ref].
 //!
-//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29)
-//! provides a quick overview of the available instructions.
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
+//! instructions.
+//!
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29
+//! "TBM (Trailing Bit Manipulation)"
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
 
-// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
+// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select:
+// intrinsic %llvm.x86.tbm.bextri.u32
 /*
 #[allow(dead_code)]
 extern "C" {
@@ -39,8 +44,8 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
 /// Extracts bits of `a` specified by `control` into
 /// the least significant bits of the result.
 ///
-/// Bits [7,0] of `control` specify the index to the first bit in the range to be
-/// extracted, and bits [15,8] specify the length of the range.
+/// Bits [7,0] of `control` specify the index to the first bit in the range to
+/// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
 #[target_feature = "+tbm"]
 pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
@@ -50,8 +55,8 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
 /// Extracts bits of `a` specified by `control` into
 /// the least significant bits of the result.
 ///
-/// Bits [7,0] of `control` specify the index to the first bit in the range to be
-/// extracted, and bits [15,8] specify the length of the range.
+/// Bits [7,0] of `control` specify the index to the first bit in the range to
+/// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
 #[target_feature = "+tbm"]
 pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
@@ -122,7 +127,8 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
     !x & (x.wrapping_add(1))
 }
 
-/// Sets the least significant zero bit of `x` and clears all bits above that bit.
+/// Sets the least significant zero bit of `x` and clears all bits above
+/// that bit.
 ///
 /// If there is no zero bit in `x`, it sets all the bits.
 #[inline(always)]
@@ -132,7 +138,8 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
     x ^ (x.wrapping_add(1))
 }
 
-/// Sets the least significant zero bit of `x` and clears all bits above that bit.
+/// Sets the least significant zero bit of `x` and clears all bits above
+/// that bit.
 ///
 /// If there is no zero bit in `x`, it sets all the bits.
 #[inline(always)]
@@ -272,162 +279,152 @@ mod tests {
 
     #[simd_test = "tbm"]
     unsafe fn _blcfill_u32() {
-        assert_eq!(
-            tbm::_blcfill_u32(0b0101_0111u32),
-            0b0101_0000u32);
-        assert_eq!(
-            tbm::_blcfill_u32(0b1111_1111u32),
-            0u32);
+        assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
+        assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32);
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _blcfill_u64() {
-        assert_eq!(
-            tbm::_blcfill_u64(0b0101_0111u64),
-            0b0101_0000u64);
-        assert_eq!(
-            tbm::_blcfill_u64(0b1111_1111u64),
-            0u64);
+        assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
+        assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64);
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blci_u32() {
         assert_eq!(
             tbm::_blci_u32(0b0101_0000u32),
-            0b1111_1111_1111_1111_1111_1111_1111_1110u32);
+            0b1111_1111_1111_1111_1111_1111_1111_1110u32
+        );
         assert_eq!(
             tbm::_blci_u32(0b1111_1111u32),
-            0b1111_1111_1111_1111_1111_1110_1111_1111u32);
+            0b1111_1111_1111_1111_1111_1110_1111_1111u32
+        );
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     unsafe fn _blci_u64() {
         assert_eq!(
             tbm::_blci_u64(0b0101_0000u64),
-            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64);
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
+        );
         assert_eq!(
             tbm::_blci_u64(0b1111_1111u64),
-            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64);
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
+        );
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blcic_u32() {
-        assert_eq!(
-            tbm::_blcic_u32(0b0101_0001u32),
-            0b0000_0010u32);
-        assert_eq!(
-            tbm::_blcic_u32(0b1111_1111u32),
-            0b1_0000_0000u32);
+        assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
+        assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _blcic_u64() {
-        assert_eq!(
-            tbm::_blcic_u64(0b0101_0001u64),
-            0b0000_0010u64);
-        assert_eq!(
-            tbm::_blcic_u64(0b1111_1111u64),
-            0b1_0000_0000u64);
+        assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
+        assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blcmsk_u32() {
-        assert_eq!(
-            tbm::_blcmsk_u32(0b0101_0001u32),
-            0b0000_0011u32);
-        assert_eq!(
-            tbm::_blcmsk_u32(0b1111_1111u32),
-            0b1_1111_1111u32);
+        assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
+        assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _blcmsk_u64() {
-        assert_eq!(
-            tbm::_blcmsk_u64(0b0101_0001u64),
-            0b0000_0011u64);
-        assert_eq!(
-            tbm::_blcmsk_u64(0b1111_1111u64),
-            0b1_1111_1111u64);
+        assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
+        assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blcs_u32() {
-       assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
-       assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
+        assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
+        assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
     unsafe fn _blcs_u64() {
-       assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
-       assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
+        assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
+        assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blsfill_u32() {
-        assert_eq!(
-            tbm::_blsfill_u32(0b0101_0100u32),
-            0b0101_0111u32);
+        assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
         assert_eq!(
             tbm::_blsfill_u32(0u32),
-            0b1111_1111_1111_1111_1111_1111_1111_1111u32);
+            0b1111_1111_1111_1111_1111_1111_1111_1111u32
+        );
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     unsafe fn _blsfill_u64() {
-        assert_eq!(
-            tbm::_blsfill_u64(0b0101_0100u64),
-            0b0101_0111u64);
+        assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
         assert_eq!(
             tbm::_blsfill_u64(0u64),
-            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
+        );
     }
 
     #[simd_test = "tbm"]
     unsafe fn _blsic_u32() {
         assert_eq!(
             tbm::_blsic_u32(0b0101_0100u32),
-            0b1111_1111_1111_1111_1111_1111_1111_1011u32);
+            0b1111_1111_1111_1111_1111_1111_1111_1011u32
+        );
         assert_eq!(
             tbm::_blsic_u32(0u32),
-            0b1111_1111_1111_1111_1111_1111_1111_1111u32);
+            0b1111_1111_1111_1111_1111_1111_1111_1111u32
+        );
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     unsafe fn _blsic_u64() {
         assert_eq!(
             tbm::_blsic_u64(0b0101_0100u64),
-            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64);
-       assert_eq!(
-           tbm::_blsic_u64(0u64),
-           0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
+        );
+        assert_eq!(
+            tbm::_blsic_u64(0u64),
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
+        );
     }
 
     #[simd_test = "tbm"]
     unsafe fn _t1mskc_u32() {
-       assert_eq!(
-           tbm::_t1mskc_u32(0b0101_0111u32),
-           0b1111_1111_1111_1111_1111_1111_1111_1000u32);
-       assert_eq!(
-           tbm::_t1mskc_u32(0u32),
-           0b1111_1111_1111_1111_1111_1111_1111_1111u32);
+        assert_eq!(
+            tbm::_t1mskc_u32(0b0101_0111u32),
+            0b1111_1111_1111_1111_1111_1111_1111_1000u32
+        );
+        assert_eq!(
+            tbm::_t1mskc_u32(0u32),
+            0b1111_1111_1111_1111_1111_1111_1111_1111u32
+        );
     }
 
     #[simd_test = "tbm"]
     #[cfg(not(target_arch = "x86"))]
+    #[cfg_attr(rustfmt, rustfmt_skip)]
     unsafe fn _t1mksc_u64() {
-       assert_eq!(
-           tbm::_t1mskc_u64(0b0101_0111u64),
-           0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64);
-       assert_eq!(
-           tbm::_t1mskc_u64(0u64),
-           0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
+        assert_eq!(
+            tbm::_t1mskc_u64(0b0101_0111u64),
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
+        );
+        assert_eq!(
+            tbm::_t1mskc_u64(0u64),
+            0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
+        );
     }
 
     #[simd_test = "tbm"]
diff --git a/library/stdarch/stdsimd-test/assert-instr-macro/build.rs b/library/stdarch/stdsimd-test/assert-instr-macro/build.rs
index dc42e265b737..45a868441c4a 100644
--- a/library/stdarch/stdsimd-test/assert-instr-macro/build.rs
+++ b/library/stdarch/stdsimd-test/assert-instr-macro/build.rs
@@ -2,7 +2,10 @@ use std::env;
 
 fn main() {
     println!("cargo:rerun-if-changed=build.rs");
-    let opt_level = env::var("OPT_LEVEL").ok().and_then(|s| s.parse().ok()).unwrap_or(0);
+    let opt_level = env::var("OPT_LEVEL")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0);
     let profile = env::var("PROFILE").unwrap_or(String::new());
     if profile == "release" || opt_level >= 2 {
         println!("cargo:rustc-cfg=optimized");
diff --git a/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs b/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs
index 22f5dcef24ea..9e5df683e9a3 100644
--- a/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs
+++ b/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs
@@ -21,13 +21,13 @@ extern crate synom;
 use proc_macro2::TokenStream;
 
 #[proc_macro_attribute]
-pub fn assert_instr(attr: proc_macro::TokenStream,
-                    item: proc_macro::TokenStream)
-    -> proc_macro::TokenStream
-{
+pub fn assert_instr(
+    attr: proc_macro::TokenStream, item: proc_macro::TokenStream
+) -> proc_macro::TokenStream {
     let invoc = syn::parse::<Invoc>(attr)
         .expect("expected #[assert_instr(instr, a = b, ...)]");
-    let item = syn::parse::<syn::Item>(item).expect("must be attached to an item");
+    let item =
+        syn::parse::<syn::Item>(item).expect("must be attached to an item");
     let func = match item.node {
         syn::ItemKind::Fn(ref f) => f,
         _ => panic!("must be attached to a function"),
@@ -40,10 +40,11 @@ pub fn assert_instr(attr: proc_macro::TokenStream,
         (quote! { #[ignore] }).into()
     };
     let name = &func.ident;
-    let assert_name = syn::Ident::from(&format!("assert_{}_{}",
-                                                name.sym.as_str(),
-                                                instr.sym.as_str())[..]);
-    let shim_name = syn::Ident::from(&format!("{}_shim", name.sym.as_str())[..]);
+    let assert_name = syn::Ident::from(
+        &format!("assert_{}_{}", name.sym.as_str(), instr.sym.as_str())[..],
+    );
+    let shim_name =
+        syn::Ident::from(&format!("{}_shim", name.sym.as_str())[..]);
     let (to_test, test_name) = if invoc.args.len() == 0 {
         (TokenStream::empty(), &func.ident)
     } else {
@@ -69,16 +70,29 @@ pub fn assert_instr(attr: proc_macro::TokenStream,
                 }
             };
         }
-        let attrs = item.attrs.iter().filter(|attr| {
-            attr.path.segments.get(0).item().ident.sym.as_str().starts_with("target")
-        }).collect::<Vec<_>>();
+        let attrs = item.attrs
+            .iter()
+            .filter(|attr| {
+                attr.path
+                    .segments
+                    .get(0)
+                    .item()
+                    .ident
+                    .sym
+                    .as_str()
+                    .starts_with("target")
+            })
+            .collect::<Vec<_>>();
         let attrs = Append(&attrs);
-        (quote! {
-            #attrs
-            unsafe fn #shim_name(#(#inputs),*) #ret {
-                #name(#(#input_vals),*)
-            }
-        }.into(), &shim_name)
+        (
+            quote! {
+                #attrs
+                unsafe fn #shim_name(#(#inputs),*) #ret {
+                    #name(#(#input_vals),*)
+                }
+            }.into(),
+            &shim_name,
+        )
     };
 
     let tts: TokenStream = quote! {
@@ -128,8 +142,9 @@ impl synom::Synom for Invoc {
 struct Append<T>(T);
 
 impl<T> quote::ToTokens for Append<T>
-    where T: Clone + IntoIterator,
-          T::Item: quote::ToTokens
+where
+    T: Clone + IntoIterator,
+    T::Item: quote::ToTokens,
 {
     fn to_tokens(&self, tokens: &mut quote::Tokens) {
         for item in self.0.clone() {
diff --git a/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs b/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
index 1543a2767710..3777feae798e 100644
--- a/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
+++ b/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
@@ -1,16 +1,16 @@
 //! Implementation of the `#[simd_test]` macro
 //!
-//! This macro expands to a `#[test]` function which tests the local machine for
-//! the appropriate cfg before calling the inner test function.
+//! This macro expands to a `#[test]` function which tests the local machine
+//! for the appropriate cfg before calling the inner test function.
 
 #![feature(proc_macro)]
 
+extern crate proc_macro2;
+extern crate proc_macro;
 #[macro_use]
 extern crate quote;
-extern crate proc_macro;
-extern crate proc_macro2;
 
-use proc_macro2::{TokenStream, Term, TokenNode, TokenTree};
+use proc_macro2::{Term, TokenNode, TokenStream, TokenTree};
 use proc_macro2::Literal;
 
 fn string(s: &str) -> TokenTree {
@@ -22,8 +22,9 @@ fn string(s: &str) -> TokenTree {
 }
 
 #[proc_macro_attribute]
-pub fn simd_test(attr: proc_macro::TokenStream,
-                 item: proc_macro::TokenStream) -> proc_macro::TokenStream {
+pub fn simd_test(
+    attr: proc_macro::TokenStream, item: proc_macro::TokenStream
+) -> proc_macro::TokenStream {
     let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
     if tokens.len() != 2 {
         panic!("expected #[simd_test = \"feature\"]");
@@ -37,8 +38,9 @@ pub fn simd_test(attr: proc_macro::TokenStream,
         TokenNode::Literal(ref l) => l.to_string(),
         _ => panic!("expected #[simd_test = \"feature\"]"),
     };
-    let enable_feature = enable_feature.trim_left_matches('"')
-                                       .trim_right_matches('"');
+    let enable_feature = enable_feature
+        .trim_left_matches('"')
+        .trim_right_matches('"');
     let enable_feature = string(&format!("+{}", enable_feature));
     let item = TokenStream::from(item);
     let name = find_name(item.clone());
@@ -67,7 +69,7 @@ fn find_name(item: TokenStream) -> Term {
     while let Some(tok) = tokens.next() {
         if let TokenNode::Term(word) = tok.kind {
             if word.as_str() == "fn" {
-                break
+                break;
             }
         }
     }
diff --git a/library/stdarch/stdsimd-test/src/lib.rs b/library/stdarch/stdsimd-test/src/lib.rs
index 8bad780acb14..5de401695f61 100644
--- a/library/stdarch/stdsimd-test/src/lib.rs
+++ b/library/stdarch/stdsimd-test/src/lib.rs
@@ -7,12 +7,12 @@
 #![feature(proc_macro)]
 
 extern crate assert_instr_macro;
-extern crate simd_test_macro;
 extern crate backtrace;
 extern crate cc;
-extern crate rustc_demangle;
 #[macro_use]
 extern crate lazy_static;
+extern crate rustc_demangle;
+extern crate simd_test_macro;
 
 use std::collections::HashMap;
 use std::env;
@@ -23,7 +23,8 @@ pub use assert_instr_macro::*;
 pub use simd_test_macro::*;
 
 lazy_static! {
-    static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
+    static ref DISASSEMBLY: HashMap<String, Vec<Function>>
+        = disassemble_myself();
 }
 
 struct Function {
@@ -37,14 +38,22 @@ struct Instruction {
 fn disassemble_myself() -> HashMap<String, Vec<Function>> {
     let me = env::current_exe().expect("failed to get current exe");
 
-    if cfg!(target_arch = "x86_64") &&
-        cfg!(target_os = "windows") &&
-        cfg!(target_env = "msvc") {
-        let mut cmd = cc::windows_registry::find("x86_64-pc-windows-msvc", "dumpbin.exe")
-            .expect("failed to find `dumpbin` tool");
-        let output = cmd.arg("/DISASM").arg(&me).output()
+    if cfg!(target_arch = "x86_64") && cfg!(target_os = "windows")
+        && cfg!(target_env = "msvc")
+    {
+        let mut cmd = cc::windows_registry::find(
+            "x86_64-pc-windows-msvc",
+            "dumpbin.exe",
+        ).expect("failed to find `dumpbin` tool");
+        let output = cmd.arg("/DISASM")
+            .arg(&me)
+            .output()
             .expect("failed to execute dumpbin");
-        println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
+        println!(
+            "{}\n{}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr)
+        );
         assert!(output.status.success());
         parse_dumpbin(&String::from_utf8_lossy(&output.stdout))
     } else if cfg!(target_os = "windows") {
@@ -55,7 +64,11 @@ fn disassemble_myself() -> HashMap<String, Vec<Function>> {
             .arg(&me)
             .output()
             .expect("failed to execute otool");
-        println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
+        println!(
+            "{}\n{}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr)
+        );
         assert!(output.status.success());
 
         parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
@@ -66,10 +79,16 @@ fn disassemble_myself() -> HashMap<String, Vec<Function>> {
             .arg(&me)
             .output()
             .expect("failed to execute objdump");
-        println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
+        println!(
+            "{}\n{}",
+            output.status,
+            String::from_utf8_lossy(&output.stderr)
+        );
         assert!(output.status.success());
 
-        parse_objdump(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
+        parse_objdump(
+            &str::from_utf8(&output.stdout).expect("stdout not utf8"),
+        )
     }
 }
 
@@ -91,7 +110,7 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
     while let Some(header) = lines.next() {
         // symbols should start with `$hex_addr <$name>:`
         if !header.ends_with(">:") {
-            continue
+            continue;
         }
         let start = header.find("<").unwrap();
         let symbol = &header[start + 1..header.len() - 2];
@@ -99,15 +118,17 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
         let mut instructions = Vec::new();
         while let Some(instruction) = lines.next() {
             if instruction.is_empty() {
-                break
+                break;
             }
             // Each line of instructions should look like:
             //
             //      $rel_offset: ab cd ef 00    $instruction...
-            let parts = instruction.split_whitespace()
+            let parts = instruction
+                .split_whitespace()
                 .skip(1)
                 .skip_while(|s| {
-                    s.len() == expected_len && usize::from_str_radix(s, 16).is_ok()
+                    s.len() == expected_len
+                        && usize::from_str_radix(s, 16).is_ok()
                 })
                 .map(|s| s.to_string())
                 .collect::<Vec<String>>();
@@ -116,10 +137,12 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
 
         ret.entry(normalize(symbol))
             .or_insert(Vec::new())
-            .push(Function { instrs: instructions });
+            .push(Function {
+                instrs: instructions,
+            });
     }
 
-    return ret
+    return ret;
 }
 
 fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
@@ -138,7 +161,7 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
         };
         // symbols should start with `$symbol:`
         if !header.ends_with(":") {
-            continue
+            continue;
         }
         // strip the leading underscore and the trailing colon
         let symbol = &header[1..header.len() - 1];
@@ -147,12 +170,13 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
         while let Some(instruction) = lines.next() {
             if instruction.ends_with(":") {
                 cached_header = Some(instruction);
-                break
+                break;
             }
             // Each line of instructions should look like:
             //
             //      $addr    $instruction...
-            let parts = instruction.split_whitespace()
+            let parts = instruction
+                .split_whitespace()
                 .skip(1)
                 .map(|s| s.to_string())
                 .collect::<Vec<String>>();
@@ -161,10 +185,12 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
 
         ret.entry(normalize(symbol))
             .or_insert(Vec::new())
-            .push(Function { instrs: instructions });
+            .push(Function {
+                instrs: instructions,
+            });
     }
 
-    return ret
+    return ret;
 }
 
 fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
@@ -183,7 +209,7 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
         };
         // symbols should start with `$symbol:`
         if !header.ends_with(":") {
-            continue
+            continue;
         }
         // strip the trailing colon
         let symbol = &header[..header.len() - 1];
@@ -192,20 +218,21 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
         while let Some(instruction) = lines.next() {
             if !instruction.starts_with("  ") {
                 cached_header = Some(instruction);
-                break
+                break;
             }
             // Each line looks like:
             //
             // >  $addr: ab cd ef     $instr..
             // >         00 12          # this line os optional
             if instruction.starts_with("       ") {
-                continue
+                continue;
             }
-            let parts = instruction.split_whitespace()
+            let parts = instruction
+                .split_whitespace()
                 .skip(1)
-                .skip_while(|s| {
-                    s.len() == 2 && usize::from_str_radix(s, 16).is_ok()
-                })
+                .skip_while(
+                    |s| s.len() == 2 && usize::from_str_radix(s, 16).is_ok(),
+                )
                 .map(|s| s.to_string())
                 .collect::<Vec<String>>();
             instructions.push(Instruction { parts });
@@ -213,10 +240,12 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
 
         ret.entry(normalize(symbol))
             .or_insert(Vec::new())
-            .push(Function { instrs: instructions });
+            .push(Function {
+                instrs: instructions,
+            });
     }
 
-    return ret
+    return ret;
 }
 
 fn normalize(symbol: &str) -> String {
@@ -266,7 +295,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
             // instruction: tzcntl => tzcnt and compares that.
             if part.starts_with(expected) {
                 found = true;
-                break
+                break;
             }
         }
     }
@@ -274,7 +303,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     let probably_only_one_instruction = function.instrs.len() < 30;
 
     if found && probably_only_one_instruction {
-        return
+        return;
     }
 
     // Help debug by printing out the found disassembly, and then panic as we
diff --git a/library/stdarch/tests/cpu-detection.rs b/library/stdarch/tests/cpu-detection.rs
index 1a49a8762b33..36cf7c97aa14 100644
--- a/library/stdarch/tests/cpu-detection.rs
+++ b/library/stdarch/tests/cpu-detection.rs
@@ -1,10 +1,10 @@
 #![cfg_attr(feature = "strict", deny(warnings))]
 #![feature(cfg_target_feature)]
 
+extern crate cupid;
 #[macro_use]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 extern crate stdsimd;
-extern crate cupid;
 
 #[test]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]