[ci] check formatting (#64)

* [ci] check formatting

* [rustfmt] reformat the whole library
This commit is contained in:
gnzlbg 2017-10-27 17:55:29 +02:00 committed by Alex Crichton
parent 5869eca3e9
commit 69d2ad85f3
35 changed files with 2207 additions and 1405 deletions

View file

@ -17,9 +17,21 @@ matrix:
script: ci/run.sh
- install: true
script: ci/dox.sh
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
script: |
cargo install rustfmt-nightly
cargo fmt -- --write-mode=diff
cd stdsimd
cargo fmt -- --write-mode=diff
cd assert-instr-macro
cargo fmt -- --write-mode=diff
cd ../simd-test-macro
cargo fmt -- --write-mode=diff
allow_failures:
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
install:
- if [ "$NO_ADD" = "" ]; then rustup target add $TARGET; fi
- if [ "$NO_ADD" == "" ]; then rustup target add $TARGET; fi
script:
- cargo generate-lockfile

View file

@ -26,7 +26,8 @@ impl Frsqrt for f64x2 {
let u = unsafe {
vendor::_mm_rsqrt_ps(
f32x4::new(t.extract(0), t.extract(1), 0., 0.)).as_f64x4()
f32x4::new(t.extract(0), t.extract(1), 0., 0.),
).as_f64x4()
};
f64x2::new(u.extract(0), u.extract(1))
}
@ -36,11 +37,12 @@ impl Frsqrt for f64x2 {
use self::stdsimd::vendor;
unsafe { vendor::vrsqrte_f32(self.as_f32x2()).as_f64x2() }
}
#[cfg(not(any(all(any(target_arch = "x86", target_arch = "x86_64"),
#[cfg(not(any(all(any(target_arch = "x86",
target_arch = "x86_64"),
target_feature = "sse"),
all(any(target_arch = "arm", target_arch = "aarch64"),
target_feature = "neon")
)))]
all(any(target_arch = "arm",
target_arch = "aarch64"),
target_feature = "neon"))))]
{
self.replace(0, 1. / self.extract(0).sqrt());
self.replace(1, 1. / self.extract(1).sqrt());
@ -57,9 +59,9 @@ struct Body {
}
impl Body {
fn new(x0: f64, x1: f64, x2: f64,
v0: f64, v1: f64, v2: f64,
mass: f64) -> Body {
fn new(
x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64
) -> Body {
Body {
x: [x0, x1, x2],
_fill: 0.0,
@ -91,7 +93,7 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
let mut i = 0;
for j in 0..N_BODIES {
for k in j+1..N_BODIES {
for k in j + 1..N_BODIES {
for m in 0..3 {
r[i][m] = bodies[j].x[m] - bodies[k].x[m];
}
@ -102,14 +104,15 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
i = 0;
while i < N {
for m in 0..3 {
dx[m] = f64x2::new(r[i][m], r[i+1][m]);
dx[m] = f64x2::new(r[i][m], r[i + 1][m]);
}
dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
distance = dsquared.frsqrt();
for _ in 0..2 {
distance = distance * f64x2::splat(1.5) -
((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance)
distance = distance * f64x2::splat(1.5)
- ((f64x2::splat(0.5) * dsquared) * distance)
* (distance * distance)
}
dmag = f64x2::splat(dt) / dsquared * distance;
dmag.store(&mut mag, i);
@ -119,7 +122,7 @@ fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
i = 0;
for j in 0..N_BODIES {
for k in j+1..N_BODIES {
for k in j + 1..N_BODIES {
for m in 0..3 {
bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i];
bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i];
@ -138,15 +141,19 @@ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
let mut e = 0.0;
for i in 0..N_BODIES {
let bi = &bodies[i];
e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0;
for j in i+1..N_BODIES {
e += bi.mass
* (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2])
/ 2.0;
for j in i + 1..N_BODIES {
let bj = &bodies[j];
let mut dx = [0.0; 3];
for k in 0..3 {
dx[k] = bi.x[k] - bj.x[k];
}
let mut distance = 0.0;
for &d in &dx { distance += d * d }
for &d in &dx {
distance += d * d
}
e -= bi.mass * bj.mass / distance.sqrt()
}
}
@ -156,48 +163,54 @@ fn energy(bodies: &[Body; N_BODIES]) -> f64 {
fn main() {
let mut bodies: [Body; N_BODIES] = [
/* sun */
Body::new(0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
SOLAR_MASS),
Body::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SOLAR_MASS),
/* jupiter */
Body::new(4.84143144246472090e+00,
-1.16032004402742839e+00,
-1.03622044471123109e-01 ,
1.66007664274403694e-03 * DAYS_PER_YEAR,
7.69901118419740425e-03 * DAYS_PER_YEAR,
-6.90460016972063023e-05 * DAYS_PER_YEAR ,
9.54791938424326609e-04 * SOLAR_MASS
),
Body::new(
4.84143144246472090e+00,
-1.16032004402742839e+00,
-1.03622044471123109e-01,
1.66007664274403694e-03 * DAYS_PER_YEAR,
7.69901118419740425e-03 * DAYS_PER_YEAR,
-6.90460016972063023e-05 * DAYS_PER_YEAR,
9.54791938424326609e-04 * SOLAR_MASS,
),
/* saturn */
Body::new(8.34336671824457987e+00,
4.12479856412430479e+00,
-4.03523417114321381e-01 ,
-2.76742510726862411e-03 * DAYS_PER_YEAR,
4.99852801234917238e-03 * DAYS_PER_YEAR,
2.30417297573763929e-05 * DAYS_PER_YEAR ,
2.85885980666130812e-04 * SOLAR_MASS
),
Body::new(
8.34336671824457987e+00,
4.12479856412430479e+00,
-4.03523417114321381e-01,
-2.76742510726862411e-03 * DAYS_PER_YEAR,
4.99852801234917238e-03 * DAYS_PER_YEAR,
2.30417297573763929e-05 * DAYS_PER_YEAR,
2.85885980666130812e-04 * SOLAR_MASS,
),
/* uranus */
Body::new(1.28943695621391310e+01,
-1.51111514016986312e+01,
-2.23307578892655734e-01 ,
2.96460137564761618e-03 * DAYS_PER_YEAR,
2.37847173959480950e-03 * DAYS_PER_YEAR,
-2.96589568540237556e-05 * DAYS_PER_YEAR ,
4.36624404335156298e-05 * SOLAR_MASS
),
Body::new(
1.28943695621391310e+01,
-1.51111514016986312e+01,
-2.23307578892655734e-01,
2.96460137564761618e-03 * DAYS_PER_YEAR,
2.37847173959480950e-03 * DAYS_PER_YEAR,
-2.96589568540237556e-05 * DAYS_PER_YEAR,
4.36624404335156298e-05 * SOLAR_MASS,
),
/* neptune */
Body::new(1.53796971148509165e+01,
-2.59193146099879641e+01,
1.79258772950371181e-01 ,
2.68067772490389322e-03 * DAYS_PER_YEAR,
1.62824170038242295e-03 * DAYS_PER_YEAR,
-9.51592254519715870e-05 * DAYS_PER_YEAR ,
5.15138902046611451e-05 * SOLAR_MASS
)
];
Body::new(
1.53796971148509165e+01,
-2.59193146099879641e+01,
1.79258772950371181e-01,
2.68067772490389322e-03 * DAYS_PER_YEAR,
1.62824170038242295e-03 * DAYS_PER_YEAR,
-9.51592254519715870e-05 * DAYS_PER_YEAR,
5.15138902046611451e-05 * SOLAR_MASS,
),
];
let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap();
let n: usize = std::env::args()
.nth(1)
.expect("need one arg")
.parse()
.unwrap();
offset_momentum(&mut bodies);
println!("{:.9}", energy(&bodies));

View file

@ -27,8 +27,12 @@ mod example {
unsafe {
vendor::_mm_cmpestri(
vneedle, needle_len as i32, vhaystack, hay_len as i32,
vendor::_SIDD_CMP_EQUAL_ORDERED) as usize
vneedle,
needle_len as i32,
vhaystack,
hay_len as i32,
vendor::_SIDD_CMP_EQUAL_ORDERED,
) as usize
}
}

View file

@ -0,0 +1,5 @@
max_width = 79
fn_call_width = 79
wrap_comments = true
error_on_line_overflow = false
fn_args_density = "Compressed"

View file

@ -3,7 +3,9 @@
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
//!
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_ref]:
//! http://infocenter.arm.com/help/topic/com.arm.doc.
//! ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
pub use self::v6::*;

View file

@ -1,7 +1,10 @@
//! ARMv6 intrinsics.
//!
//! The reference is [ARMv6-M Architecture Reference
//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html).
//! The reference is [ARMv6-M Architecture Reference Manual][armv6m].
//!
//! [armv6m]:
//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.
//! html
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -27,16 +30,20 @@ mod tests {
#[test]
fn _rev_u16() {
unsafe {
assert_eq!(v6::_rev_u16(0b0000_0000_1111_1111_u16), 0b1111_1111_0000_0000_u16);
assert_eq!(
v6::_rev_u16(0b0000_0000_1111_1111_u16),
0b1111_1111_0000_0000_u16
);
}
}
#[test]
fn _rev_u32() {
unsafe {
assert_eq!(v6::_rev_u32(
0b0000_0000_1111_1111_0000_0000_1111_1111_u32
), 0b1111_1111_0000_0000_1111_1111_0000_0000_u32);
assert_eq!(
v6::_rev_u32(0b0000_0000_1111_1111_0000_0000_1111_1111_u32),
0b1111_1111_0000_0000_1111_1111_0000_0000_u32
);
}
}
}

View file

@ -1,7 +1,11 @@
//! ARMv7 intrinsics.
//!
//! The reference is [ARMv7-M Architecture Reference Manual (Issue
//! E.b)](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html).
//! E.b)][armv7m].
//!
//! [armv7m]:
//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.
//! b/index.html
pub use super::v6::*;
@ -39,7 +43,7 @@ pub unsafe fn _rbit_u32(x: u32) -> u32 {
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.bitreverse.i32"]
#[link_name = "llvm.bitreverse.i32"]
fn rbit_u32(i: i32) -> i32;
}
@ -72,8 +76,10 @@ mod tests {
#[test]
fn _rbit_u32() {
unsafe {
assert_eq!(v7::_rbit_u32(0b0000_1010u32),
0b0101_0000_0000_0000_0000_0000_0000_0000u32);
assert_eq!(
v7::_rbit_u32(0b0000_1010u32),
0b0101_0000_0000_0000_0000_0000_0000_0000u32
);
}
}
}

View file

@ -5,10 +5,8 @@ use stdsimd_test::assert_instr;
use simd_llvm::simd_add;
use v64::{i8x8, i16x4, i32x2,
u8x8, u16x4, u32x2, f32x2};
use v128::{i8x16, i16x8, i32x4, i64x2,
u8x16, u16x8, u32x4, u64x2, f32x4};
use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
/// Vector add.
#[inline(always)]
@ -230,18 +228,9 @@ mod tests {
#[test]
fn vaddq_s8_() {
let a = i8x16::new(
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
);
let b = i8x16::new(
8, 7, 6, 5, 4, 3, 2, 1,
8, 7, 6, 5, 4, 3, 2, 1,
);
let e = i8x16::new(
9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9,
);
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
let r = unsafe { vaddq_s8(a, b) };
assert_eq!(r, e);
}
@ -293,18 +282,9 @@ mod tests {
#[test]
fn vaddq_u8_() {
let a = u8x16::new(
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
);
let b = u8x16::new(
8, 7, 6, 5, 4, 3, 2, 1,
8, 7, 6, 5, 4, 3, 2, 1,
);
let e = u8x16::new(
9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9,
);
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
let r = unsafe { vaddq_u8(a, b) };
assert_eq!(r, e);
}
@ -366,15 +346,9 @@ mod tests {
#[test]
fn vaddl_s8_() {
let v = ::std::i8::MAX;
let a = i8x8::new(
v, v, v, v,
v, v, v, v,
);
let a = i8x8::new(v, v, v, v, v, v, v, v);
let v = 2 * (v as i16);
let e = i16x8::new(
v, v, v, v,
v, v, v, v,
);
let e = i16x8::new(v, v, v, v, v, v, v, v);
let r = unsafe { vaddl_s8(a, a) };
assert_eq!(r, e);
}
@ -382,13 +356,9 @@ mod tests {
#[test]
fn vaddl_s16_() {
let v = ::std::i16::MAX;
let a = i16x4::new(
v, v, v, v,
);
let a = i16x4::new(v, v, v, v);
let v = 2 * (v as i32);
let e = i32x4::new(
v, v, v, v,
);
let e = i32x4::new(v, v, v, v);
let r = unsafe { vaddl_s16(a, a) };
assert_eq!(r, e);
}
@ -396,13 +366,9 @@ mod tests {
#[test]
fn vaddl_s32_() {
let v = ::std::i32::MAX;
let a = i32x2::new(
v, v,
);
let a = i32x2::new(v, v);
let v = 2 * (v as i64);
let e = i64x2::new(
v, v,
);
let e = i64x2::new(v, v);
let r = unsafe { vaddl_s32(a, a) };
assert_eq!(r, e);
}
@ -410,15 +376,9 @@ mod tests {
#[test]
fn vaddl_u8_() {
let v = ::std::u8::MAX;
let a = u8x8::new(
v, v, v, v,
v, v, v, v,
);
let a = u8x8::new(v, v, v, v, v, v, v, v);
let v = 2 * (v as u16);
let e = u16x8::new(
v, v, v, v,
v, v, v, v,
);
let e = u16x8::new(v, v, v, v, v, v, v, v);
let r = unsafe { vaddl_u8(a, a) };
assert_eq!(r, e);
}
@ -426,13 +386,9 @@ mod tests {
#[test]
fn vaddl_u16_() {
let v = ::std::u16::MAX;
let a = u16x4::new(
v, v, v, v,
);
let a = u16x4::new(v, v, v, v);
let v = 2 * (v as u32);
let e = u32x4::new(
v, v, v, v,
);
let e = u32x4::new(v, v, v, v);
let r = unsafe { vaddl_u16(a, a) };
assert_eq!(r, e);
}
@ -440,13 +396,9 @@ mod tests {
#[test]
fn vaddl_u32_() {
let v = ::std::u32::MAX;
let a = u32x2::new(
v, v,
);
let a = u32x2::new(v, v);
let v = 2 * (v as u64);
let e = u64x2::new(
v, v,
);
let e = u64x2::new(v, v);
let r = unsafe { vaddl_u32(a, a) };
assert_eq!(r, e);
}

View file

@ -1,6 +1,9 @@
//! ARMv8 intrinsics.
//!
//! The reference is [ARMv8-A Reference Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html).
//! The reference is [ARMv8-A Reference Manual][armv8].
//!
//! [armv8]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.
//! ddi0487a.k_10775/index.html
pub use super::v7::*;
@ -23,7 +26,7 @@ pub unsafe fn _clz_u64(x: u64) -> u64 {
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.bitreverse.i64"]
#[link_name = "llvm.bitreverse.i64"]
fn rbit_u64(i: i64) -> i64;
}
@ -61,9 +64,10 @@ mod tests {
#[test]
fn _rev_u64() {
unsafe {
assert_eq!(v8::_rev_u64(
0b0000_0000_1111_1111_0000_0000_1111_1111_u64
), 0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64);
assert_eq!(
v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64),
0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
);
}
}
@ -77,27 +81,32 @@ mod tests {
#[test]
fn _rbit_u64() {
unsafe {
assert_eq!(v8::_rbit_u64(
0b0000_0000_1111_1101_0000_0000_1111_1111_u64
), 0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64);
assert_eq!(
v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64),
0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
);
}
}
#[test]
fn _cls_u32() {
unsafe {
assert_eq!(v8::_cls_u32(
0b1111_1111_1111_1111_0000_0000_1111_1111_u32
), 15_u32);
assert_eq!(
v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32),
15_u32
);
}
}
#[test]
fn _cls_u64() {
unsafe {
assert_eq!(v8::_cls_u64(
0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
), 15_u64);
assert_eq!(
v8::_cls_u64(
0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
),
15_u64
);
}
}
}

View file

@ -44,9 +44,9 @@
//! have no runtime support for whether you CPU actually supports the
//! instruction.
//!
//! CPU target feature detection is done via the `cfg_feature_enabled!` macro at
//! runtime. This macro will detect at runtime whether the specified feature is
//! available or not, returning true or false depending on the current CPU.
//! CPU target feature detection is done via the `cfg_feature_enabled!` macro
//! at runtime. This macro will detect at runtime whether the specified feature
//! is available or not, returning true or false depending on the current CPU.
//!
//! ```
//! #![feature(cfg_target_feature)]
@ -58,7 +58,8 @@
//! if cfg_feature_enabled!("avx2") {
//! println!("avx2 intrinsics will work");
//! } else {
//! println!("avx2 intrinsics will not work, they may generate SIGILL");
//! println!("avx2 intrinsics will not work");
//! // undefined behavior: may generate a `SIGILL`.
//! }
//! }
//! ```
@ -93,29 +94,33 @@
//!
//! # Status
//!
//! This crate is intended for eventual inclusion into the standard library, but
//! some work and experimentation is needed to get there! First and foremost you
//! can help out by kicking the tires on this crate and seeing if it works for
//! your use case! Next up you can help us fill out the [vendor
//! intrinsics][vendor] to ensure that we've got all the SIMD support necessary.
//! This crate is intended for eventual inclusion into the standard library,
//! but some work and experimentation is needed to get there! First and
//! foremost you can help out by kicking the tires on this crate and seeing if
//! it works for your use case! Next up you can help us fill out the [vendor
//! intrinsics][vendor] to ensure that we've got all the SIMD support
//! necessary.
//!
//! The language support and status of SIMD is also still a little up in the air
//! right now, you may be interested in a few issues along these lines:
//! The language support and status of SIMD is also still a little up in the
//! air right now, you may be interested in a few issues along these lines:
//!
//! * [Overal tracking issue for SIMD support](https://github.com/rust-lang/rust/issues/27731)
//! * [`cfg_target_feature` tracking issue](https://github.com/rust-lang/rust/issues/29717)
//! * [SIMD types currently not sound](https://github.com/rust-lang/rust/issues/44367)
//! * [`#[target_feature]` improvements](https://github.com/rust-lang/rust/issues/44839)
//! * [Overal tracking issue for SIMD support]
//! (https://github.com/rust-lang/rust/issues/27731)
//! * [`cfg_target_feature` tracking issue]
//! (https://github.com/rust-lang/rust/issues/29717)
//! * [SIMD types currently not sound]
//! (https://github.com/rust-lang/rust/issues/44367)
//! * [`#[target_feature]` improvements]
//! (https://github.com/rust-lang/rust/issues/44839)
//!
//! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40
#![cfg_attr(feature = "strict", deny(warnings))]
#![allow(dead_code)]
#![allow(unused_features)]
#![feature(
const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
target_feature, cfg_target_feature, i128_type, asm, const_atomic_usize_new
)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
const_atomic_usize_new, stmt_expr_attributes)]
#![cfg_attr(test, feature(proc_macro, test))]
#[cfg(test)]

View file

@ -240,9 +240,11 @@ macro_rules! define_integer_ops {
i8, i16, i32, i64, isize);
impl ::std::fmt::LowerHex for $ty {
fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
fn fmt(&self, f: &mut ::std::fmt::Formatter)
-> ::std::fmt::Result {
write!(f, "{}(", stringify!($ty))?;
let n = ::std::mem::size_of_val(self) / ::std::mem::size_of::<$elem>();
let n = ::std::mem::size_of_val(self)
/ ::std::mem::size_of::<$elem>();
for i in 0..n {
if i > 0 {
write!(f, ", ")?;
@ -292,8 +294,7 @@ macro_rules! cfg_feature_enabled {
/// On ARM features are only detected at compile-time using
/// cfg(target_feature), so if this macro is executed the
/// feature is not supported.
#[cfg(any(target_arch = "arm",
target_arch = "aarch64"))]
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
@ -302,10 +303,8 @@ macro_rules! __unstable_detect_feature {
}
/// In all unsupported architectures using the macro is an error
#[cfg(not(any(target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64")))]
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
target_arch = "arm", target_arch = "aarch64")))]
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {

View file

@ -50,7 +50,17 @@ define_from!(u8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, i8x16);
define_from!(i8x16, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16);
define_common_ops!(
f64x2, f32x4, u64x2, i64x2, u32x4, i32x4, u16x8, i16x8, u8x16, i8x16);
f64x2,
f32x4,
u64x2,
i64x2,
u32x4,
i32x4,
u16x8,
i16x8,
u8x16,
i8x16
);
define_float_ops!(f64x2, f32x4);
define_integer_ops!(
(u64x2, u64),
@ -60,7 +70,8 @@ define_integer_ops!(
(u16x8, u16),
(i16x8, i16),
(u8x16, u8),
(i8x16, i8));
(i8x16, i8)
);
define_casts!(
(f64x2, f32x2, as_f32x2),
(f64x2, u64x2, as_u64x2),
@ -79,4 +90,5 @@ define_casts!(
(u16x8, i16x8, as_i16x8),
(i16x8, u16x8, as_u16x8),
(u8x16, i8x16, as_i8x16),
(i8x16, u8x16, as_u8x16));
(i8x16, u8x16, as_u8x16)
);

View file

@ -74,7 +74,17 @@ define_from!(u8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, i8x32);
define_from!(i8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32);
define_common_ops!(
f64x4, f32x8, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32);
f64x4,
f32x8,
u64x4,
i64x4,
u32x8,
i32x8,
u16x16,
i16x16,
u8x32,
i8x32
);
define_float_ops!(f64x4, f32x8);
define_integer_ops!(
(u64x4, u64),
@ -84,7 +94,8 @@ define_integer_ops!(
(u16x16, u16),
(i16x16, i16),
(u8x32, u8),
(i8x32, i8));
(i8x32, i8)
);
define_casts!(
(f64x4, f32x4, as_f32x4),
(f64x4, u64x4, as_u64x4),
@ -102,4 +113,5 @@ define_casts!(
(u16x16, i16x16, as_i16x16),
(i16x16, u16x16, as_u16x16),
(u8x32, i8x32, as_i8x32),
(i8x32, u8x32, as_u8x32));
(i8x32, u8x32, as_u8x32)
);

View file

@ -120,7 +120,17 @@ define_from!(u8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, i8x64);
define_from!(i8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64);
define_common_ops!(
f64x8, f32x16, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64, i8x64);
f64x8,
f32x16,
u64x8,
i64x8,
u32x16,
i32x16,
u16x32,
i16x32,
u8x64,
i8x64
);
define_float_ops!(f64x8, f32x16);
define_integer_ops!(
(u64x8, u64),
@ -130,7 +140,8 @@ define_integer_ops!(
(u16x32, u16),
(i16x32, i16),
(u8x64, u8),
(i8x64, i8));
(i8x64, i8)
);
define_casts!(
(f64x8, f32x8, as_f32x8),
(f64x8, u64x8, as_u64x8),
@ -148,5 +159,5 @@ define_casts!(
(u16x32, i16x32, as_i16x32),
(i16x32, u16x32, as_u16x32),
(u8x64, i8x64, as_i8x64),
(i8x64, u8x64, as_u8x64));
(i8x64, u8x64, as_u8x64)
);

View file

@ -42,7 +42,8 @@ define_integer_ops!(
(u16x4, u16),
(i16x4, i16),
(u8x8, u8),
(i8x8, i8));
(i8x8, i8)
);
define_casts!(
(f32x2, f64x2, as_f64x2),
(f32x2, u32x2, as_u32x2),
@ -61,5 +62,4 @@ define_casts!(
(u8x8, u16x8, as_u16x8),
(u16x4, u32x4, as_u32x4),
(u32x2, u64x2, as_u64x2)
);

View file

@ -4,11 +4,19 @@
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//! Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//! System Instructions][amd64_ref].
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29)
//! provides a quick overview of the instructions available.
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
//! available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.
//! 28Advanced_Bit_Manipulation.29
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -19,7 +27,9 @@ use stdsimd_test::assert_instr;
#[inline(always)]
#[target_feature = "+lzcnt"]
#[cfg_attr(test, assert_instr(lzcnt))]
pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
x.leading_zeros()
}
/// Counts the leading most significant zero bits.
///
@ -27,19 +37,25 @@ pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() }
#[inline(always)]
#[target_feature = "+lzcnt"]
#[cfg_attr(test, assert_instr(lzcnt))]
pub unsafe fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 }
pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
x.leading_zeros() as u64
}
/// Counts the bits that are set.
#[inline(always)]
#[target_feature = "+popcnt"]
#[cfg_attr(test, assert_instr(popcnt))]
pub unsafe fn _popcnt32(x: u32) -> u32 { x.count_ones() }
pub unsafe fn _popcnt32(x: u32) -> u32 {
x.count_ones()
}
/// Counts the bits that are set.
#[inline(always)]
#[target_feature = "+popcnt"]
#[cfg_attr(test, assert_instr(popcnt))]
pub unsafe fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 }
pub unsafe fn _popcnt64(x: u64) -> u64 {
x.count_ones() as u64
}
#[cfg(test)]
mod tests {

View file

@ -18,7 +18,8 @@ pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 {
a + b
}
/// Add packed single-precision (32-bit) floating-point elements in `a` and `b`.
/// Add packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vaddps))]
@ -26,7 +27,8 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
a + b
}
/// Compute the bitwise AND of a packed double-precision (64-bit) floating-point elements
/// Compute the bitwise AND of a packed double-precision (64-bit)
/// floating-point elements
/// in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx"]
@ -39,7 +41,8 @@ pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 {
mem::transmute(a & b)
}
/// Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in `a` and `b`.
/// Compute the bitwise AND of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vandps))]
@ -49,7 +52,8 @@ pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 {
mem::transmute(a & b)
}
/// Compute the bitwise OR packed double-precision (64-bit) floating-point elements
/// Compute the bitwise OR packed double-precision (64-bit) floating-point
/// elements
/// in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx"]
@ -62,7 +66,8 @@ pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 {
mem::transmute(a | b)
}
/// Compute the bitwise OR packed single-precision (32-bit) floating-point elements in `a` and `b`.
/// Compute the bitwise OR packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vorps))]
@ -114,7 +119,8 @@ pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
}
}
/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in `a`
/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point
/// elements in `a`
/// and then AND with `b`.
#[inline(always)]
#[target_feature = "+avx"]
@ -126,7 +132,8 @@ pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 {
mem::transmute((!a) & b)
}
/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in `a`
/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point
/// elements in `a`
/// and then AND with `b`.
#[inline(always)]
#[target_feature = "+avx"]
@ -146,8 +153,8 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
maxpd256(a, b)
}
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// and return packed maximum values
/// Compare packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and return packed maximum values
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmaxps))]
@ -164,8 +171,8 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
minpd256(a, b)
}
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// and return packed minimum values
/// Compare packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and return packed minimum values
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vminps))]
@ -182,7 +189,8 @@ pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 {
a * b
}
/// Add packed single-precision (32-bit) floating-point elements in `a` and `b`.
/// Add packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmulps))]
@ -266,8 +274,8 @@ pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
constify_imm8!(b, call)
}
/// Round packed double-precision (64-bit) floating point elements in `a` toward
/// positive infinity.
/// Round packed double-precision (64-bit) floating point elements in `a`
/// toward positive infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundpd))]
@ -275,8 +283,8 @@ pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
roundpd256(a, 0x02)
}
/// Round packed double-precision (64-bit) floating point elements in `a` toward
/// negative infinity.
/// Round packed double-precision (64-bit) floating point elements in `a`
/// toward negative infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundpd))]
@ -292,9 +300,9 @@ pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 {
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check the LLVM docs:
/// For a complete list of options, check [the LLVM docs][llvm_docs].
///
/// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
@ -307,8 +315,8 @@ pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 {
constify_imm8!(b, call)
}
/// Round packed single-precision (32-bit) floating point elements in `a` toward
/// positive infinity.
/// Round packed single-precision (32-bit) floating point elements in `a`
/// toward positive infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps))]
@ -316,8 +324,8 @@ pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 {
roundps256(a, 0x02)
}
/// Round packed single-precision (32-bit) floating point elements in `a` toward
/// negative infinity.
/// Round packed single-precision (32-bit) floating point elements in `a`
/// toward negative infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps))]
@ -606,7 +614,8 @@ pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: u8) -> f32x8 {
/// Compare the lower double-precision (64-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `imm8`,
/// store the result in the lower element of returned vector,
/// and copy the upper element from `a` to the upper element of returned vector.
/// and copy the upper element from `a` to the upper element of returned
/// vector.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
#[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
@ -811,7 +820,9 @@ pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
let imm8 = (imm8 & 0xFF) as u8;
const fn add4(x: u32) -> u32 { x + 4 }
const fn add4(x: u32) -> u32 {
x + 4
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle8(a, _mm256_undefined_ps(), [
@ -857,7 +868,7 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
}
}
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// using the control in `imm8`.
#[inline(always)]
#[target_feature = "+avx,+sse"]
@ -1026,7 +1037,9 @@ pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i8) -> f64x4 {
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
pub unsafe fn _mm256_permute2f128_si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8 {
pub unsafe fn _mm256_permute2f128_si256(
a: i32x8, b: i32x8, imm8: i8
) -> i32x8 {
macro_rules! call {
($imm8:expr) => { vperm2f128si256(a, b, $imm8) }
}
@ -1110,7 +1123,9 @@ pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 {
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
pub unsafe fn _mm256_insertf128_si256(
a: __m256i, b: __m128i, imm8: i32
) -> __m256i {
let b = i64x4::from(_mm256_castsi128_si256(b));
let dst: i64x4 = match imm8 & 1 {
0 => simd_shuffle4(i64x4::from(a), b, [4, 5, 2, 3]),
@ -1166,7 +1181,8 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 {
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut f64x4 as *mut u8,
mem::size_of::<f64x4>());
mem::size_of::<f64x4>(),
);
dst
}
@ -1191,7 +1207,8 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 {
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut f32x8 as *mut u8,
mem::size_of::<f32x8>());
mem::size_of::<f32x8>(),
);
dst
}
@ -1215,12 +1232,13 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut __m256i as *mut u8,
mem::size_of::<__m256i>());
mem::size_of::<__m256i>(),
);
dst
}
/// Store 256-bits of integer data from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
@ -1234,7 +1252,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 {
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 {
maskloadpd256(mem_addr as *const i8, mask)
}
@ -1272,7 +1290,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) {
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmaskmovps))]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 {
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 {
maskloadps256(mem_addr as *const i8, mask)
}
@ -1592,7 +1610,8 @@ pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 {
}
/// Set each bit of the returned mask based on the most significant bit of the
/// corresponding packed double-precision (64-bit) floating-point element in `a`.
/// corresponding packed double-precision (64-bit) floating-point element in
/// `a`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmovmskpd))]
@ -1601,7 +1620,8 @@ pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 {
}
/// Set each bit of the returned mask based on the most significant bit of the
/// corresponding packed single-precision (32-bit) floating-point element in `a`.
/// corresponding packed single-precision (32-bit) floating-point element in
/// `a`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vmovmskps))]
@ -1646,8 +1666,9 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
/// vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32,
e: f32, f: f32, g: f32, h: f32) -> f32x8 {
pub unsafe fn _mm256_set_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
) -> f32x8 {
f32x8::new(h, g, f, e, d, c, b, a)
}
@ -1655,44 +1676,45 @@ pub unsafe fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32,
/// reverse order.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_set_epi8(e00: i8, e01: i8, e02: i8, e03: i8,
e04: i8, e05: i8, e06: i8, e07: i8,
e08: i8, e09: i8, e10: i8, e11: i8,
e12: i8, e13: i8, e14: i8, e15: i8,
e16: i8, e17: i8, e18: i8, e19: i8,
e20: i8, e21: i8, e22: i8, e23: i8,
e24: i8, e25: i8, e26: i8, e27: i8,
e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 {
i8x32::new(e31, e30, e29, e28,
e27, e26, e25, e24,
e23, e22, e21, e20,
e19, e18, e17, e16,
e15, e14, e13, e12,
e11, e10, e09, e08,
e07, e06, e05, e04,
e03, e02, e01, e00)
pub unsafe fn _mm256_set_epi8(
e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8,
e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8,
e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8,
) -> i8x32 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x32::new(
e31, e30, e29, e28, e27, e26, e25, e24,
e23, e22, e21, e20, e19, e18, e17, e16,
e15, e14, e13, e12, e11, e10, e09, e08,
e07, e06, e05, e04, e03, e02, e01, e00,
)
}
/// Set packed 16-bit integers in returned vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_set_epi16(e00: i16, e01: i16, e02: i16, e03: i16,
e04: i16, e05: i16, e06: i16, e07: i16,
e08: i16, e09: i16, e10: i16, e11: i16,
e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 {
i16x16::new(e15, e14, e13, e12,
e11, e10, e09, e08,
e07, e06, e05, e04,
e03, e02, e01, e00)
pub unsafe fn _mm256_set_epi16(
e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16,
e14: i16, e15: i16,
) -> i16x16 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i16x16::new(
e15, e14, e13, e12,
e11, e10, e09, e08,
e07, e06, e05, e04,
e03, e02, e01, e00,
)
}
/// Set packed 32-bit integers in returned vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_set_epi32(e0: i32, e1: i32, e2: i32, e3: i32,
e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 {
i32x8::new(e7, e6, e5, e4,
e3, e2, e1, e0)
pub unsafe fn _mm256_set_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
) -> i32x8 {
i32x8::new(e7, e6, e5, e4, e3, e2, e1, e0)
}
/// Set packed 64-bit integers in returned vector with the supplied values.
@ -1715,8 +1737,9 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
/// vector with the supplied values in reverse order.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32,
e: f32, f: f32, g: f32, h: f32) -> f32x8 {
pub unsafe fn _mm256_setr_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
) -> f32x8 {
f32x8::new(a, b, c, d, e, f, g, h)
}
@ -1724,46 +1747,47 @@ pub unsafe fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32,
/// reverse order.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_setr_epi8(e00: i8, e01: i8, e02: i8, e03: i8,
e04: i8, e05: i8, e06: i8, e07: i8,
e08: i8, e09: i8, e10: i8, e11: i8,
e12: i8, e13: i8, e14: i8, e15: i8,
e16: i8, e17: i8, e18: i8, e19: i8,
e20: i8, e21: i8, e22: i8, e23: i8,
e24: i8, e25: i8, e26: i8, e27: i8,
e28: i8, e29: i8, e30: i8, e31: i8) -> i8x32 {
i8x32::new(e00, e01, e02, e03,
e04, e05, e06, e07,
e08, e09, e10, e11,
e12, e13, e14, e15,
e16, e17, e18, e19,
e20, e21, e22, e23,
e24, e25, e26, e27,
e28, e29, e30, e31)
pub unsafe fn _mm256_setr_epi8(
e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8,
e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8,
e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8,
) -> i8x32 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x32::new(
e00, e01, e02, e03, e04, e05, e06, e07,
e08, e09, e10, e11, e12, e13, e14, e15,
e16, e17, e18, e19, e20, e21, e22, e23,
e24, e25, e26, e27, e28, e29, e30, e31,
)
}
/// Set packed 16-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_setr_epi16(e00: i16, e01: i16, e02: i16, e03: i16,
e04: i16, e05: i16, e06: i16, e07: i16,
e08: i16, e09: i16, e10: i16, e11: i16,
e12: i16, e13: i16, e14: i16, e15: i16) -> i16x16 {
i16x16::new(e00, e01, e02, e03,
e04, e05, e06, e07,
e08, e09, e10, e11,
e12, e13, e14, e15)
pub unsafe fn _mm256_setr_epi16(
e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16,
e14: i16, e15: i16,
) -> i16x16 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i16x16::new(
e00, e01, e02, e03,
e04, e05, e06, e07,
e08, e09, e10, e11,
e12, e13, e14, e15,
)
}
/// Set packed 32-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32,
e4: i32, e5: i32, e6: i32, e7: i32) -> i32x8 {
i32x8::new(e0, e1, e2, e3,
e4, e5, e6, e7)
pub unsafe fn _mm256_setr_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
) -> i32x8 {
i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)
}
/// Set packed 64-bit integers in returned vector with the supplied values in
@ -1798,10 +1822,13 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 {
#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 {
i8x32::new(a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a)
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x32::new(
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a,
)
}
/// Broadcast 16-bit integer `a` to all elements of returned vector.
@ -1811,8 +1838,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 {
//#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 {
i16x16::new(a, a, a, a, a, a, a, a,
a, a, a, a, a, a, a, a)
i16x16::new(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
/// Broadcast 32-bit integer `a` to all elements of returned vector.
@ -1954,7 +1980,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
use x86::sse2::_mm_setzero_si128;
let b = mem::transmute(_mm_setzero_si128());
let b = mem::transmute(_mm_setzero_si128());
let dst: i64x4 = simd_shuffle4(i64x2::from(a), b, [0, 1, 2, 3]);
__m256i::from(dst)
}
@ -2044,22 +2070,28 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
}
/// Load two 128-bit values (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from memory, and combine them into a 256-bit value.
/// floating-point elements) from memory, and combine them into a 256-bit
/// value.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse"]
pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> f32x8 {
pub unsafe fn _mm256_loadu2_m128(
hiaddr: *const f32, loaddr: *const f32
) -> f32x8 {
use x86::sse::_mm_loadu_ps;
let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
_mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
}
/// Load two 128-bit values (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory, and combine them into a 256-bit value.
/// floating-point elements) from memory, and combine them into a 256-bit
/// value.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> f64x4 {
pub unsafe fn _mm256_loadu2_m128d(
hiaddr: *const f64, loaddr: *const f64
) -> f64x4 {
use x86::sse2::_mm_loadu_pd;
let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
_mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
@ -2070,7 +2102,9 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> f64
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
pub unsafe fn _mm256_loadu2_m128i(
hiaddr: *const __m128i, loaddr: *const __m128i
) -> __m256i {
use x86::sse2::_mm_loadu_si128;
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
@ -2082,7 +2116,9 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse"]
pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: f32x8) {
pub unsafe fn _mm256_storeu2_m128(
hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
) {
use x86::sse::_mm_storeu_ps;
let lo = _mm256_castps256_ps128(a);
_mm_storeu_ps(loaddr, lo);
@ -2096,7 +2132,9 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: f32x8)
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: f64x4) {
pub unsafe fn _mm256_storeu2_m128d(
hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
) {
use x86::sse2::_mm_storeu_pd;
let lo = _mm256_castpd256_pd128(a);
_mm_storeu_pd(loaddr, lo);
@ -2109,7 +2147,9 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: f64x4)
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
pub unsafe fn _mm256_storeu2_m128i(
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
) {
use x86::sse2::_mm_storeu_si128;
let lo = _mm256_castsi256_si128(a);
_mm_storeu_si128(loaddr, lo);
@ -2265,9 +2305,9 @@ extern "C" {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use test::black_box; // Used to inhibit constant-folding.
use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
use v128::{f32x4, f64x2, i32x4, i64x2, i8x16};
use v256::*;
use x86::avx;
use x86::{__m128i, __m256i};
@ -2428,7 +2468,7 @@ mod tests {
let a = f64x4::new(1., 2., 3., 4.);
let b = f64x4::new(5., 6., 7., 8.);
let r = avx::_mm256_sub_pd(a, b);
let e = f64x4::new(-4.,-4.,-4.,-4.);
let e = f64x4::new(-4., -4., -4., -4.);
assert_eq!(r, e);
}
@ -2504,7 +2544,7 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_sqrt_pd() {
let a = f64x4::new(4., 9., 16., 25.);
let r = avx::_mm256_sqrt_pd(a, );
let r = avx::_mm256_sqrt_pd(a);
let e = f64x4::new(2., 3., 4., 5.);
assert_eq!(r, e);
}
@ -2561,7 +2601,10 @@ mod tests {
unsafe fn _mm256_blendv_ps() {
let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.);
let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.0);
let c = f32x8::new(0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32);
#[cfg_attr(rustfmt, rustfmt_skip)]
let c = f32x8::new(
0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
);
let r = avx::_mm256_blendv_ps(a, b, c);
let e = f32x8::new(4., 9., 16., 25., 8., 9., 64., 50.0);
assert_eq!(r, e);
@ -2572,7 +2615,8 @@ mod tests {
let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.);
let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.0);
let r = avx::_mm256_dp_ps(a, b, 0xFF);
let e = f32x8::new(200.0, 200.0, 200.0, 200.0, 2387., 2387., 2387., 2387.);
let e =
f32x8::new(200.0, 200.0, 200.0, 200.0, 2387., 2387., 2387., 2387.);
assert_eq!(r, e);
}
@ -2801,20 +2845,21 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_extract_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
let r = avx::_mm256_extract_epi8(a, 0);
assert_eq!(r, 1);
}
#[simd_test = "avx"]
unsafe fn _mm256_extract_epi16() {
let a = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15);
let a =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = avx::_mm256_extract_epi16(a, 0);
assert_eq!(r, 0);
}
@ -3004,29 +3049,31 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_insert_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
let r = avx::_mm256_insert_epi8(a, 0, 31);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 0);
25, 26, 27, 28, 29, 30, 31, 0,
);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_insert_epi16() {
let a = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15);
let a =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = avx::_mm256_insert_epi16(a, 0, 15);
let e = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 0);
let e =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0);
assert_eq!(r, e);
}
@ -3203,18 +3250,22 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_lddqu_si256() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
let p = &a as *const _;
let r = avx::_mm256_lddqu_si256(black_box(p));
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
assert_eq!(r, e);
}
@ -3222,8 +3273,11 @@ mod tests {
unsafe fn _mm256_rcp_ps() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_rcp_ps(a);
let e = f32x8::new(0.99975586, 0.49987793, 0.33325195, 0.24993896,
0.19995117, 0.16662598, 0.14282227, 0.12496948);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = f32x8::new(
0.99975586, 0.49987793, 0.33325195, 0.24993896,
0.19995117, 0.16662598, 0.14282227, 0.12496948,
);
let rel_err = 0.00048828125;
for i in 0..8 {
assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err);
@ -3234,8 +3288,11 @@ mod tests {
unsafe fn _mm256_rsqrt_ps() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_rsqrt_ps(a);
let e = f32x8::new(0.99975586, 0.7069092, 0.5772705, 0.49987793,
0.44714355, 0.40820313, 0.3779297, 0.3534546);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = f32x8::new(
0.99975586, 0.7069092, 0.5772705, 0.49987793,
0.44714355, 0.40820313, 0.3779297, 0.3534546,
);
let rel_err = 0.00048828125;
for i in 0..8 {
assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err);
@ -3478,30 +3535,39 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_set_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = avx::_mm256_set_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
assert_eq!(r, i8x32::new(32, 31, 30, 29, 28, 27, 26, 25,
24, 23, 22, 21, 20, 19, 18, 17,
16, 15, 14, 13, 12, 11, 10, 9,
8, 7, 6, 5, 4, 3, 2, 1));
25, 26, 27, 28, 29, 30, 31, 32,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
32, 31, 30, 29, 28, 27, 26, 25,
24, 23, 22, 21, 20, 19, 18, 17,
16, 15, 14, 13, 12, 11, 10, 9,
8, 7, 6, 5, 4, 3, 2, 1
);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_set_epi16() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = avx::_mm256_set_epi16(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
assert_eq!(r, i16x16::new(16, 15, 14, 13, 12, 11, 10, 9,
8, 7, 6, 5, 4, 3, 2, 1));
9, 10, 11, 12, 13, 14, 15, 16,
);
assert_eq!(
r,
i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
);
}
#[simd_test = "avx"]
unsafe fn _mm256_set_epi32() {
let r = avx::_mm256_set_epi32(
1, 2, 3, 4, 5, 6, 7, 8);
let r = avx::_mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
assert_eq!(r, i32x8::new(8, 7, 6, 5, 4, 3, 2, 1));
}
@ -3525,30 +3591,40 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_setr_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = avx::_mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
assert_eq!(r, i8x32::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32));
25, 26, 27, 28, 29, 30, 31, 32,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_setr_epi16() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = avx::_mm256_setr_epi16(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
assert_eq!(r, i16x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16));
9, 10, 11, 12, 13, 14, 15, 16,
);
assert_eq!(
r,
i16x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
);
}
#[simd_test = "avx"]
unsafe fn _mm256_setr_epi32() {
let r = avx::_mm256_setr_epi32(
1, 2, 3, 4, 5, 6, 7, 8);
let r = avx::_mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
assert_eq!(r, i32x8::new(1, 2, 3, 4, 5, 6, 7, 8));
}
@ -3614,19 +3690,25 @@ mod tests {
unsafe fn _mm256_castps_si256() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_castps_si256(a);
let e = i8x32::new(0, 0, -128, 63, 0, 0, 0, 64,
0, 0, 64, 64, 0, 0, -128, 64,
0, 0, -96, 64, 0, 0, -64, 64,
0, 0, -32, 64, 0, 0, 0, 65);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
0, 0, -128, 63, 0, 0, 0, 64,
0, 0, 64, 64, 0, 0, -128, 64,
0, 0, -96, 64, 0, 0, -64, 64,
0, 0, -32, 64, 0, 0, 0, 65,
);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_castsi256_ps() {
let a = i8x32::new(0, 0, -128, 63, 0, 0, 0, 64,
0, 0, 64, 64, 0, 0, -128, 64,
0, 0, -96, 64, 0, 0, -64, 64,
0, 0, -32, 64, 0, 0, 0, 65);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
0, 0, -128, 63, 0, 0, 0, 64,
0, 0, 64, 64, 0, 0, -128, 64,
0, 0, -96, 64, 0, 0, -64, 64,
0, 0, -32, 64, 0, 0, 0, 65,
);
let r = avx::_mm256_castsi256_ps(a);
let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq!(r, e);
@ -3711,16 +3793,23 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_set_m128i() {
let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
#[cfg_attr(rustfmt, rustfmt_skip)]
let hi = i8x16::new(
17, 18, 19, 20,
21, 22, 23, 24,
25, 26, 27, 28,
29, 30, 31, 32,
);
let lo =
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = avx::_mm256_set_m128i(hi, lo);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
assert_eq!(r, e);
}
@ -3744,16 +3833,21 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_setr_m128i() {
let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
let lo =
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
#[cfg_attr(rustfmt, rustfmt_skip)]
let hi = i8x16::new(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32,
);
let r = avx::_mm256_setr_m128i(lo, hi);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
assert_eq!(r, e);
}
@ -3781,17 +3875,24 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_loadu2_m128i() {
let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
let r = avx::_mm256_loadu2_m128i(&hi as *const _ as *const _,
&lo as *const _ as *const _);
#[cfg_attr(rustfmt, rustfmt_skip)]
let hi = i8x16::new(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32,
);
let lo =
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = avx::_mm256_loadu2_m128i(
&hi as *const _ as *const _,
&lo as *const _ as *const _,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
assert_eq!(r, e);
}
@ -3801,9 +3902,11 @@ mod tests {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let mut hi = _mm_undefined_ps();
let mut lo = _mm_undefined_ps();
avx::_mm256_storeu2_m128(&mut hi as *mut _ as *mut f32,
&mut lo as *mut _ as *mut f32,
a);
avx::_mm256_storeu2_m128(
&mut hi as *mut _ as *mut f32,
&mut lo as *mut _ as *mut f32,
a,
);
assert_eq!(hi, f32x4::new(5., 6., 7., 8.));
assert_eq!(lo, f32x4::new(1., 2., 3., 4.));
}
@ -3814,9 +3917,11 @@ mod tests {
let a = f64x4::new(1., 2., 3., 4.);
let mut hi = _mm_undefined_pd();
let mut lo = _mm_undefined_pd();
avx::_mm256_storeu2_m128d(&mut hi as *mut _ as *mut f64,
&mut lo as *mut _ as *mut f64,
a);
avx::_mm256_storeu2_m128d(
&mut hi as *mut _ as *mut f64,
&mut lo as *mut _ as *mut f64,
a,
);
assert_eq!(hi, f64x2::new(3., 4.));
assert_eq!(lo, f64x2::new(1., 2.));
}
@ -3824,17 +3929,26 @@ mod tests {
#[simd_test = "avx"]
unsafe fn _mm256_storeu2_m128i() {
use x86::sse2::_mm_undefined_si128;
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
let mut hi = _mm_undefined_si128();
let mut lo = _mm_undefined_si128();
avx::_mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a);
assert_eq!(hi, i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32));
assert_eq!(lo, i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16));
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
);
assert_eq!(hi, e);
assert_eq!(
lo,
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
);
}
}

View file

@ -96,8 +96,8 @@ pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
paddusw(a, b)
}
/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shift the result right by `n` bytes, and return the low 16 bytes.
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
@ -116,7 +116,9 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
(a, b, n)
};
const fn add(a: u32, b: u32) -> u32 { a + b }
const fn add(a: u32, b: u32) -> u32 {
a + b
}
macro_rules! shuffle {
($shift:expr) => {
simd_shuffle32(b, a, [
@ -140,14 +142,22 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
}
}
match n {
0 => shuffle!(0), 1 => shuffle!(1),
2 => shuffle!(2), 3 => shuffle!(3),
4 => shuffle!(4), 5 => shuffle!(5),
6 => shuffle!(6), 7 => shuffle!(7),
8 => shuffle!(8), 9 => shuffle!(9),
10 => shuffle!(10), 11 => shuffle!(11),
12 => shuffle!(12), 13 => shuffle!(13),
14 => shuffle!(14), 15 => shuffle!(15),
0 => shuffle!(0),
1 => shuffle!(1),
2 => shuffle!(2),
3 => shuffle!(3),
4 => shuffle!(4),
5 => shuffle!(5),
6 => shuffle!(6),
7 => shuffle!(7),
8 => shuffle!(8),
9 => shuffle!(9),
10 => shuffle!(10),
11 => shuffle!(11),
12 => shuffle!(12),
13 => shuffle!(13),
14 => shuffle!(14),
15 => shuffle!(15),
_ => shuffle!(16),
}
}
@ -174,7 +184,7 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
pub unsafe fn _mm256_avg_epu16(a: u16x16, b: u16x16) -> u16x16 {
pavgw(a, b)
}
@ -182,7 +192,7 @@ pub unsafe fn _mm256_avg_epu16 (a: u16x16, b: u16x16) -> u16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
pub unsafe fn _mm256_avg_epu8(a: u8x32, b: u8x32) -> u8x32 {
pavgb(a, b)
}
@ -320,8 +330,8 @@ pub unsafe fn _mm256_blend_epi16(a: i16x16, b: i16x16, imm8: i32) -> i16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpblendvb))]
pub unsafe fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
pblendvb(a,b,mask)
pub unsafe fn _mm256_blendv_epi8(a: i8x32, b: i8x32, mask: __m256i) -> i8x32 {
pblendvb(a, b, mask)
}
/// Broadcast the low packed 8-bit integer from `a` to all elements of
@ -628,37 +638,83 @@ pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
}
// TODO _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
// TODO _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
// TODO _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
// TODO _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
// TODO _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
// TODO _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
// TODO _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
// TODO _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
// TODO _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale)
// TODO _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
// TODO _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale)
// TODO _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale)
// TODO _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
// TODO _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
// TODO _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
// TODO _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
// TODO _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
// TODO _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
// TODO _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
// TODO _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale)
// TODO _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale)
// TODO _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale)
// TODO _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
// TODO _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale)
// TODO _mm_i32gather_epi32 (int const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr,
// __m128i vindex, __m128i mask,
// const int scale)
// TODO _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr,
// __m256i vindex, __m256i mask,
// const int scale)
// TODO _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr,
// __m128i vindex, __m128i mask,
// const int scale)
// TODO _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr,
// __m128i vindex, __m256i mask,
// const int scale)
// TODO _mm_i32gather_pd (double const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i32gather_pd (__m128d src, double const* base_addr,
// __m128i vindex, __m128d mask,
// const int scale)
// TODO _mm256_i32gather_pd (double const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm256_mask_i32gather_pd (__m256d src, double const* base_addr,
// __m128i vindex, __m256d mask,
// const int scale)
// TODO _mm_i32gather_ps (float const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i32gather_ps (__m128 src, float const* base_addr,
// __m128i vindex, __m128 mask,
// const int scale)
// TODO _mm256_i32gather_ps (float const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i32gather_ps (__m256 src, float const* base_addr,
// __m256i vindex, __m256 mask,
// const int scale)
// TODO _mm_i64gather_epi32 (int const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr,
// __m128i vindex, __m128i mask,
// const int scale)
// TODO _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr,
// __m256i vindex, __m128i mask,
// const int scale)
// TODO _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr,
// __m128i vindex, __m128i mask,
// const int scale)
// TODO _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr,
// __m256i vindex, __m256i mask,
// const int scale)
// TODO _mm_i64gather_pd (double const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i64gather_pd (__m128d src, double const* base_addr,
// __m128i vindex, __m128d mask,
// const int scale)
// TODO _mm256_i64gather_pd (double const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i64gather_pd (__m256d src, double const* base_addr,
// __m256i vindex, __m256d mask,
// const int scale)
// TODO _mm_i64gather_ps (float const* base_addr, __m128i vindex,
// const int scale)
// TODO _mm_mask_i64gather_ps (__m128 src, float const* base_addr,
// __m128i vindex, __m128 mask,
// const int scale)
// TODO _mm256_i64gather_ps (float const* base_addr, __m256i vindex,
// const int scale)
// TODO _mm256_mask_i64gather_ps
// TODO _mm256_inserti128_si256
@ -946,7 +1002,7 @@ pub unsafe fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
pub unsafe fn _mm256_mullo_epi16(a: i16x16, b: i16x16) -> i16x16 {
a * b
}
@ -957,7 +1013,7 @@ pub unsafe fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
pub unsafe fn _mm256_mullo_epi32(a: i32x8, b: i32x8) -> i32x8 {
a * b
}
@ -968,7 +1024,7 @@ pub unsafe fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b: i16x16) -> i16x16 {
pmulhrsw(a, b)
}
@ -1088,7 +1144,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: i64x4, imm8: i32) -> i64x4 {
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub unsafe fn _mm256_sad_epu8 (a: u8x32, b: u8x32) -> u64x4 {
pub unsafe fn _mm256_sad_epu8(a: u8x32, b: u8x32) -> u64x4 {
psadbw(a, b)
}
@ -1580,15 +1636,19 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
/// use stdsimd::simd::i8x32;
/// use stdsimd::vendor::_mm256_unpackhi_epi8;
///
/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
/// -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
///
/// let c: i8x32;
/// unsafe {
/// c = _mm256_unpackhi_epi8(a, b);
/// }
///
/// let expected = i8x32::new(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30, 31,-31);
/// let expected = i8x32::new(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13,
/// 14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30,
/// 31,-31);
/// assert_eq!(c, expected);
///
/// # }
@ -1600,7 +1660,13 @@ pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
simd_shuffle32(a, b, [8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63])
#[cfg_attr(rustfmt, rustfmt_skip)]
simd_shuffle32(a, b, [
8, 40, 9, 41, 10, 42, 11, 43,
12, 44, 13, 45, 14, 46, 15, 47,
24, 56, 25, 57, 26, 58, 27, 59,
28, 60, 29, 61, 30, 62, 31, 63,
])
}
/// Unpack and interleave 8-bit integers from the low half of each
@ -1619,15 +1685,18 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// use stdsimd::simd::i8x32;
/// use stdsimd::vendor::_mm256_unpacklo_epi8;
///
/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
/// let a = i8x32::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = i8x32::new(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
/// -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
///
/// let c: i8x32;
/// unsafe {
/// c = _mm256_unpacklo_epi8(a, b);
/// }
///
/// let expected = i8x32::new(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7, 16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
/// let expected = i8x32::new(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7,
/// 16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
/// assert_eq!(c, expected);
///
/// # }
@ -1639,7 +1708,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
simd_shuffle32(a, b, [0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55])
#[cfg_attr(rustfmt, rustfmt_skip)]
simd_shuffle32(a, b, [
0, 32, 1, 33, 2, 34, 3, 35,
4, 36, 5, 37, 6, 38, 7, 39,
16, 48, 17, 49, 18, 50, 19, 51,
20, 52, 21, 53, 22, 54, 23, 55,
])
}
/// Unpack and interleave 16-bit integers from the high half of each
@ -1666,7 +1741,8 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
/// c = _mm256_unpackhi_epi16(a, b);
/// }
///
/// let expected = i16x16::new(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14, 15,-15);
/// let expected = i16x16::new(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14,
/// 15,-15);
/// assert_eq!(c, expected);
///
/// # }
@ -1678,7 +1754,11 @@ pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
simd_shuffle16(a, b, [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31])
simd_shuffle16(
a,
b,
[4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
)
}
/// Unpack and interleave 16-bit integers from the low half of each
@ -1705,7 +1785,8 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
/// c = _mm256_unpacklo_epi16(a, b);
/// }
///
/// let expected = i16x16::new(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10, 11,-11);
/// let expected = i16x16::new(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10,
/// 11,-11);
/// assert_eq!(c, expected);
///
/// # }
@ -1717,7 +1798,11 @@ pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 {
simd_shuffle16(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27])
simd_shuffle16(
a,
b,
[0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
)
}
/// Unpack and interleave 32-bit integers from the high half of each
@ -1972,9 +2057,9 @@ extern "C" {
#[link_name = "llvm.x86.avx2.pmulh.w"]
fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.pmul.dq"]
fn pmuldq(a: i32x8, b:i32x8) -> i64x4;
fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
#[link_name = "llvm.x86.avx2.pmulu.dq"]
fn pmuludq(a: u32x8, b:u32x8) -> u64x4;
fn pmuludq(a: u32x8, b: u32x8) -> u64x4;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.packsswb"]
@ -2006,17 +2091,17 @@ extern "C" {
#[link_name = "llvm.x86.avx2.pslli.q"]
fn pslliq(a: i64x4, imm8: i32) -> i64x4;
#[link_name = "llvm.x86.avx2.psllv.d"]
fn psllvd(a:i32x4, count:i32x4) -> i32x4;
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psllv.d.256"]
fn psllvd256(a:i32x8, count:i32x8) -> i32x8;
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psllv.q"]
fn psllvq(a:i64x2, count:i64x2) -> i64x2;
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psllv.q.256"]
fn psllvq256(a:i64x4, count:i64x4) -> i64x4;
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.psra.w"]
fn psraw(a: i16x16, count:i16x8) -> i16x16;
fn psraw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psra.d"]
fn psrad(a: i32x8, count:i32x4) -> i32x8;
fn psrad(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrai.w"]
fn psraiw(a: i16x16, imm8: i32) -> i16x16;
#[link_name = "llvm.x86.avx2.psrai.d"]
@ -2026,11 +2111,11 @@ extern "C" {
#[link_name = "llvm.x86.avx2.psrav.d.256"]
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.w"]
fn psrlw(a: i16x16, count:i16x8) -> i16x16;
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psrl.d"]
fn psrld(a: i32x8, count:i32x4) -> i32x8;
fn psrld(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.q"]
fn psrlq(a: i64x4, count:i64x2) -> i64x4;
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx2.psrli.w"]
fn psrliw(a: i16x16, imm8: i32) -> i16x16;
#[link_name = "llvm.x86.avx2.psrli.d"]
@ -2071,49 +2156,53 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_abs_epi32() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i32x8::new(
0, 1, -1, std::i32::MAX,
std::i32::MIN + 1, 100, -100, -32);
std::i32::MIN + 1, 100, -100, -32,
);
let r = avx2::_mm256_abs_epi32(a);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i32x8::new(
0, 1, 1, std::i32::MAX,
(std::i32::MIN + 1).abs(), 100, 100, 32);
(std::i32::MIN + 1).abs(), 100, 100, 32,
);
assert_eq!(r, e);
}
#[simd_test = "avx2"]
unsafe fn _mm256_abs_epi16() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i16x16::new(
0, 1, -1, 2,
-2, 3, -3, 4,
-4, 5, -5, std::i16::MAX,
std::i16::MIN + 1, 100, -100, -32);
0, 1, -1, 2, -2, 3, -3, 4,
-4, 5, -5, std::i16::MAX, std::i16::MIN + 1, 100, -100, -32,
);
let r = avx2::_mm256_abs_epi16(a);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i16x16::new(
0, 1, 1, 2,
2, 3, 3, 4,
4, 5, 5, std::i16::MAX,
(std::i16::MIN + 1).abs(), 100, 100, 32);
0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, std::i16::MAX, (std::i16::MIN + 1).abs(), 100, 100, 32,
);
assert_eq!(r, e);
}
#[simd_test = "avx2"]
unsafe fn _mm256_abs_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
0, 1, -1, 2,
-2, 3, -3, 4,
-4, 5, -5, std::i8::MAX,
std::i8::MIN + 1, 100, -100, -32,
0, 1, -1, 2,
-2, 3, -3, 4,
-4, 5, -5, std::i8::MAX,
std::i8::MIN + 1, 100, -100, -32);
0, 1, -1, 2, -2, 3, -3, 4,
-4, 5, -5, std::i8::MAX, std::i8::MIN + 1, 100, -100, -32,
0, 1, -1, 2, -2, 3, -3, 4,
-4, 5, -5, std::i8::MAX, std::i8::MIN + 1, 100, -100, -32,
);
let r = avx2::_mm256_abs_epi8(a);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32,
0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32);
4, 5, 5, std::i8::MAX, (std::i8::MIN + 1).abs(), 100, 100, 32,
);
assert_eq!(r, e);
}
@ -2137,52 +2226,70 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_add_epi16() {
let a = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15);
let b = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15);
let a =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = avx2::_mm256_add_epi16(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i16x16::new(
0, 2, 4, 6, 8, 10, 12, 14,
16, 18, 20, 22, 24, 26, 28, 30);
16, 18, 20, 22, 24, 26, 28, 30,
);
assert_eq!(r, e);
}
#[simd_test = "avx2"]
unsafe fn _mm256_add_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31);
24, 25, 26, 27, 28, 29, 30, 31,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x32::new(
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31);
24, 25, 26, 27, 28, 29, 30, 31,
);
let r = avx2::_mm256_add_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
0, 2, 4, 6, 8, 10, 12, 14, 16,
18, 20, 22, 24, 26, 28, 30, 32,
34, 36, 38, 40, 42, 44, 46, 48,
50, 52, 54, 56, 58, 60, 62);
0, 2, 4, 6, 8, 10, 12, 14,
16, 18, 20, 22, 24, 26, 28, 30,
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62,
);
assert_eq!(r, e);
}
#[simd_test = "avx2"]
unsafe fn _mm256_adds_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x32::new(
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
);
let r = avx2::_mm256_adds_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94);
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78,
80, 82, 84, 86, 88, 90, 92, 94,
);
assert_eq!(r, e);
}
@ -2204,13 +2311,19 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_adds_epi16() {
let a = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i16x16::new(
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47);
let r = avx2::_mm256_adds_epi16(a, b);
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
);
let r = avx2::_mm256_adds_epi16(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i16x16::new(
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62);
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62,
);
assert_eq!(r, e);
}
@ -2233,16 +2346,28 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_adds_epu8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = u8x32::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = u8x32::new(
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
);
let r = avx2::_mm256_adds_epu8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = u8x32::new(
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94);
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78,
80, 82, 84, 86, 88, 90, 92, 94,
);
assert_eq!(r, e);
}
@ -2257,13 +2382,19 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_adds_epu16() {
let a = u16x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
u16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = u16x16::new(
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47);
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
);
let r = avx2::_mm256_adds_epu16(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = u16x16::new(
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62);
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62,
);
assert_eq!(r, e);
}
@ -2346,11 +2477,11 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_blendv_epi8() {
let (a,b) = (i8x32::splat(4),i8x32::splat(2));
let mask = i8x32::splat(0).replace(2,-1);
let e = i8x32::splat(4).replace(2,2);
let r= avx2::_mm256_blendv_epi8(a,b,mask);
assert_eq!(r,e);
let (a, b) = (i8x32::splat(4), i8x32::splat(2));
let mask = i8x32::splat(0).replace(2, -1);
let e = i8x32::splat(4).replace(2, 2);
let r = avx2::_mm256_blendv_epi8(a, b, mask);
assert_eq!(r, e);
}
#[simd_test = "avx2"]
@ -2413,8 +2544,12 @@ mod tests {
unsafe fn _mm256_broadcastsi128_si256() {
let a = i64x2::new(0x0987654321012334, 0x5678909876543210);
let res = avx2::_mm256_broadcastsi128_si256(a);
let retval = i64x4::new(0x0987654321012334, 0x5678909876543210,
0x0987654321012334, 0x5678909876543210);
let retval = i64x4::new(
0x0987654321012334,
0x5678909876543210,
0x0987654321012334,
0x5678909876543210,
);
assert_eq!(res, retval);
}
@ -2448,30 +2583,38 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_cmpeq_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x32::new(
31, 30, 2, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
31, 30, 2, 28, 27, 26, 25, 24,
23, 22, 21, 20, 19, 18, 17, 16,
15, 14, 13, 12, 11, 10, 9, 8,
7, 6, 5, 4, 3, 2, 1, 0,
);
let r = avx2::_mm256_cmpeq_epi8(a, b);
assert_eq!(r, i8x32::splat(0).replace(2,0xFFu8 as i8));
assert_eq!(r, i8x32::splat(0).replace(2, 0xFFu8 as i8));
}
#[simd_test = "avx2"]
unsafe fn _mm256_cmpeq_epi16() {
let a = i16x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = i16x16::new(
15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let a =
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b =
i16x16::new(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = avx2::_mm256_cmpeq_epi16(a, b);
assert_eq!(r, i16x16::splat(0).replace(2, 0xFFFFu16 as i16));
}
#[simd_test = "avx2"]
unsafe fn _mm256_cmpeq_epi32() {
let a = i32x8::new(0, 1, 2, 3,4,5,6,7);
let b = i32x8::new(7,6,2,4,3, 2, 1, 0);
let a = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let b = i32x8::new(7, 6, 2, 4, 3, 2, 1, 0);
let r = avx2::_mm256_cmpeq_epi32(a, b);
assert_eq!(r, i32x8::splat(0).replace(2, 0xFFFFFFFFu32 as i32));
}
@ -2481,8 +2624,10 @@ mod tests {
let a = i64x4::new(0, 1, 2, 3);
let b = i64x4::new(3, 2, 2, 0);
let r = avx2::_mm256_cmpeq_epi64(a, b);
assert_eq!(r, i64x4::splat(0).replace(
2, 0xFFFFFFFFFFFFFFFFu64 as i64));
assert_eq!(
r,
i64x4::splat(0).replace(2, 0xFFFFFFFFFFFFFFFFu64 as i64)
);
}
#[simd_test = "avx2"]
@ -2514,27 +2659,33 @@ mod tests {
let a = i64x4::splat(0).replace(0, 5);
let b = i64x4::splat(0);
let r = avx2::_mm256_cmpgt_epi64(a, b);
assert_eq!(r, i64x4::splat(0).replace(
0, 0xFFFFFFFFFFFFFFFFu64 as i64));
assert_eq!(
r,
i64x4::splat(0).replace(0, 0xFFFFFFFFFFFFFFFFu64 as i64)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_cvtepi8_epi16() {
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let r = i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let a =
i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let r =
i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
assert_eq!(r, avx2::_mm256_cvtepi8_epi16(a));
}
#[simd_test = "avx2"]
unsafe fn _mm256_cvtepi8_epi32() {
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let a =
i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
assert_eq!(r, avx2::_mm256_cvtepi8_epi32(a));
}
#[simd_test = "avx2"]
unsafe fn _mm256_cvtepi8_epi64() {
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let a =
i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
let r = i64x4::new(0, 0, -1, 1);
assert_eq!(r, avx2::_mm256_cvtepi8_epi64(a));
}
@ -2580,11 +2731,11 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_hadds_epi16() {
let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,1);
let a = i16x16::splat(2).replace(0, 0x7FFF).replace(1, 1);
let b = i16x16::splat(4);
let r = avx2::_mm256_hadds_epi16(a, b);
let e = i16x16::new(
0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
let e =
i16x16::new(0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
assert_eq!(r, e);
}
@ -2608,10 +2759,10 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_hsubs_epi16() {
let a = i16x16::splat(2).replace(0,0x7FFF).replace(1,-1);
let a = i16x16::splat(2).replace(0, 0x7FFF).replace(1, -1);
let b = i16x16::splat(4);
let r = avx2::_mm256_hsubs_epi16(a, b);
let e = i16x16::splat(0).replace(0,0x7FFF);
let e = i16x16::splat(0).replace(0, 0x7FFF);
assert_eq!(r, e);
}
@ -2902,11 +3053,13 @@ mod tests {
let a = i16x16::splat(2);
let b = i16x16::splat(4);
let r = avx2::_mm256_packs_epi16(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x32::new(
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4);
4, 4, 4, 4, 4, 4, 4, 4,
);
assert_eq!(r, e);
}
@ -2916,11 +3069,7 @@ mod tests {
let a = i32x8::splat(2);
let b = i32x8::splat(4);
let r = avx2::_mm256_packs_epi32(a, b);
let e = i16x16::new(
2, 2, 2, 2,
4, 4, 4, 4,
2, 2, 2, 2,
4, 4, 4, 4);
let e = i16x16::new(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
assert_eq!(r, e);
}
@ -2930,11 +3079,13 @@ mod tests {
let a = i16x16::splat(2);
let b = i16x16::splat(4);
let r = avx2::_mm256_packus_epi16(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = u8x32::new(
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4);
4, 4, 4, 4, 4, 4, 4, 4,
);
assert_eq!(r, e);
}
@ -2944,11 +3095,7 @@ mod tests {
let a = i32x8::splat(2);
let b = i32x8::splat(4);
let r = avx2::_mm256_packus_epi32(a, b);
let e = u16x16::new(
2, 2, 2, 2,
4, 4, 4, 4,
2, 2, 2, 2,
4, 4, 4, 4);
let e = u16x16::new(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
assert_eq!(r, e);
}
@ -3017,21 +3164,24 @@ mod tests {
unsafe fn _mm256_slli_epi16() {
assert_eq!(
avx2::_mm256_slli_epi16(i16x16::splat(0xFF), 4),
i16x16::splat(0xFF0));
i16x16::splat(0xFF0)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_slli_epi32() {
assert_eq!(
avx2::_mm256_slli_epi32(i32x8::splat(0xFFFF), 4),
i32x8::splat(0xFFFF0));
i32x8::splat(0xFFFF0)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_slli_epi64() {
assert_eq!(
avx2::_mm256_slli_epi64(i64x4::splat(0xFFFFFFFF), 4),
i64x4::splat(0xFFFFFFFF0));
i64x4::splat(0xFFFFFFFF0)
);
}
#[simd_test = "avx2"]
@ -3090,14 +3240,16 @@ mod tests {
unsafe fn _mm256_srai_epi16() {
assert_eq!(
avx2::_mm256_srai_epi16(i16x16::splat(-1), 1),
i16x16::splat(-1));
i16x16::splat(-1)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_srai_epi32() {
assert_eq!(
avx2::_mm256_srai_epi32(i32x8::splat(-1), 1),
i32x8::splat(-1));
i32x8::splat(-1)
);
}
#[simd_test = "avx2"]
@ -3106,7 +3258,7 @@ mod tests {
let count = i32x4::splat(1);
let r = avx2::_mm_srav_epi32(a, count);
let e = i32x4::splat(2);
assert_eq!(r, e );
assert_eq!(r, e);
}
#[simd_test = "avx2"]
@ -3115,7 +3267,7 @@ mod tests {
let count = i32x8::splat(1);
let r = avx2::_mm256_srav_epi32(a, count);
let e = i32x8::splat(2);
assert_eq!(r, e );
assert_eq!(r, e);
}
#[simd_test = "avx2"]
@ -3146,21 +3298,24 @@ mod tests {
unsafe fn _mm256_srli_epi16() {
assert_eq!(
avx2::_mm256_srli_epi16(i16x16::splat(0xFF), 4),
i16x16::splat(0xF));
i16x16::splat(0xF)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_srli_epi32() {
assert_eq!(
avx2::_mm256_srli_epi32(i32x8::splat(0xFFFF), 4),
i32x8::splat(0xFFF));
i32x8::splat(0xFFF)
);
}
#[simd_test = "avx2"]
unsafe fn _mm256_srli_epi64() {
assert_eq!(
avx2::_mm256_srli_epi64(i64x4::splat(0xFFFFFFFF), 4),
i64x4::splat(0xFFFFFFF));
i64x4::splat(0xFFFFFFF)
);
}
#[simd_test = "avx2"]
@ -3274,41 +3429,51 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_alignr_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
25, 26, 27, 28, 29, 30, 31, 32,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x32::new(
-1, -2, -3, -4, -5, -6, -7, -8,
-9, -10, -11, -12, -13, -14, -15, -16,
-17, -18, -19, -20, -21, -22, -23, -24,
-25, -26, -27, -28, -29, -30, -31, -32);
-25, -26, -27, -28, -29, -30, -31, -32,
);
let r = avx2::_mm256_alignr_epi8(a, b, 33);
assert_eq!(r, i8x32::splat(0));
let r = avx2::_mm256_alignr_epi8(a, b, 17);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = i8x32::new(
2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 0);
26, 27, 28, 29, 30, 31, 32, 0,
);
assert_eq!(r, expected);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = i8x32::new(
-17, -18, -19, -20, -21, -22, -23, -24,
-25, -26, -27, -28, -29, -30, -31, -32,
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
9, 10, 11, 12, 13, 14, 15, 16,
);
let r = avx2::_mm256_alignr_epi8(a, b, 16);
assert_eq!(r, expected);
let r = avx2::_mm256_alignr_epi8(a, b, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = i8x32::new(
-16, -17, -18, -19, -20, -21, -22, -23,
-24, -25, -26, -27, -28, -29, -30, -31,
-32, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15);
8, 9, 10, 11, 12, 13, 14, 15,
);
assert_eq!(r, expected);
let r = avx2::_mm256_alignr_epi8(a, b, 0);
@ -3317,18 +3482,21 @@ mod tests {
#[simd_test = "avx2"]
unsafe fn _mm256_shuffle_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = u8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
25, 26, 27, 28, 29, 30, 31, 32,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = u8x32::new(
4, 128, 4, 3, 24, 12, 6, 19,
12, 5, 5, 10, 4, 1, 8, 0,
4, 128, 4, 3, 24, 12, 6, 19,
12, 5, 5, 10, 4, 1, 8, 0,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = u8x32::new(
5, 0, 5, 4, 9, 13, 7, 4,
13, 6, 6, 11, 5, 2, 9, 1,

View file

@ -1,11 +1,16 @@
//! Bit Manipulation Instruction (BMI) Set 1.0.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference,
//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29)
//! provides a quick overview of the available instructions.
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
//! available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.
//! 28Advanced_Bit_Manipulation.29
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -32,8 +37,8 @@ pub unsafe fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
@ -44,8 +49,8 @@ pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+bmi"]
#[cfg_attr(test, assert_instr(bextr))]
@ -177,9 +182,9 @@ pub unsafe fn _mm_tzcnt_u64(x: u64) -> u64 {
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.bmi.bextr.32"]
#[link_name = "llvm.x86.bmi.bextr.32"]
fn x86_bmi_bextr_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.bextr.64"]
#[link_name = "llvm.x86.bmi.bextr.64"]
fn x86_bmi_bextr_64(x: u64, y: u64) -> u64;
}

View file

@ -1,11 +1,15 @@
//! Bit Manipulation Instruction (BMI) Set 2.0.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference,
//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectu res-software-developer-instruction-set-reference-manual-325383.pdf).
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29)
//! provides a quick overview of the available instructions.
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
//! available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.
//! 28Advanced_Bit_Manipulation.29
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -96,17 +100,17 @@ pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
#[allow(dead_code)]
extern "C" {
#[link_name="llvm.x86.bmi.bzhi.32"]
#[link_name = "llvm.x86.bmi.bzhi.32"]
fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.bzhi.64"]
#[link_name = "llvm.x86.bmi.bzhi.64"]
fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
#[link_name="llvm.x86.bmi.pdep.32"]
#[link_name = "llvm.x86.bmi.pdep.32"]
fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.pdep.64"]
#[link_name = "llvm.x86.bmi.pdep.64"]
fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
#[link_name="llvm.x86.bmi.pext.32"]
#[link_name = "llvm.x86.bmi.pext.32"]
fn x86_bmi2_pext_32(x: u32, y: u32) -> u32;
#[link_name="llvm.x86.bmi.pext.64"]
#[link_name = "llvm.x86.bmi.pext.64"]
fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
}
@ -118,7 +122,7 @@ mod tests {
#[simd_test = "bmi2"]
unsafe fn _pext_u32() {
let n = 0b1011_1110_1001_0011u32;
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
let s0 = 0b0000_0000_0011_0101u32;
@ -133,7 +137,7 @@ mod tests {
#[simd_test = "bmi2"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _pext_u64() {
let n = 0b1011_1110_1001_0011u64;
let n = 0b1011_1110_1001_0011u64;
let m0 = 0b0110_0011_1000_0101u64;
let s0 = 0b0000_0000_0011_0101u64;
@ -147,7 +151,7 @@ mod tests {
#[simd_test = "bmi2"]
unsafe fn _pdep_u32() {
let n = 0b1011_1110_1001_0011u32;
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
let s0 = 0b0000_0010_0000_0101u32;
@ -162,7 +166,7 @@ mod tests {
#[simd_test = "bmi2"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _pdep_u64() {
let n = 0b1011_1110_1001_0011u64;
let n = 0b1011_1110_1001_0011u64;
let m0 = 0b0110_0011_1000_0101u64;
let s0 = 0b0000_0010_0000_0101u64;
@ -194,23 +198,31 @@ mod tests {
let a: u32 = 4_294_967_200;
let b: u32 = 2;
let (lo, hi): (u32, u32) = bmi2::_mulx_u32(a, b);
// result = 8589934400
// = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64
// ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/*
result = 8589934400
= 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64
^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32);
assert_eq!(hi, 0b0001u32);
}
#[simd_test = "bmi2"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _mulx_u64() {
let a: u64 = 9_223_372_036_854_775_800;
let b: u64 = 100;
let (lo, hi): (u64, u64) = bmi2::_mulx_u64(a, b);
// result = 922337203685477580000
// = 0b00110001_11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u128
// ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert_eq!(lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64);
/*
result = 922337203685477580000 =
0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
assert_eq!(
lo,
0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
);
assert_eq!(hi, 0b00110001u64);
}
}

View file

@ -348,5 +348,3 @@ macro_rules! assert_approx_eq {
*a, *b, $eps, (*a - *b).abs());
})
}

View file

@ -12,7 +12,7 @@ pub use self::bmi::*;
pub use self::bmi2::*;
pub use self::tbm::*;
pub use self::runtime::{__Feature, __unstable_detect_feature};
pub use self::runtime::{__unstable_detect_feature, __Feature};
#[allow(non_camel_case_types)]
pub type __m128i = ::v128::i8x16;

View file

@ -1,9 +1,9 @@
//! This module implements minimal run-time feature detection for x86.
//!
//! The features are detected using the `detect_features` function below. This function
//! uses the CPUID instruction to read the feature flags from the CPU and encodes them in
//! an `usize` where each bit position represents whether a feature is available (bit is set)
//! or unavaiable (bit is cleared).
//! The features are detected using the `detect_features` function below.
//! This function uses the CPUID instruction to read the feature flags from the
//! CPU and encodes them in an `usize` where each bit position represents
//! whether a feature is available (bit is set) or unavaiable (bit is cleared).
//!
//! The enum `__Feature` is used to map bit positions to feature names, and the
//! the `__unstable_detect_feature!` macro is used to map string literals (e.g.
@ -12,10 +12,10 @@
//!
//! The run-time feature detection is performed by the
//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
//! this functions queries the CPU for the available features and stores them in
//! a global `AtomicUsize` variable. The query is performed by just checking whether the
//! feature bit in this global variable is set or cleared.
use ::std::sync::atomic::{AtomicUsize, Ordering};
//! this functions queries the CPU for the available features and stores them
//! in a global `AtomicUsize` variable. The query is performed by just checking
//! whether the feature bit in this global variable is set or cleared.
use std::sync::atomic::{AtomicUsize, Ordering};
/// This macro maps the string-literal feature names to values of the
/// `__Feature` enum at compile-time. The feature names used are the same as
@ -26,22 +26,68 @@ use ::std::sync::atomic::{AtomicUsize, Ordering};
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("sse") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse{}) };
("sse2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse2{}) };
("sse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse3{}) };
("ssse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::ssse3{}) };
("sse4.1") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_1{}) };
("sse4.2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_2{}) };
("avx") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx{}) };
("avx2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx2{}) };
("fma") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::fma{}) };
("bmi") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi{}) };
("bmi2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi2{}) };
("abm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
("lzcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
("tbm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::tbm{}) };
("popcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::popcnt{}) };
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
("sse") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse{}) };
("sse2") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse2{})
};
("sse3") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse3{})
};
("ssse3") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::ssse3{})
};
("sse4.1") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse4_1{})
};
("sse4.2") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse4_2{})
};
("avx") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::avx{})
};
("avx2") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::avx2{})
};
("fma") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::fma{})
};
("bmi") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::bmi{})
};
("bmi2") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::bmi2{})
};
("abm") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::abm{})
};
("lzcnt") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::abm{})
};
("tbm") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::tbm{})
};
("popcnt") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::popcnt{})
};
($t:tt) => {
compile_error!(concat!("unknown target feature: ", $t))
};
}
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
@ -74,15 +120,15 @@ pub enum __Feature {
bmi,
/// BMI1 (Bit Manipulation Instructions 2)
bmi2,
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero Count) on Intel
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
/// Count) on Intel
abm,
/// TBM (Trailing Bit Manipulation)
tbm,
/// POPCNT (Population Count)
popcnt,
#[doc(hidden)]
__NonExhaustive
#[doc(hidden)] __NonExhaustive,
}
fn set_bit(x: usize, bit: u32) -> usize {
@ -102,14 +148,19 @@ fn inv_test_bit(v: usize, idx: u32) -> bool {
/// Run-time feature detection on x86 works by using the CPUID instruction.
///
/// The [CPUID Wikipedia page](https://en.wikipedia.org/wiki/CPUID) contains all
/// the information about which flags to set to query which values, and in which
/// registers these are reported.
/// The [CPUID Wikipedia page][wiki_cpuid] contains
/// all the information about which flags to set to query which values, and in
/// which registers these are reported.
///
/// The definitive references are:
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
/// Instruction Set Reference, A-Z][intel64_ref].
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
/// System Instructions][amd64_ref].
///
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
fn detect_features() -> usize {
let ebx;
let ecx;
@ -119,14 +170,16 @@ fn detect_features() -> usize {
/// To obtain all feature flags we need two CPUID queries:
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
/// This gives us most of the CPU features in ECX and EDX (see below),
/// This gives us most of the CPU features in ECX and EDX (see
/// below),
asm!("cpuid"
: "={ecx}"(ecx), "={edx}"(edx)
: "{eax}"(0x00000001u32), "{ecx}"(0 as u32)
: :);
/// 2. EAX=7, ECX=0: Queries "Extended Features"
/// This gives us information about bmi,bmi2, and avx2 support (see below).
/// This gives us information about bmi,bmi2, and avx2 support
/// (see below).
asm!("cpuid"
: "={ebx}"(ebx)
: "{eax}"(0x00000007u32), "{ecx}"(0 as u32)
@ -135,36 +188,65 @@ fn detect_features() -> usize {
let mut value: usize = 0;
// CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX (unneeded):
if inv_test_bit(ebx, 3) { value = set_bit(value, __Feature::bmi as u32); }
if inv_test_bit(ebx, 5) { value = set_bit(value, __Feature::avx2 as u32); }
if inv_test_bit(ebx, 8) { value = set_bit(value, __Feature::bmi2 as u32); }
// CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX
// (the result in ECX is not currently needed):
if inv_test_bit(ebx, 3) {
value = set_bit(value, __Feature::bmi as u32);
}
if inv_test_bit(ebx, 5) {
value = set_bit(value, __Feature::avx2 as u32);
}
if inv_test_bit(ebx, 8) {
value = set_bit(value, __Feature::bmi2 as u32);
}
// CPUID call with EAX=1 => feature bits in ECX and EDX:
if inv_test_bit(ecx, 0) { value = set_bit(value, __Feature::sse3 as u32); }
if inv_test_bit(ecx, 5) { value = set_bit(value, __Feature::abm as u32); }
if inv_test_bit(ecx, 9) { value = set_bit(value, __Feature::ssse3 as u32); }
if inv_test_bit(ecx, 12) { value = set_bit(value, __Feature::fma as u32); }
if inv_test_bit(ecx, 19) { value = set_bit(value, __Feature::sse4_1 as u32); }
if inv_test_bit(ecx, 20) { value = set_bit(value, __Feature::sse4_2 as u32); }
if inv_test_bit(ecx, 21) { value = set_bit(value, __Feature::tbm as u32); }
if inv_test_bit(ecx, 23) { value = set_bit(value, __Feature::popcnt as u32); }
if inv_test_bit(ecx, 28) { value = set_bit(value, __Feature::avx as u32); }
if inv_test_bit(ecx, 0) {
value = set_bit(value, __Feature::sse3 as u32);
}
if inv_test_bit(ecx, 5) {
value = set_bit(value, __Feature::abm as u32);
}
if inv_test_bit(ecx, 9) {
value = set_bit(value, __Feature::ssse3 as u32);
}
if inv_test_bit(ecx, 12) {
value = set_bit(value, __Feature::fma as u32);
}
if inv_test_bit(ecx, 19) {
value = set_bit(value, __Feature::sse4_1 as u32);
}
if inv_test_bit(ecx, 20) {
value = set_bit(value, __Feature::sse4_2 as u32);
}
if inv_test_bit(ecx, 21) {
value = set_bit(value, __Feature::tbm as u32);
}
if inv_test_bit(ecx, 23) {
value = set_bit(value, __Feature::popcnt as u32);
}
if inv_test_bit(ecx, 28) {
value = set_bit(value, __Feature::avx as u32);
}
if inv_test_bit(edx, 25) { value = set_bit(value, __Feature::sse as u32); }
if inv_test_bit(edx, 26) { value = set_bit(value, __Feature::sse2 as u32); }
if inv_test_bit(edx, 25) {
value = set_bit(value, __Feature::sse as u32);
}
if inv_test_bit(edx, 26) {
value = set_bit(value, __Feature::sse2 as u32);
}
value
}
/// This global variable is a bitset used to cache the features supported by the
/// CPU.
/// This global variable is a bitset used to cache the features supported by
/// the CPU.
static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
/// Performs run-time feature detection.
///
/// On its first invocation, it detects the CPU features and caches them in the
/// `FEATURES` global variable as an `AtomicUsize`.
/// On its first invocation, it detects the CPU features and caches them
/// in the `FEATURES` global variable as an `AtomicUsize`.
///
/// It uses the `__Feature` variant to index into this variable as a bitset. If
/// the bit is set, the feature is enabled, and otherwise it is disabled.
@ -172,7 +254,7 @@ static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
pub fn __unstable_detect_feature(x: __Feature) -> bool {
if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX {
if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX {
FEATURES.store(detect_features(), Ordering::Relaxed);
}
test_bit(FEATURES.load(Ordering::Relaxed), x as u32)

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,8 @@ use std::mem;
use std::os::raw::c_void;
use std::ptr;
use simd_llvm::{
simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
};
use simd_llvm::{simd_cast, simd_shuffle16, simd_shuffle2, simd_shuffle4,
simd_shuffle8};
use x86::__m128i;
use v128::*;
use v64::*;
@ -317,7 +316,9 @@ pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 {
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
let (zero, imm8) = (__m128i::splat(0), imm8 as u32);
const fn sub(a: u32, b: u32) -> u32 { a - b }
const fn sub(a: u32, b: u32) -> u32 {
a - b
}
macro_rules! shuffle {
($shift:expr) => {
simd_shuffle16::<__m128i, __m128i>(zero, a, [
@ -333,14 +334,22 @@ pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
}
}
match imm8 {
0 => shuffle!(0), 1 => shuffle!(1),
2 => shuffle!(2), 3 => shuffle!(3),
4 => shuffle!(4), 5 => shuffle!(5),
6 => shuffle!(6), 7 => shuffle!(7),
8 => shuffle!(8), 9 => shuffle!(9),
10 => shuffle!(10), 11 => shuffle!(11),
12 => shuffle!(12), 13 => shuffle!(13),
14 => shuffle!(14), 15 => shuffle!(15),
0 => shuffle!(0),
1 => shuffle!(1),
2 => shuffle!(2),
3 => shuffle!(3),
4 => shuffle!(4),
5 => shuffle!(5),
6 => shuffle!(6),
7 => shuffle!(7),
8 => shuffle!(8),
9 => shuffle!(9),
10 => shuffle!(10),
11 => shuffle!(11),
12 => shuffle!(12),
13 => shuffle!(13),
14 => shuffle!(14),
15 => shuffle!(15),
_ => shuffle!(16),
}
}
@ -365,7 +374,7 @@ pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(psllw))]
pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 {
pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 {
pslliw(a, imm8)
}
@ -454,7 +463,9 @@ pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 {
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
let (zero, imm8) = (__m128i::splat(0), imm8 as u32);
const fn add(a: u32, b: u32) -> u32 { a + b }
const fn add(a: u32, b: u32) -> u32 {
a + b
}
macro_rules! shuffle {
($shift:expr) => {
simd_shuffle16::<__m128i, __m128i>(a, zero, [
@ -470,14 +481,22 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
}
}
match imm8 {
0 => shuffle!(0), 1 => shuffle!(1),
2 => shuffle!(2), 3 => shuffle!(3),
4 => shuffle!(4), 5 => shuffle!(5),
6 => shuffle!(6), 7 => shuffle!(7),
8 => shuffle!(8), 9 => shuffle!(9),
10 => shuffle!(10), 11 => shuffle!(11),
12 => shuffle!(12), 13 => shuffle!(13),
14 => shuffle!(14), 15 => shuffle!(15),
0 => shuffle!(0),
1 => shuffle!(1),
2 => shuffle!(2),
3 => shuffle!(3),
4 => shuffle!(4),
5 => shuffle!(5),
6 => shuffle!(6),
7 => shuffle!(7),
8 => shuffle!(8),
9 => shuffle!(9),
10 => shuffle!(10),
11 => shuffle!(11),
12 => shuffle!(12),
13 => shuffle!(13),
14 => shuffle!(14),
15 => shuffle!(15),
_ => shuffle!(16),
}
}
@ -487,7 +506,7 @@ pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(psrlw))]
pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 {
pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 {
psrliw(a, imm8)
}
@ -649,7 +668,7 @@ pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 {
pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 {
simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1]))
}
@ -777,7 +796,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
#[target_feature = "+sse2"]
// no particular instruction to test
pub unsafe fn _mm_set_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
) -> i16x8 {
i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)
}
@ -790,6 +809,7 @@ pub unsafe fn _mm_set_epi8(
e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> i8x16 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x16::new(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
)
@ -840,7 +860,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
#[target_feature = "+sse2"]
// no particular instruction to test
pub unsafe fn _mm_setr_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
) -> i16x8 {
i16x8::new(e7, e6, e5, e4, e3, e2, e1, e0)
}
@ -853,6 +873,7 @@ pub unsafe fn _mm_setr_epi8(
e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> i8x16 {
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x16::new(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
)
@ -895,7 +916,8 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut __m128i as *mut u8,
mem::size_of::<__m128i>());
mem::size_of::<__m128i>(),
);
dst
}
@ -934,7 +956,8 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
ptr::copy_nonoverlapping(
&a as *const _ as *const u8,
mem_addr as *mut u8,
mem::size_of::<__m128i>());
mem::size_of::<__m128i>(),
);
}
/// Store the lower 64-bit integer `a` to a memory location.
@ -945,7 +968,10 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
// no particular instruction to test
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
ptr::copy_nonoverlapping(
&a as *const _ as *const u8, mem_addr as *mut u8, 8);
&a as *const _ as *const u8,
mem_addr as *mut u8,
8,
);
}
/// Return a vector where the low element is extracted from `a` and its upper
@ -1076,7 +1102,9 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 {
pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 {
// See _mm_shuffle_epi32.
let imm8 = (imm8 & 0xFF) as u8;
const fn add4(x: u32) -> u32 { x + 4 }
const fn add4(x: u32) -> u32 {
x + 4
}
macro_rules! shuffle_done {
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
@ -1183,10 +1211,11 @@ pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 {
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_epi8(a: i8x16, b: i8x16) -> i8x16 {
simd_shuffle16(a, b, [
8, 24, 9, 25, 10, 26, 11, 27,
12, 28, 13, 29, 14, 30, 15, 31,
])
simd_shuffle16(
a,
b,
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
)
}
/// Unpack and interleave 16-bit integers from the high half of `a` and `b`.
@ -1218,10 +1247,11 @@ pub unsafe fn _mm_unpackhi_epi64(a: i64x2, b: i64x2) -> i64x2 {
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_epi8(a: i8x16, b: i8x16) -> i8x16 {
simd_shuffle16(a, b, [
0, 16, 1, 17, 2, 18, 3, 19,
4, 20, 5, 21, 6, 22, 7, 23,
])
simd_shuffle16(
a,
b,
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
)
}
/// Unpack and interleave 16-bit integers from the low half of `a` and `b`.
@ -1718,7 +1748,8 @@ pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
mem::transmute(ucomineqsd(a, b) as u8)
}
/// Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements
/// Convert packed double-precision (64-bit) floating-point elements in "a" to
/// packed single-precision (32-bit) floating-point elements
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
@ -1726,8 +1757,8 @@ pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
cvtpd2ps(a)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to packed
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed
/// double-precision (64-bit) floating-point elements.
#[inline(always)]
#[target_feature = "+sse2"]
@ -1736,7 +1767,8 @@ pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
cvtps2pd(a)
}
/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed 32-bit integers.
/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
@ -1744,7 +1776,8 @@ pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 {
cvtpd2dq(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer.
/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 32-bit integer.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtsd2si))]
@ -1752,7 +1785,8 @@ pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 {
cvtsd2si(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer.
/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 64-bit integer.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[target_feature = "+sse2"]
@ -1761,9 +1795,10 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
cvtsd2si64(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in `b` to a
/// single-precision (32-bit) floating-point element, store the result in the lower element
/// of the return value, and copy the upper element from `a` to the upper element the return value.
/// Convert the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, store the result in
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element the return value.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
@ -1771,18 +1806,19 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
cvtsd2ss(a, b)
}
/// Convert the lower single-precision (32-bit) floating-point element in `b` to a
/// double-precision (64-bit) floating-point element, store the result in the lower element
/// of the return value, and copy the upper element from `a` to the upper element the return value.
/// Convert the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, store the result in
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element the return value.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtss2sd))]
pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4 ) -> f64x2 {
pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 {
cvtss2sd(a, b)
}
/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed
/// 32-bit integers with truncation.
/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
@ -1790,8 +1826,8 @@ pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 {
cvttpd2dq(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in `a` to a 32-bit integer
/// with truncation.
/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvttsd2si))]
@ -1799,8 +1835,8 @@ pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 {
cvttsd2si(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in `a` to a 64-bit integer
/// with truncation.
/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 64-bit integer with truncation.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[target_feature = "+sse2"]
@ -1809,8 +1845,8 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
cvttsd2si64(a)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to packed 32-bit
/// integers with truncation
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvttps2dq))]
@ -1818,45 +1854,48 @@ pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
cvttps2dq(a)
}
/// Copy double-precision (64-bit) floating-point element `a` to the lower element of the
/// packed 64-bit return value
/// Copy double-precision (64-bit) floating-point element `a` to the lower
/// element of the packed 64-bit return value.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
f64x2::new(a, 0_f64)
}
/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
f64x2::new(a, a)
}
/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
f64x2::new(a, a)
}
/// Set packed double-precision (64-bit) floating-point elements in the return value with the
/// supplied values.
/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
f64x2::new(b, a)
}
/// Set packed double-precision (64-bit) floating-point elements in the return value with the
/// supplied values in reverse order.
/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
f64x2::new(a, b)
}
/// returns packed double-precision (64-bit) floating-point elements with all zeros.
/// returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_setzero_pd() -> f64x2 {
@ -1876,9 +1915,10 @@ pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 {
/// Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements)
/// from memory into the returned vector. mem_addr must be aligned on a 16-byte boundary or
/// a general-protection exception may be generated.
/// Load 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(movaps))]
@ -1886,9 +1926,9 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
*(mem_addr as *const f64x2)
}
/// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from `a`
/// into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
/// Store 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(movaps))]
@ -1906,12 +1946,13 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) {
ptr::copy_nonoverlapping(
&a as *const f64x2 as *const u8,
mem_addr as *mut u8,
mem::size_of::<f64x2>());
mem::size_of::<f64x2>(),
);
}
/// Store the lower double-precision (64-bit) floating-point element from `a` into 2 contiguous
/// elements in memory. `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
/// Store the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
@ -1919,9 +1960,9 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
*(mem_addr as *mut f64x2) = b;
}
/// Store the lower double-precision (64-bit) floating-point element from `a` into 2 contiguous
/// elements in memory. `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
/// Store the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
@ -1929,8 +1970,10 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
*(mem_addr as *mut f64x2) = b;
}
/// Store 2 double-precision (64-bit) floating-point elements from `a` into memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection exception may be generated.
/// Store 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
@ -1956,9 +1999,9 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 {
f64x2::new(d, d)
}
/// Load 2 double-precision (64-bit) floating-point elements from memory into the returned vector
/// in reverse order. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
/// Load 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(movapd))]
@ -1967,9 +2010,9 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 {
simd_shuffle2(a, a, [1, 0])
}
/// Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements)
/// from memory into the returned vector. mem_addr does not need to be aligned on any particular
/// oundary.
/// Load 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(movups))]
@ -1978,7 +2021,8 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut f64x2 as *mut u8,
mem::size_of::<f64x2>());
mem::size_of::<f64x2>(),
);
dst
}
@ -1997,7 +2041,7 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
}
#[allow(improper_ctypes)]
extern {
extern "C" {
#[link_name = "llvm.x86.sse2.pause"]
fn pause();
#[link_name = "llvm.x86.sse2.clflush"]
@ -2145,7 +2189,7 @@ extern {
#[link_name = "llvm.x86.sse2.cvtsd2ss"]
fn cvtsd2ss(a: f32x4, b: f64x2) -> f32x4;
#[link_name = "llvm.x86.sse2.cvtss2sd"]
fn cvtss2sd(a: f64x2, b: f32x4 ) -> f64x2;
fn cvtss2sd(a: f64x2, b: f32x4) -> f64x2;
#[link_name = "llvm.x86.sse2.cvttpd2dq"]
fn cvttpd2dq(a: f64x2) -> i32x4;
#[link_name = "llvm.x86.sse2.cvttsd2si"]
@ -2160,7 +2204,7 @@ extern {
mod tests {
use std::os::raw::c_void;
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use test::black_box; // Used to inhibit constant-folding.
use v128::*;
use x86::{__m128i, sse2};
@ -2188,13 +2232,17 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_add_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = sse2::_mm_add_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
);
assert_eq!(r, e);
}
@ -2235,13 +2283,17 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_adds_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = sse2::_mm_adds_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
);
assert_eq!(r, e);
}
@ -2288,13 +2340,17 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_adds_epu8() {
let a = u8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = u8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = sse2::_mm_adds_epu8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = u8x16::new(
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46);
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
);
assert_eq!(r, e);
}
@ -2410,12 +2466,11 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_sad_epu8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = u8x16::new(
255, 254, 253, 252, 1, 2, 3, 4,
155, 154, 153, 152, 1, 2, 3, 4);
let b = u8x16::new(
0, 0, 0, 0, 2, 1, 2, 1,
1, 1, 1, 1, 1, 2, 1, 2);
255, 254, 253, 252, 1, 2, 3, 4, 155, 154, 153, 152, 1, 2, 3, 4,
);
let b = u8x16::new(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
let r = sse2::_mm_sad_epu8(a, b);
let e = u64x2::new(1020, 614);
assert_eq!(r, e);
@ -2527,44 +2582,58 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_slli_si128() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_slli_si128(a, 1);
let e = __m128i::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e =
__m128i::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq!(r, e);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_slli_si128(a, 15);
let e = __m128i::new(
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let e = __m128i::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
assert_eq!(r, e);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_slli_si128(a, 16);
assert_eq!(r, __m128i::splat(0));
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_slli_si128(a, -1);
assert_eq!(r, __m128i::splat(0));
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_slli_si128(a, -0x80000000);
assert_eq!(r, __m128i::splat(0));
}
#[simd_test = "sse2"]
unsafe fn _mm_slli_epi16() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i16x8::new(
0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0);
0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
);
let r = sse2::_mm_slli_epi16(a, 4);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i16x8::new(
0xFFF0 as u16 as i16,
0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, 0, 0, 0, 0);
0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
0, 0, 0, 0,
);
assert_eq!(r, e);
}
@ -2635,44 +2704,58 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_srli_si128() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_srli_si128(a, 1);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = __m128i::new(
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
);
assert_eq!(r, e);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_srli_si128(a, 15);
let e = __m128i::new(
16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
let e = __m128i::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq!(r, e);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_srli_si128(a, 16);
assert_eq!(r, __m128i::splat(0));
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_srli_si128(a, -1);
assert_eq!(r, __m128i::splat(0));
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = __m128i::new(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
);
let r = sse2::_mm_srli_si128(a, -0x80000000);
assert_eq!(r, __m128i::splat(0));
}
#[simd_test = "sse2"]
unsafe fn _mm_srli_epi16() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i16x8::new(
0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0);
0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
);
let r = sse2::_mm_srli_epi16(a, 4);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i16x8::new(
0xFFF as u16 as i16,
0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0);
0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
);
assert_eq!(r, e);
}
@ -2747,13 +2830,18 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_cmpeq_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = i8x16::new(
15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b =
i8x16::new(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = sse2::_mm_cmpeq_epi8(a, b);
assert_eq!(r, i8x16::new(
0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
assert_eq!(
r,
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x16::new(
0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
)
);
}
#[simd_test = "sse2"]
@ -2902,18 +2990,12 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_set_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = sse2::_mm_set_epi8(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
);
let e = i8x16::new(
15, 14, 13, 12,
11, 10, 9, 8,
7, 6, 5, 4,
3, 2, 1, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
let e =
i8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
assert_eq!(r, e);
}
@ -2955,18 +3037,12 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_setr_epi8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = sse2::_mm_setr_epi8(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
);
let e = i8x16::new(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
let e =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq!(r, e);
}
@ -3042,9 +3118,13 @@ mod tests {
let a = i16x8::new(0x80, -0x81, 0, 0, 0, 0, 0, 0);
let b = i16x8::new(0, 0, 0, 0, 0, 0, -0x81, 0x80);
let r = sse2::_mm_packs_epi16(a, b);
assert_eq!(r, i8x16::new(
0x7F, -0x80, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, -0x80, 0x7F));
assert_eq!(
r,
#[cfg_attr(rustfmt, rustfmt_skip)]
i8x16::new(
0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
)
);
}
#[simd_test = "sse2"]
@ -3053,7 +3133,9 @@ mod tests {
let b = i32x4::new(0, 0, -0x8001, 0x8000);
let r = sse2::_mm_packs_epi32(a, b);
assert_eq!(
r, i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
r,
i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF)
);
}
#[simd_test = "sse2"]
@ -3061,9 +3143,10 @@ mod tests {
let a = i16x8::new(0x100, -1, 0, 0, 0, 0, 0, 0);
let b = i16x8::new(0, 0, 0, 0, 0, 0, -1, 0x100);
let r = sse2::_mm_packus_epi16(a, b);
assert_eq!(r, u8x16::new(
0xFF, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0xFF));
assert_eq!(
r,
u8x16::new(0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF)
);
}
#[simd_test = "sse2"]
@ -3082,9 +3165,9 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_movemask_epi8() {
let a = i8x16::from(u8x16::new(
let a = i8x16::from(#[cfg_attr(rustfmt, rustfmt_skip)] u8x16::new(
0b1000_0000, 0b0, 0b1000_0000, 0b01, 0b0101, 0b1111_0000, 0, 0,
0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000));
0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000, ));
let r = sse2::_mm_movemask_epi8(a);
assert_eq!(r, 0b10100100_00100101);
}
@ -3115,13 +3198,17 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_unpackhi_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = sse2::_mm_unpackhi_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
);
assert_eq!(r, e);
}
@ -3154,13 +3241,15 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_unpacklo_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let r = sse2::_mm_unpacklo_epi8(a, b);
let e = i8x16::new(
0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
let e =
i8x16::new(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
assert_eq!(r, e);
}
@ -3825,7 +3914,7 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_cvtpd_ps() {
use std::{f64,f32};
use std::{f32, f64};
let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, 5.0));
assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, 0.0));
@ -3834,20 +3923,23 @@ mod tests {
assert_eq!(r, f32x4::new(-1.0, -5.0, 0.0, 0.0));
let r = sse2::_mm_cvtpd_ps(f64x2::new(f64::MAX, f64::MIN));
assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0,0.0));
assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
let r = sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
let r =
sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0, 0.0));
}
#[simd_test = "sse2"]
unsafe fn _mm_cvtps_pd() {
use std::{f64, f32};
use std::{f32, f64};
let r = sse2::_mm_cvtps_pd(f32x4::new(-1.0, 2.0, -3.0, 5.0));
assert_eq!(r, f64x2::new(-1.0, 2.0));
let r = sse2::_mm_cvtps_pd(f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
let r = sse2::_mm_cvtps_pd(
f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN),
);
assert_eq!(r, f64x2::new(f32::MAX as f64, f64::INFINITY));
}
@ -3864,7 +3956,9 @@ mod tests {
let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::MAX, f64::MIN));
assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::INFINITY, f64::NEG_INFINITY));
let r = sse2::_mm_cvtpd_epi32(
f64x2::new(f64::INFINITY, f64::NEG_INFINITY),
);
assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::NAN, f64::NAN));
@ -3902,7 +3996,7 @@ mod tests {
#[simd_test = "sse2"]
unsafe fn _mm_cvtsd_ss() {
use std::{f64, f32};
use std::{f32, f64};
let a = f32x4::new(-1.1, -2.2, 3.3, 4.4);
let b = f64x2::new(2.0, -5.0);
@ -3911,17 +4005,26 @@ mod tests {
assert_eq!(r, f32x4::new(2.0, -2.2, 3.3, 4.4));
let a = f32x4::new(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
let a =
f32x4::new(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
let b = f64x2::new(f64::INFINITY, -5.0);
let r = sse2::_mm_cvtsd_ss(a, b);
assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY));
assert_eq!(
r,
f32x4::new(
f32::INFINITY,
f32::NEG_INFINITY,
f32::MAX,
f32::NEG_INFINITY
)
);
}
#[simd_test = "sse2"]
unsafe fn _mm_cvtss_sd() {
use std::{f64, f32};
use std::{f32, f64};
let a = f64x2::new(-1.1, 2.2);
let b = f32x4::new(1.0, 2.0, 3.0, 4.0);
@ -3984,7 +4087,8 @@ mod tests {
let r = sse2::_mm_cvttps_epi32(a);
assert_eq!(r, i32x4::new(-1, 2, -3, 6));
let a = f32x4::new(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
let a =
f32x4::new(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
let r = sse2::_mm_cvttps_epi32(a);
assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
}

View file

@ -106,7 +106,7 @@ pub unsafe fn _mm_moveldup_ps(a: f32x4) -> f32x4 {
}
#[allow(improper_ctypes)]
extern {
extern "C" {
#[link_name = "llvm.x86.sse3.addsub.ps"]
fn addsubps(a: f32x4, b: f32x4) -> f32x4;
#[link_name = "llvm.x86.sse3.addsub.pd"]
@ -129,7 +129,7 @@ mod tests {
use stdsimd_test::simd_test;
use v128::*;
use x86::sse3 as sse3;
use x86::sse3;
#[simd_test = "sse3"]
unsafe fn _mm_addsub_ps() {
@ -181,7 +181,8 @@ mod tests {
#[simd_test = "sse3"]
unsafe fn _mm_lddqu_si128() {
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let a =
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = sse3::_mm_lddqu_si128(&a);
assert_eq!(a, r);
}
@ -213,4 +214,4 @@ mod tests {
let r = sse3::_mm_loaddup_pd(&d);
assert_eq!(r, f64x2::new(d, d));
}
}
}

View file

@ -15,7 +15,7 @@ pub unsafe fn _mm_blendv_epi8(a: i8x16, b: i8x16, mask: i8x16) -> i8x16 {
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pblendw, imm8=0xF0))]
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: u8) -> i16x8 {
macro_rules! call {
($imm8:expr) => { pblendw(a, b, $imm8) }
@ -23,7 +23,8 @@ pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: u8) -> i16x8 {
constify_imm8!(imm8, call)
}
/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using `mask`
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(blendvpd))]
@ -31,7 +32,8 @@ pub unsafe fn _mm_blendv_pd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2 {
blendvpd(a, b, mask)
}
/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using `mask`
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(blendvps))]
@ -39,10 +41,11 @@ pub unsafe fn _mm_blendv_ps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4 {
blendvps(a, b, mask)
}
/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using control mask `imm2`
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `imm2`
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(blendpd, imm2=0b10))]
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: u8) -> f64x2 {
macro_rules! call {
($imm2:expr) => { blendpd(a, b, $imm2) }
@ -50,10 +53,11 @@ pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: u8) -> f64x2 {
constify_imm2!(imm2, call)
}
/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using mask `imm4`
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `imm4`
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(blendps, imm4=0b0101))]
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: u8) -> f32x4 {
macro_rules! call {
($imm4:expr) => { blendps(a, b, $imm4) }
@ -61,11 +65,12 @@ pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: u8) -> f32x4 {
constify_imm4!(imm4, call)
}
/// Extract a single-precision (32-bit) floating-point element from `a`, selected with `imm8`
/// Extract a single-precision (32-bit) floating-point element from `a`,
/// selected with `imm8`
#[inline(always)]
#[target_feature = "+sse4.1"]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8=0))]
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
mem::transmute(a.extract(imm8 as u32 & 0b11))
}
@ -73,7 +78,7 @@ pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
/// Extract an 8-bit integer from `a` selected with `imm8`
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pextrb, imm8=0))]
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
a.extract((imm8 & 0b1111) as u32)
}
@ -82,7 +87,7 @@ pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
#[inline(always)]
#[target_feature = "+sse4.1"]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8=1))]
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
a.extract((imm8 & 0b11) as u32)
}
@ -92,15 +97,16 @@ pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
#[inline(always)]
#[target_feature = "+sse4.1"]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8=1))]
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
a.extract((imm8 & 0b1) as u32)
}
/// Select a single value in `a` to store at some position in `b`,
/// Select a single value in `a` to store at some position in `b`,
/// Then zero elements according to `imm8`.
///
/// `imm8` specifies which bits from operand `a` will be copied, which bits in the
///
/// `imm8` specifies which bits from operand `a` will be copied, which bits in
/// the
/// result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
@ -121,7 +127,7 @@ pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
/// element is cleared.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(insertps, imm8=0b1010))]
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
macro_rules! call {
($imm8:expr) => { insertps(a, b, $imm8) }
@ -129,59 +135,66 @@ pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
constify_imm8!(imm8, call)
}
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a location specified by `imm8`.
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pinsrb, imm8=0))]
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
pub unsafe fn _mm_insert_epi8(a: i8x16, i: i8, imm8: u8) -> i8x16 {
a.replace((imm8 & 0b1111) as u32, i)
}
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a location specified by `imm8`.
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pinsrd, imm8=0))]
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
pub unsafe fn _mm_insert_epi32(a: i32x4, i: i32, imm8: u8) -> i32x4 {
a.replace((imm8 & 0b11) as u32, i)
}
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a location specified by `imm8`.
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pinsrq, imm8=0))]
#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
a.replace((imm8 & 0b1) as u32, i)
}
/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum values in dst.
/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum
/// values in dst.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmaxsb, imm8=0))]
#[cfg_attr(test, assert_instr(pmaxsb, imm8 = 0))]
pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
pmaxsb(a, b)
}
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed maximum.
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
/// maximum.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmaxuw, imm8=0))]
#[cfg_attr(test, assert_instr(pmaxuw, imm8 = 0))]
pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
pmaxuw(a, b)
}
// Compare packed 32-bit integers in `a` and `b`, and return packed maximum values.
// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
// values.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmaxsd, imm8=0))]
#[cfg_attr(test, assert_instr(pmaxsd, imm8 = 0))]
pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
pmaxsd(a, b)
}
// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed maximum values.
// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
// maximum values.
#[inline(always)]
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(pmaxud, imm8=0))]
#[cfg_attr(test, assert_instr(pmaxud, imm8 = 0))]
pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
pmaxud(a, b)
}
@ -221,7 +234,7 @@ pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
}
#[allow(improper_ctypes)]
extern {
extern "C" {
#[link_name = "llvm.x86.sse41.pblendvb"]
fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
#[link_name = "llvm.x86.sse41.blendvpd"]
@ -261,14 +274,18 @@ mod tests {
#[simd_test = "sse4.1"]
unsafe fn _mm_blendv_epi8() {
let a = i8x16::new(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
let mask = i8x16::new(
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
);
let mask =
i8x16::new(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31);
0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
);
assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e);
}
@ -286,7 +303,7 @@ mod tests {
unsafe fn _mm_blendv_ps() {
let a = f32x4::splat(0.0);
let b = f32x4::splat(1.0);
let mask = mem::transmute(i32x4::new(0,-1, 0, -1));
let mask = mem::transmute(i32x4::new(0, -1, 0, -1));
let r = sse41::_mm_blendv_ps(a, b, mask);
let e = f32x4::new(0.0, 1.0, 0.0, 1.0);
assert_eq!(r, e);
@ -330,7 +347,8 @@ mod tests {
#[simd_test = "sse4.1"]
unsafe fn _mm_extract_epi8() {
let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = sse41::_mm_extract_epi8(a, 1);
assert_eq!(r, 1);
let r = sse41::_mm_extract_epi8(a, 17);
@ -398,10 +416,22 @@ mod tests {
#[simd_test = "sse4.1"]
unsafe fn _mm_max_epi8() {
let a = i8x16::new(1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32);
let b = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x16::new(
1, 4, 5, 8, 9, 12, 13, 16,
17, 20, 21, 24, 25, 28, 29, 32,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
2, 3, 6, 7, 10, 11, 14, 15,
18, 19, 22, 23, 26, 27, 30, 31,
);
let r = sse41::_mm_max_epi8(a, b);
let e = i8x16::new(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32);
#[cfg_attr(rustfmt, rustfmt_skip)]
let e = i8x16::new(
2, 4, 6, 8, 10, 12, 14, 16,
18, 20, 22, 24, 26, 28, 30, 32,
);
assert_eq!(r, e);
}

View file

@ -15,7 +15,8 @@ pub const _SIDD_SWORD_OPS: i8 = 0b00000011;
/// For each character in `a`, find if it is in `b` *(Default)*
pub const _SIDD_CMP_EQUAL_ANY: i8 = 0b00000000;
/// For each character in `a`, determine if `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
/// For each character in `a`, determine if `b[0] <= c <= b[1] or b[1] <= c <=
/// b[2]...`
pub const _SIDD_CMP_RANGES: i8 = 0b00000100;
/// The strings defined by `a` and `b` are equal
pub const _SIDD_CMP_EQUAL_EACH: i8 = 0b00001000;
@ -46,11 +47,7 @@ pub const _SIDD_UNIT_MASK: i8 = 0b01000000;
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
pub unsafe fn _mm_cmpistrm(
a: __m128i,
b: __m128i,
imm8: i8,
) -> u8x16 {
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i8) -> u8x16 {
macro_rules! call {
($imm8:expr) => { pcmpistrm128(a, b, $imm8) }
}
@ -58,9 +55,9 @@ pub unsafe fn _mm_cmpistrm(
}
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return the generated index. Similar to [`_mm_cmpestri`]
/// with the excception that [`_mm_cmpestri`] requires the lengths of `a` and
/// `b` to be explicitly specified.
/// control in `imm8`, and return the generated index. Similar to
/// [`_mm_cmpestri`] with the excception that [`_mm_cmpestri`] requires the
/// lengths of `a` and `b` to be explicitly specified.
///
/// # Control modes
///
@ -105,7 +102,8 @@ pub unsafe fn _mm_cmpistrm(
/// use stdsimd::simd::u8x16;
/// use stdsimd::vendor::{__m128i, _mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED};
///
/// let haystack = b"This is a long string of text data\r\n\tthat extends multiple lines";
/// let haystack = b"This is a long string of text data\r\n\tthat extends
/// multiple lines";
/// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0";
///
/// let a = __m128i::from(u8x16::load(needle, 0));
@ -171,8 +169,8 @@ pub unsafe fn _mm_cmpistrm(
/// # }
/// ```
///
/// Find the index of the first character in the haystack that is within a range
/// of characters.
/// Find the index of the first character in the haystack that is within a
/// range of characters.
///
/// ```
/// # #![feature(cfg_target_feature)]
@ -269,11 +267,7 @@ pub unsafe fn _mm_cmpistrm(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistri(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistri128(a, b, $imm8) }
}
@ -286,11 +280,7 @@ pub unsafe fn _mm_cmpistri(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrz(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistriz128(a, b, $imm8) }
}
@ -303,11 +293,7 @@ pub unsafe fn _mm_cmpistrz(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrc(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistric128(a, b, $imm8) }
}
@ -320,11 +306,7 @@ pub unsafe fn _mm_cmpistrc(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrs(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistris128(a, b, $imm8) }
}
@ -336,11 +318,7 @@ pub unsafe fn _mm_cmpistrs(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistro(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistrio128(a, b, $imm8) }
}
@ -353,11 +331,7 @@ pub unsafe fn _mm_cmpistro(
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistra(
a: __m128i,
b: __m128i,
imm8: i8,
) -> i32 {
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i8) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpistria128(a, b, $imm8) }
}
@ -370,11 +344,7 @@ pub unsafe fn _mm_cmpistra(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
pub unsafe fn _mm_cmpestrm(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> u8x16 {
macro_rules! call {
($imm8:expr) => { pcmpestrm128(a, la, b, lb, $imm8) }
@ -383,9 +353,9 @@ pub unsafe fn _mm_cmpestrm(
}
/// Compare packed strings `a` and `b` with lengths `la` and `lb` using the
/// control in `imm8`, and return the generated index. Similar to [`_mm_cmpistri`]
/// with the excception that [`_mm_cmpistri`] implicityly determines the length of
/// `a` and `b`.
/// control in `imm8`, and return the generated index. Similar to
/// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly
/// determines the length of `a` and `b`.
///
/// # Control modes
///
@ -468,11 +438,7 @@ pub unsafe fn _mm_cmpestrm(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestri(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestri128(a, la, b, lb, $imm8) }
@ -487,11 +453,7 @@ pub unsafe fn _mm_cmpestri(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrz(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestriz128(a, la, b, lb, $imm8) }
@ -506,11 +468,7 @@ pub unsafe fn _mm_cmpestrz(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrc(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestric128(a, la, b, lb, $imm8) }
@ -525,11 +483,7 @@ pub unsafe fn _mm_cmpestrc(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrs(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestris128(a, la, b, lb, $imm8) }
@ -544,11 +498,7 @@ pub unsafe fn _mm_cmpestrs(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestro(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestrio128(a, la, b, lb, $imm8) }
@ -564,11 +514,7 @@ pub unsafe fn _mm_cmpestro(
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestra(
a: __m128i,
la: i32,
b: __m128i,
lb: i32,
imm8: i8,
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32 {
macro_rules! call {
($imm8:expr) => { pcmpestria128(a, la, b, lb, $imm8) }
@ -624,22 +570,35 @@ pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
}
#[allow(improper_ctypes)]
extern {
extern "C" {
// SSE 4.2 string and text comparison ops
#[link_name = "llvm.x86.sse42.pcmpestrm128"]
fn pcmpestrm128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> u8x16;
fn pcmpestrm128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> u8x16;
#[link_name = "llvm.x86.sse42.pcmpestri128"]
fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestri128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8)
-> i32;
#[link_name = "llvm.x86.sse42.pcmpestriz128"]
fn pcmpestriz128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestriz128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32;
#[link_name = "llvm.x86.sse42.pcmpestric128"]
fn pcmpestric128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestric128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32;
#[link_name = "llvm.x86.sse42.pcmpestris128"]
fn pcmpestris128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestris128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32;
#[link_name = "llvm.x86.sse42.pcmpestrio128"]
fn pcmpestrio128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestrio128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32;
#[link_name = "llvm.x86.sse42.pcmpestria128"]
fn pcmpestria128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> i32;
fn pcmpestria128(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8
) -> i32;
#[link_name = "llvm.x86.sse42.pcmpistrm128"]
fn pcmpistrm128(a: __m128i, b: __m128i, imm8: i8) -> u8x16;
#[link_name = "llvm.x86.sse42.pcmpistri128"]
@ -685,7 +644,8 @@ mod tests {
ptr::copy_nonoverlapping(
s.get_unchecked(0) as *const u8 as *const u8,
slice.get_unchecked_mut(0) as *mut u8 as *mut u8,
s.len());
s.len(),
);
__m128i::from(u8x16::load(slice, 0))
}
@ -694,8 +654,11 @@ mod tests {
let a = str_to_m128i(b"Hello! Good-Bye!");
let b = str_to_m128i(b"hello! good-bye!");
let i = sse42::_mm_cmpistrm(a, b, sse42::_SIDD_UNIT_MASK);
let res = u8x16::new(0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff);
#[cfg_attr(rustfmt, rustfmt_skip)]
let res = u8x16::new(
0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff,
);
assert_eq!(i, res);
}
@ -733,14 +696,23 @@ mod tests {
#[simd_test = "sse4.2"]
unsafe fn _mm_cmpistro() {
let a_bytes = u8x16::new(0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
let b_bytes = u8x16::new(0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a_bytes = u8x16::new(
0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b_bytes = u8x16::new(
0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
);
let a = __m128i::from(a_bytes);
let b = __m128i::from(b_bytes);
let i = sse42::_mm_cmpistro(
a, b, sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK);
a,
b,
sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK,
);
assert_eq!(0, i);
}
@ -757,15 +729,20 @@ mod tests {
let a = str_to_m128i(b"Hello!");
let b = str_to_m128i(b"Hello.");
let i = sse42::_mm_cmpestrm(a, 5, b, 5, sse42::_SIDD_UNIT_MASK);
assert_eq!(i, u8x16::new(0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
#[cfg_attr(rustfmt, rustfmt_skip)]
let r = u8x16::new(
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
);
assert_eq!(i, r);
}
#[simd_test = "sse4.2"]
unsafe fn _mm_cmpestri() {
let a = str_to_m128i(b"bar - garbage");
let b = str_to_m128i(b"foobar");
let i = sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
let i =
sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
assert_eq!(3, i);
}
@ -773,8 +750,8 @@ mod tests {
unsafe fn _mm_cmpestrz() {
let a = str_to_m128i(b"");
let b = str_to_m128i(b"Hello");
let i = sse42::_mm_cmpestrz(
a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
let i =
sse42::_mm_cmpestrz(a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED);
assert_eq!(1, i);
}
@ -782,19 +759,20 @@ mod tests {
unsafe fn _mm_cmpestrc() {
let va = str_to_m128i(b"!!!!!!!!");
let vb = str_to_m128i(b" ");
let i = sse42::_mm_cmpestrc(
va, 7, vb, 7, sse42::_SIDD_UNIT_MASK);
let i = sse42::_mm_cmpestrc(va, 7, vb, 7, sse42::_SIDD_UNIT_MASK);
assert_eq!(0, i);
}
#[simd_test = "sse4.2"]
unsafe fn _mm_cmpestrs() {
let a_bytes = u8x16::new(0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a_bytes = u8x16::new(
0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
);
let a = __m128i::from(a_bytes);
let b = __m128i::from(u8x16::splat(0x00));
let i = sse42::_mm_cmpestrs(
a, 8, b, 0, sse42::_SIDD_UWORD_OPS);
let i = sse42::_mm_cmpestrs(a, 8, b, 0, sse42::_SIDD_UWORD_OPS);
assert_eq!(0, i);
}
@ -802,8 +780,7 @@ mod tests {
unsafe fn _mm_cmpestro() {
let a = str_to_m128i(b"Hello");
let b = str_to_m128i(b"World");
let i = sse42::_mm_cmpestro(
a, 5, b, 5, sse42::_SIDD_UBYTE_OPS);
let i = sse42::_mm_cmpestro(a, 5, b, 5, sse42::_SIDD_UBYTE_OPS);
assert_eq!(0, i);
}
@ -812,7 +789,12 @@ mod tests {
let a = str_to_m128i(b"Cannot match a");
let b = str_to_m128i(b"Null after 14");
let i = sse42::_mm_cmpestra(
a, 14, b, 16, sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK);
a,
14,
b,
16,
sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK,
);
assert_eq!(1, i);
}

View file

@ -13,7 +13,8 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
pabsb128(a)
}
/// Compute the absolute value of each of the packed 16-bit signed integers in `a` and
/// Compute the absolute value of each of the packed 16-bit signed integers in
/// `a` and
/// return the 16-bit unsigned integer
#[inline(always)]
#[target_feature = "+ssse3"]
@ -22,7 +23,8 @@ pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
pabsw128(a)
}
/// Compute the absolute value of each of the packed 32-bit signed integers in `a` and
/// Compute the absolute value of each of the packed 32-bit signed integers in
/// `a` and
/// return the 32-bit unsigned integer
#[inline(always)]
#[target_feature = "+ssse3"]
@ -82,7 +84,9 @@ pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
(a, b, n)
};
const fn add(a: u32, b: u32) -> u32 { a + b }
const fn add(a: u32, b: u32) -> u32 {
a + b
}
macro_rules! shuffle {
($shift:expr) => {
simd_shuffle16(b, a, [
@ -98,14 +102,22 @@ pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
}
}
match n {
0 => shuffle!(0), 1 => shuffle!(1),
2 => shuffle!(2), 3 => shuffle!(3),
4 => shuffle!(4), 5 => shuffle!(5),
6 => shuffle!(6), 7 => shuffle!(7),
8 => shuffle!(8), 9 => shuffle!(9),
10 => shuffle!(10), 11 => shuffle!(11),
12 => shuffle!(12), 13 => shuffle!(13),
14 => shuffle!(14), 15 => shuffle!(15),
0 => shuffle!(0),
1 => shuffle!(1),
2 => shuffle!(2),
3 => shuffle!(3),
4 => shuffle!(4),
5 => shuffle!(5),
6 => shuffle!(6),
7 => shuffle!(7),
8 => shuffle!(8),
9 => shuffle!(9),
10 => shuffle!(10),
11 => shuffle!(11),
12 => shuffle!(12),
13 => shuffle!(13),
14 => shuffle!(14),
15 => shuffle!(15),
_ => shuffle!(16),
}
}
@ -223,7 +235,7 @@ pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
}
#[allow(improper_ctypes)]
extern {
extern "C" {
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
fn pabsb128(a: i8x16) -> u8x16;
@ -275,7 +287,7 @@ mod tests {
use stdsimd_test::simd_test;
use v128::*;
use x86::ssse3 as ssse3;
use x86::ssse3;
#[simd_test = "ssse3"]
unsafe fn _mm_abs_epi8() {
@ -297,44 +309,36 @@ mod tests {
#[simd_test = "ssse3"]
unsafe fn _mm_shuffle_epi8() {
let a = u8x16::new(
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
);
let b = u8x16::new(
4, 128, 4, 3,
24, 12, 6, 19,
12, 5, 5, 10,
4, 1, 8, 0,
);
let expected = u8x16::new(
5, 0, 5, 4,
9, 13, 7, 4,
13, 6, 6, 11,
5, 2, 9, 1,
);
let a =
u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b =
u8x16::new(4, 128, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
let expected =
u8x16::new(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
let r = ssse3::_mm_shuffle_epi8(a, b);
assert_eq!(r, expected);
}
#[simd_test = "ssse3"]
unsafe fn _mm_alignr_epi8() {
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
let a =
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b =
i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
let r = ssse3::_mm_alignr_epi8(a, b, 33);
assert_eq!(r, i8x16::splat(0));
let r = ssse3::_mm_alignr_epi8(a, b, 17);
let expected = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
let expected =
i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0);
assert_eq!(r, expected);
let r = ssse3::_mm_alignr_epi8(a, b, 16);
assert_eq!(r, a);
let r = ssse3::_mm_alignr_epi8(a, b, 15);
let expected = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let expected =
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq!(r, expected);
let r = ssse3::_mm_alignr_epi8(a, b, 0);
@ -397,8 +401,10 @@ mod tests {
#[simd_test = "ssse3"]
unsafe fn _mm_maddubs_epi16() {
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
let a =
u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b =
i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
let expected = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
let r = ssse3::_mm_maddubs_epi16(a, b);
assert_eq!(r, expected);
@ -415,9 +421,21 @@ mod tests {
#[simd_test = "ssse3"]
unsafe fn _mm_sign_epi8() {
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
let b = i8x16::new(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
let expected = i8x16::new(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x16::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, -14, -15, 16,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = i8x16::new(
4, 63, -4, 3, 24, 12, -6, -19,
12, 5, -5, 10, 4, 1, -8, 0,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = i8x16::new(
1, 2, -3, 4, 5, 6, -7, -8,
9, 10, -11, 12, 13, -14, 15, 0,
);
let r = ssse3::_mm_sign_epi8(a, b);
assert_eq!(r, expected);
}

View file

@ -1,16 +1,21 @@
//! Trailing Bit Manipulation (TBM) instruction set.
//!
//! The reference is [AMD64 Architecture Programmer's Manual, Volume 3:
//! General-Purpose and System
//! Instructions](http://support.amd.com/TechDocs/24594.pdf).
//! General-Purpose and System Instructions][amd64_ref].
//!
//! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29)
//! provides a quick overview of the available instructions.
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
//! instructions.
//!
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.
//! 28Trailing_Bit_Manipulation.29
#[cfg(test)]
use stdsimd_test::assert_instr;
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32
// TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select:
// intrinsic %llvm.x86.tbm.bextri.u32
/*
#[allow(dead_code)]
extern "C" {
@ -39,8 +44,8 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
@ -50,8 +55,8 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to be
/// extracted, and bits [15,8] specify the length of the range.
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
#[target_feature = "+tbm"]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
@ -122,7 +127,8 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
!x & (x.wrapping_add(1))
}
/// Sets the least significant zero bit of `x` and clears all bits above that bit.
/// Sets the least significant zero bit of `x` and clears all bits above
/// that bit.
///
/// If there is no zero bit in `x`, it sets all the bits.
#[inline(always)]
@ -132,7 +138,8 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_add(1))
}
/// Sets the least significant zero bit of `x` and clears all bits above that bit.
/// Sets the least significant zero bit of `x` and clears all bits above
/// that bit.
///
/// If there is no zero bit in `x`, it sets all the bits.
#[inline(always)]
@ -272,162 +279,152 @@ mod tests {
#[simd_test = "tbm"]
unsafe fn _blcfill_u32() {
assert_eq!(
tbm::_blcfill_u32(0b0101_0111u32),
0b0101_0000u32);
assert_eq!(
tbm::_blcfill_u32(0b1111_1111u32),
0u32);
assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcfill_u64() {
assert_eq!(
tbm::_blcfill_u64(0b0101_0111u64),
0b0101_0000u64);
assert_eq!(
tbm::_blcfill_u64(0b1111_1111u64),
0u64);
assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64);
}
#[simd_test = "tbm"]
unsafe fn _blci_u32() {
assert_eq!(
tbm::_blci_u32(0b0101_0000u32),
0b1111_1111_1111_1111_1111_1111_1111_1110u32);
0b1111_1111_1111_1111_1111_1111_1111_1110u32
);
assert_eq!(
tbm::_blci_u32(0b1111_1111u32),
0b1111_1111_1111_1111_1111_1110_1111_1111u32);
0b1111_1111_1111_1111_1111_1110_1111_1111u32
);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blci_u64() {
assert_eq!(
tbm::_blci_u64(0b0101_0000u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64);
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
);
assert_eq!(
tbm::_blci_u64(0b1111_1111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64);
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _blcic_u32() {
assert_eq!(
tbm::_blcic_u32(0b0101_0001u32),
0b0000_0010u32);
assert_eq!(
tbm::_blcic_u32(0b1111_1111u32),
0b1_0000_0000u32);
assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcic_u64() {
assert_eq!(
tbm::_blcic_u64(0b0101_0001u64),
0b0000_0010u64);
assert_eq!(
tbm::_blcic_u64(0b1111_1111u64),
0b1_0000_0000u64);
assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
}
#[simd_test = "tbm"]
unsafe fn _blcmsk_u32() {
assert_eq!(
tbm::_blcmsk_u32(0b0101_0001u32),
0b0000_0011u32);
assert_eq!(
tbm::_blcmsk_u32(0b1111_1111u32),
0b1_1111_1111u32);
assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcmsk_u64() {
assert_eq!(
tbm::_blcmsk_u64(0b0101_0001u64),
0b0000_0011u64);
assert_eq!(
tbm::_blcmsk_u64(0b1111_1111u64),
0b1_1111_1111u64);
assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test = "tbm"]
unsafe fn _blcs_u32() {
assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcs_u64() {
assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test = "tbm"]
unsafe fn _blsfill_u32() {
assert_eq!(
tbm::_blsfill_u32(0b0101_0100u32),
0b0101_0111u32);
assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
assert_eq!(
tbm::_blsfill_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32);
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blsfill_u64() {
assert_eq!(
tbm::_blsfill_u64(0b0101_0100u64),
0b0101_0111u64);
assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
assert_eq!(
tbm::_blsfill_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _blsic_u32() {
assert_eq!(
tbm::_blsic_u32(0b0101_0100u32),
0b1111_1111_1111_1111_1111_1111_1111_1011u32);
0b1111_1111_1111_1111_1111_1111_1111_1011u32
);
assert_eq!(
tbm::_blsic_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32);
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blsic_u64() {
assert_eq!(
tbm::_blsic_u64(0b0101_0100u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64);
assert_eq!(
tbm::_blsic_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
);
assert_eq!(
tbm::_blsic_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _t1mskc_u32() {
assert_eq!(
tbm::_t1mskc_u32(0b0101_0111u32),
0b1111_1111_1111_1111_1111_1111_1111_1000u32);
assert_eq!(
tbm::_t1mskc_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32);
assert_eq!(
tbm::_t1mskc_u32(0b0101_0111u32),
0b1111_1111_1111_1111_1111_1111_1111_1000u32
);
assert_eq!(
tbm::_t1mskc_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _t1mksc_u64() {
assert_eq!(
tbm::_t1mskc_u64(0b0101_0111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64);
assert_eq!(
tbm::_t1mskc_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64);
assert_eq!(
tbm::_t1mskc_u64(0b0101_0111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
);
assert_eq!(
tbm::_t1mskc_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]

View file

@ -2,7 +2,10 @@ use std::env;
fn main() {
println!("cargo:rerun-if-changed=build.rs");
let opt_level = env::var("OPT_LEVEL").ok().and_then(|s| s.parse().ok()).unwrap_or(0);
let opt_level = env::var("OPT_LEVEL")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let profile = env::var("PROFILE").unwrap_or(String::new());
if profile == "release" || opt_level >= 2 {
println!("cargo:rustc-cfg=optimized");

View file

@ -21,13 +21,13 @@ extern crate synom;
use proc_macro2::TokenStream;
#[proc_macro_attribute]
pub fn assert_instr(attr: proc_macro::TokenStream,
item: proc_macro::TokenStream)
-> proc_macro::TokenStream
{
pub fn assert_instr(
attr: proc_macro::TokenStream, item: proc_macro::TokenStream
) -> proc_macro::TokenStream {
let invoc = syn::parse::<Invoc>(attr)
.expect("expected #[assert_instr(instr, a = b, ...)]");
let item = syn::parse::<syn::Item>(item).expect("must be attached to an item");
let item =
syn::parse::<syn::Item>(item).expect("must be attached to an item");
let func = match item.node {
syn::ItemKind::Fn(ref f) => f,
_ => panic!("must be attached to a function"),
@ -40,10 +40,11 @@ pub fn assert_instr(attr: proc_macro::TokenStream,
(quote! { #[ignore] }).into()
};
let name = &func.ident;
let assert_name = syn::Ident::from(&format!("assert_{}_{}",
name.sym.as_str(),
instr.sym.as_str())[..]);
let shim_name = syn::Ident::from(&format!("{}_shim", name.sym.as_str())[..]);
let assert_name = syn::Ident::from(
&format!("assert_{}_{}", name.sym.as_str(), instr.sym.as_str())[..],
);
let shim_name =
syn::Ident::from(&format!("{}_shim", name.sym.as_str())[..]);
let (to_test, test_name) = if invoc.args.len() == 0 {
(TokenStream::empty(), &func.ident)
} else {
@ -69,16 +70,29 @@ pub fn assert_instr(attr: proc_macro::TokenStream,
}
};
}
let attrs = item.attrs.iter().filter(|attr| {
attr.path.segments.get(0).item().ident.sym.as_str().starts_with("target")
}).collect::<Vec<_>>();
let attrs = item.attrs
.iter()
.filter(|attr| {
attr.path
.segments
.get(0)
.item()
.ident
.sym
.as_str()
.starts_with("target")
})
.collect::<Vec<_>>();
let attrs = Append(&attrs);
(quote! {
#attrs
unsafe fn #shim_name(#(#inputs),*) #ret {
#name(#(#input_vals),*)
}
}.into(), &shim_name)
(
quote! {
#attrs
unsafe fn #shim_name(#(#inputs),*) #ret {
#name(#(#input_vals),*)
}
}.into(),
&shim_name,
)
};
let tts: TokenStream = quote! {
@ -128,8 +142,9 @@ impl synom::Synom for Invoc {
struct Append<T>(T);
impl<T> quote::ToTokens for Append<T>
where T: Clone + IntoIterator,
T::Item: quote::ToTokens
where
T: Clone + IntoIterator,
T::Item: quote::ToTokens,
{
fn to_tokens(&self, tokens: &mut quote::Tokens) {
for item in self.0.clone() {

View file

@ -1,16 +1,16 @@
//! Implementation of the `#[simd_test]` macro
//!
//! This macro expands to a `#[test]` function which tests the local machine for
//! the appropriate cfg before calling the inner test function.
//! This macro expands to a `#[test]` function which tests the local machine
//! for the appropriate cfg before calling the inner test function.
#![feature(proc_macro)]
extern crate proc_macro2;
extern crate proc_macro;
#[macro_use]
extern crate quote;
extern crate proc_macro;
extern crate proc_macro2;
use proc_macro2::{TokenStream, Term, TokenNode, TokenTree};
use proc_macro2::{Term, TokenNode, TokenStream, TokenTree};
use proc_macro2::Literal;
fn string(s: &str) -> TokenTree {
@ -22,8 +22,9 @@ fn string(s: &str) -> TokenTree {
}
#[proc_macro_attribute]
pub fn simd_test(attr: proc_macro::TokenStream,
item: proc_macro::TokenStream) -> proc_macro::TokenStream {
pub fn simd_test(
attr: proc_macro::TokenStream, item: proc_macro::TokenStream
) -> proc_macro::TokenStream {
let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
if tokens.len() != 2 {
panic!("expected #[simd_test = \"feature\"]");
@ -37,8 +38,9 @@ pub fn simd_test(attr: proc_macro::TokenStream,
TokenNode::Literal(ref l) => l.to_string(),
_ => panic!("expected #[simd_test = \"feature\"]"),
};
let enable_feature = enable_feature.trim_left_matches('"')
.trim_right_matches('"');
let enable_feature = enable_feature
.trim_left_matches('"')
.trim_right_matches('"');
let enable_feature = string(&format!("+{}", enable_feature));
let item = TokenStream::from(item);
let name = find_name(item.clone());
@ -67,7 +69,7 @@ fn find_name(item: TokenStream) -> Term {
while let Some(tok) = tokens.next() {
if let TokenNode::Term(word) = tok.kind {
if word.as_str() == "fn" {
break
break;
}
}
}

View file

@ -7,12 +7,12 @@
#![feature(proc_macro)]
extern crate assert_instr_macro;
extern crate simd_test_macro;
extern crate backtrace;
extern crate cc;
extern crate rustc_demangle;
#[macro_use]
extern crate lazy_static;
extern crate rustc_demangle;
extern crate simd_test_macro;
use std::collections::HashMap;
use std::env;
@ -23,7 +23,8 @@ pub use assert_instr_macro::*;
pub use simd_test_macro::*;
lazy_static! {
static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
static ref DISASSEMBLY: HashMap<String, Vec<Function>>
= disassemble_myself();
}
struct Function {
@ -37,14 +38,22 @@ struct Instruction {
fn disassemble_myself() -> HashMap<String, Vec<Function>> {
let me = env::current_exe().expect("failed to get current exe");
if cfg!(target_arch = "x86_64") &&
cfg!(target_os = "windows") &&
cfg!(target_env = "msvc") {
let mut cmd = cc::windows_registry::find("x86_64-pc-windows-msvc", "dumpbin.exe")
.expect("failed to find `dumpbin` tool");
let output = cmd.arg("/DISASM").arg(&me).output()
if cfg!(target_arch = "x86_64") && cfg!(target_os = "windows")
&& cfg!(target_env = "msvc")
{
let mut cmd = cc::windows_registry::find(
"x86_64-pc-windows-msvc",
"dumpbin.exe",
).expect("failed to find `dumpbin` tool");
let output = cmd.arg("/DISASM")
.arg(&me)
.output()
.expect("failed to execute dumpbin");
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
parse_dumpbin(&String::from_utf8_lossy(&output.stdout))
} else if cfg!(target_os = "windows") {
@ -55,7 +64,11 @@ fn disassemble_myself() -> HashMap<String, Vec<Function>> {
.arg(&me)
.output()
.expect("failed to execute otool");
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
parse_otool(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
@ -66,10 +79,16 @@ fn disassemble_myself() -> HashMap<String, Vec<Function>> {
.arg(&me)
.output()
.expect("failed to execute objdump");
println!("{}\n{}", output.status, String::from_utf8_lossy(&output.stderr));
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
parse_objdump(&str::from_utf8(&output.stdout).expect("stdout not utf8"))
parse_objdump(
&str::from_utf8(&output.stdout).expect("stdout not utf8"),
)
}
}
@ -91,7 +110,7 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
while let Some(header) = lines.next() {
// symbols should start with `$hex_addr <$name>:`
if !header.ends_with(">:") {
continue
continue;
}
let start = header.find("<").unwrap();
let symbol = &header[start + 1..header.len() - 2];
@ -99,15 +118,17 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
let mut instructions = Vec::new();
while let Some(instruction) = lines.next() {
if instruction.is_empty() {
break
break;
}
// Each line of instructions should look like:
//
// $rel_offset: ab cd ef 00 $instruction...
let parts = instruction.split_whitespace()
let parts = instruction
.split_whitespace()
.skip(1)
.skip_while(|s| {
s.len() == expected_len && usize::from_str_radix(s, 16).is_ok()
s.len() == expected_len
&& usize::from_str_radix(s, 16).is_ok()
})
.map(|s| s.to_string())
.collect::<Vec<String>>();
@ -116,10 +137,12 @@ fn parse_objdump(output: &str) -> HashMap<String, Vec<Function>> {
ret.entry(normalize(symbol))
.or_insert(Vec::new())
.push(Function { instrs: instructions });
.push(Function {
instrs: instructions,
});
}
return ret
return ret;
}
fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
@ -138,7 +161,7 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
};
// symbols should start with `$symbol:`
if !header.ends_with(":") {
continue
continue;
}
// strip the leading underscore and the trailing colon
let symbol = &header[1..header.len() - 1];
@ -147,12 +170,13 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
while let Some(instruction) = lines.next() {
if instruction.ends_with(":") {
cached_header = Some(instruction);
break
break;
}
// Each line of instructions should look like:
//
// $addr $instruction...
let parts = instruction.split_whitespace()
let parts = instruction
.split_whitespace()
.skip(1)
.map(|s| s.to_string())
.collect::<Vec<String>>();
@ -161,10 +185,12 @@ fn parse_otool(output: &str) -> HashMap<String, Vec<Function>> {
ret.entry(normalize(symbol))
.or_insert(Vec::new())
.push(Function { instrs: instructions });
.push(Function {
instrs: instructions,
});
}
return ret
return ret;
}
fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
@ -183,7 +209,7 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
};
// symbols should start with `$symbol:`
if !header.ends_with(":") {
continue
continue;
}
// strip the trailing colon
let symbol = &header[..header.len() - 1];
@ -192,20 +218,21 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
while let Some(instruction) = lines.next() {
if !instruction.starts_with(" ") {
cached_header = Some(instruction);
break
break;
}
// Each line looks like:
//
// > $addr: ab cd ef $instr..
// > 00 12 # this line os optional
if instruction.starts_with(" ") {
continue
continue;
}
let parts = instruction.split_whitespace()
let parts = instruction
.split_whitespace()
.skip(1)
.skip_while(|s| {
s.len() == 2 && usize::from_str_radix(s, 16).is_ok()
})
.skip_while(
|s| s.len() == 2 && usize::from_str_radix(s, 16).is_ok(),
)
.map(|s| s.to_string())
.collect::<Vec<String>>();
instructions.push(Instruction { parts });
@ -213,10 +240,12 @@ fn parse_dumpbin(output: &str) -> HashMap<String, Vec<Function>> {
ret.entry(normalize(symbol))
.or_insert(Vec::new())
.push(Function { instrs: instructions });
.push(Function {
instrs: instructions,
});
}
return ret
return ret;
}
fn normalize(symbol: &str) -> String {
@ -266,7 +295,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
// instruction: tzcntl => tzcnt and compares that.
if part.starts_with(expected) {
found = true;
break
break;
}
}
}
@ -274,7 +303,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
let probably_only_one_instruction = function.instrs.len() < 30;
if found && probably_only_one_instruction {
return
return;
}
// Help debug by printing out the found disassembly, and then panic as we

View file

@ -1,10 +1,10 @@
#![cfg_attr(feature = "strict", deny(warnings))]
#![feature(cfg_target_feature)]
extern crate cupid;
#[macro_use]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
extern crate stdsimd;
extern crate cupid;
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]