diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs
index 7b23d1e6cde5..ea3e5082ea4b 100644
--- a/library/stdarch/src/x86/avx.rs
+++ b/library/stdarch/src/x86/avx.rs
@@ -1,5 +1,8 @@
 use v256::*;
 
+// #[cfg(test)]
+// use assert_instr::assert_instr;
+
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
 #[inline(always)]
@@ -23,11 +26,85 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
     unsafe { addsubpd256(a, b) }
 }
 
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.avx.addsub.pd.256"]
-    fn addsubpd256(a: f64x4, b:f64x4) -> f64x4;
+    fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
+}
+
+/// Subtract the packed double-precision (64-bit) floating-point elements of
+/// `b` from the corresponding elements of `a` and return the differences.
+#[inline(always)]
+#[target_feature = "+avx"]
+// #[cfg_attr(test, assert_instr(vsubpd))]
+pub fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
+    unsafe { subpd256(a, b) } // FFI call into the `subpd256` intrinsic binding
+}
+
+#[allow(improper_ctypes)] // NOTE(review): presumably silences the FFI lint for `f64x4` — confirm
+extern "C" {
+    #[link_name = "llvm.x86.avx.sub.pd.256"] // NOTE(review): confirm LLVM defines this intrinsic
+    fn subpd256(a: f64x4, b: f64x4) -> f64x4; // called only by `_mm256_sub_pd`
+}
+
+/// Subtract the packed single-precision (32-bit) floating-point elements of
+/// `b` from the corresponding elements of `a` and return the differences.
+#[inline(always)]
+#[target_feature = "+avx"]
+// #[cfg_attr(test, assert_instr(vsubps))]
+pub fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 {
+    unsafe { subps256(a, b) } // FFI call into the `subps256` intrinsic binding
+}
+
+#[allow(improper_ctypes)] // NOTE(review): presumably silences the FFI lint for `f32x8` — confirm
+extern "C" {
+    #[link_name = "llvm.x86.avx.sub.ps.256"] // NOTE(review): confirm LLVM defines this intrinsic
+    fn subps256(a: f32x8, b: f32x8) -> f32x8; // called only by `_mm256_sub_ps`
+}
+
+/// Round packed double-precision (64-bit) floating-point elements in `a`
+/// according to the flag `b`. The bits of `b` are interpreted as follows:
+/// - Bits [7:4] are reserved.
+/// - Bit [3] is the precision exception control:
+///   - 0: A normal PE exception is used.
+///   - 1: The PE field is not updated.
+/// - Bit [2] is the rounding control source:
+///   - 0: Use bits [1:0] of `b`.
+///   - 1: Use the current MXCSR setting.
+/// - Bits [1:0] contain the rounding control definition:
+///   - 00: Nearest.
+///   - 01: Downward (toward negative infinity).
+///   - 10: Upward (toward positive infinity).
+///   - 11: Truncated.
+#[inline(always)]
+#[target_feature = "+avx"]
+// #[cfg_attr(test, assert_instr(vroundpd))]
+pub fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
+    unsafe { roundpd256(a, b) } // FFI call; `b` is forwarded unchanged as the control flag
+}
+
+#[allow(improper_ctypes)] // NOTE(review): presumably silences the FFI lint for `f64x4` — confirm
+extern "C" {
+    #[link_name = "llvm.x86.avx.round.pd.256"] // symbol the declaration below is linked to
+    fn roundpd256(a: f64x4, b: i32) -> f64x4; // `b` is the rounding-control flag from `_mm256_round_pd`
+}
+
+/// Round packed double-precision (64-bit) floating-point elements in `a` toward
+/// positive infinity, i.e. `_mm256_round_pd` with the "upward" rounding control.
+#[inline(always)]
+#[target_feature = "+avx"]
+// #[cfg_attr(test, assert_instr(vroundpd))]
+pub fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
+    _mm256_round_pd(a, 0b00000010) // bits [1:0] = 10: round upward
+}
+
+/// Round packed double-precision (64-bit) floating-point elements in `a` toward
+/// negative infinity, i.e. `_mm256_round_pd` with the "downward" rounding control.
+#[inline(always)]
+#[target_feature = "+avx"]
+// #[cfg_attr(test, assert_instr(vroundpd))]
+pub fn _mm256_floor_pd(a: f64x4) -> f64x4 {
+    _mm256_round_pd(a, 0b00000001) // bits [1:0] = 01: round downward
 }
 
 
@@ -65,4 +142,58 @@ mod tests {
         let e = f64x4::new(-4.0,8.0,-4.0,12.0);
         assert_eq!(r, e);
     }
+
+    #[test]
+    #[target_feature = "+avx"]
+    fn _mm256_sub_pd() {
+        let minuend = f64x4::new(1.0, 2.0, 3.0, 4.0);
+        let subtrahend = f64x4::new(5.0, 6.0, 7.0, 8.0);
+        let expected = f64x4::new(-4.0, -4.0, -4.0, -4.0); // every lane differs by exactly 4
+        let actual = avx::_mm256_sub_pd(minuend, subtrahend);
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    #[target_feature = "+avx"]
+    fn _mm256_sub_ps() {
+        let minuend = f32x8::new(1.0, 2.0, 3.0, 4.0, -1.0, -2.0, -3.0, -4.0);
+        let subtrahend = f32x8::new(5.0, 6.0, 7.0, 8.0, 3.0, 2.0, 1.0, 0.0);
+        let expected = f32x8::new(-4.0, -4.0, -4.0, -4.0, -4.0, -4.0, -4.0, -4.0);
+        let actual = avx::_mm256_sub_ps(minuend, subtrahend); // all eight lanes differ by 4
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    #[target_feature = "+avx"]
+    pub fn _mm256_round_pd() {
+        let input = f64x4::new(1.55, 2.2, 3.99, -1.2);
+        let nearest = avx::_mm256_round_pd(input, 0b00000000); // bits [1:0] = 00
+        let nearest_expected = f64x4::new(2.0, 2.0, 4.0, -1.0);
+        assert_eq!(nearest, nearest_expected);
+        let floored = avx::_mm256_round_pd(input, 0b00000001); // bits [1:0] = 01
+        let floored_expected = f64x4::new(1.0, 2.0, 3.0, -2.0);
+        assert_eq!(floored, floored_expected);
+        let ceiled = avx::_mm256_round_pd(input, 0b00000010); // bits [1:0] = 10
+        let ceiled_expected = f64x4::new(2.0, 3.0, 4.0, -1.0);
+        assert_eq!(ceiled, ceiled_expected);
+    }
+
+    #[test]
+    #[target_feature = "+avx"]
+    pub fn _mm256_floor_pd() {
+        let input = f64x4::new(1.55, 2.2, 3.99, -1.2);
+        let expected = f64x4::new(1.0, 2.0, 3.0, -2.0); // each lane rounded toward -infinity
+        let floored = avx::_mm256_floor_pd(input);
+        assert_eq!(floored, expected);
+    }
+
+    #[test]
+    #[target_feature = "+avx"]
+    pub fn _mm256_ceil_pd() {
+        let a = f64x4::new(1.55, 2.2, 3.99, -1.2); // positive and negative fractional lanes
+        let result_up = avx::_mm256_ceil_pd(a);
+        let expected_up = f64x4::new(2.0, 3.0, 4.0, -1.0); // each lane rounded toward +infinity
+        assert_eq!(result_up, expected_up);
+    }
+
 }