diff --git a/library/compiler-builtins/libm/.travis.yml b/library/compiler-builtins/libm/.travis.yml
index 47f2b2f205df..758316178196 100644
--- a/library/compiler-builtins/libm/.travis.yml
+++ b/library/compiler-builtins/libm/.travis.yml
@@ -29,6 +29,13 @@ matrix:
     - env: TARGET=cargo-fmt
       rust: beta
 
+    - env: TARGET=wasm32-unknown-unknown
+      rust: nightly
+      install: rustup target add $TARGET
+      script:
+        - cargo build --target $TARGET
+        - cargo build --no-default-features --target $TARGET
+
 before_install: set -e
 
 install:
diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml
index cedf8d267db8..f7a528334241 100644
--- a/library/compiler-builtins/libm/Cargo.toml
+++ b/library/compiler-builtins/libm/Cargo.toml
@@ -24,3 +24,7 @@ members = [
 
 [dev-dependencies]
 shared = { path = "shared" }
+
+[features]
+default = ['stable']
+stable = []
diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs
index 627c6443e3dc..6be458728197 100644
--- a/library/compiler-builtins/libm/src/lib.rs
+++ b/library/compiler-builtins/libm/src/lib.rs
@@ -11,6 +11,10 @@
 #![deny(warnings)]
 #![no_std]
+#![cfg_attr(
+    all(target_arch = "wasm32", not(feature = "stable")),
+    feature(core_intrinsics)
+)]
 
 mod math;
diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs
index 4db2ca840368..5dbfa6a2c0ad 100644
--- a/library/compiler-builtins/libm/src/math/ceil.rs
+++ b/library/compiler-builtins/libm/src/math/ceil.rs
@@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;
 
 #[inline]
 pub fn ceil(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.ceil` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::ceilf64(x) }
+        }
+    }
     let u: u64 = x.to_bits();
     let e: i64 = (u >> 52 & 0x7ff) as i64;
     let y: f64;
diff --git a/library/compiler-builtins/libm/src/math/ceilf.rs b/library/compiler-builtins/libm/src/math/ceilf.rs
index 16bffb3002e5..c8cd4b5aa5b5 100644
--- a/library/compiler-builtins/libm/src/math/ceilf.rs
+++ b/library/compiler-builtins/libm/src/math/ceilf.rs
@@ -2,6 +2,14 @@ use core::f32;
 
 #[inline]
 pub fn ceilf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.ceil` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::ceilf32(x) }
+        }
+    }
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff) - 0x7f) as i32;
diff --git a/library/compiler-builtins/libm/src/math/fabs.rs b/library/compiler-builtins/libm/src/math/fabs.rs
index 9e081f3f9f69..7c804653c996 100644
--- a/library/compiler-builtins/libm/src/math/fabs.rs
+++ b/library/compiler-builtins/libm/src/math/fabs.rs
@@ -2,5 +2,13 @@ use core::u64;
 
 #[inline]
 pub fn fabs(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.abs` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::fabsf64(x) }
+        }
+    }
     f64::from_bits(x.to_bits() & (u64::MAX / 2))
 }
diff --git a/library/compiler-builtins/libm/src/math/fabsf.rs b/library/compiler-builtins/libm/src/math/fabsf.rs
index 4cc9411169ab..884c20f6c410 100644
--- a/library/compiler-builtins/libm/src/math/fabsf.rs
+++ b/library/compiler-builtins/libm/src/math/fabsf.rs
@@ -1,4 +1,12 @@
 #[inline]
 pub fn fabsf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.abs` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::fabsf32(x) }
+        }
+    }
     f32::from_bits(x.to_bits() & 0x7fffffff)
 }
diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs
index 997865d39e93..b14a48d55bc7 100644
--- a/library/compiler-builtins/libm/src/math/floor.rs
+++ b/library/compiler-builtins/libm/src/math/floor.rs
@@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;
 
 #[inline]
 pub fn floor(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.floor` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::floorf64(x) }
+        }
+    }
     let ui = x.to_bits();
     let e = ((ui >> 52) & 0x7ff) as i32;
diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs
index 9c263b51828b..71b5953df3e2 100644
--- a/library/compiler-builtins/libm/src/math/floorf.rs
+++ b/library/compiler-builtins/libm/src/math/floorf.rs
@@ -2,6 +2,14 @@ use core::f32;
 
 #[inline]
 pub fn floorf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.floor` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::floorf32(x) }
+        }
+    }
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff) - 0x7f) as i32;
diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs
index da34fb4cecd1..e51b1511dfcb 100644
--- a/library/compiler-builtins/libm/src/math/mod.rs
+++ b/library/compiler-builtins/libm/src/math/mod.rs
@@ -58,6 +58,17 @@ macro_rules! i {
     };
 }
 
+macro_rules! llvm_intrinsically_optimized {
+    (#[cfg($($clause:tt)*)] $e:expr) => {
+        #[cfg(all(not(feature = "stable"), $($clause)*))]
+        {
+            if true { // thwart the dead code lint
+                $e
+            }
+        }
+    };
+}
+
 // Public modules
 mod acos;
 mod acosf;
diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs
index cbadb49bba03..b2387a26e750 100644
--- a/library/compiler-builtins/libm/src/math/sqrt.rs
+++ b/library/compiler-builtins/libm/src/math/sqrt.rs
@@ -82,6 +82,18 @@ const TINY: f64 = 1.0e-300;
 
 #[inline]
 pub fn sqrt(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.sqrt` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return if x < 0.0 {
+                f64::NAN
+            } else {
+                unsafe { ::core::intrinsics::sqrtf64(x) }
+            }
+        }
+    }
     let mut z: f64;
     let sign: u32 = 0x80000000;
     let mut ix0: i32;
diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs
index 49984689efc2..33cafbcbda36 100644
--- a/library/compiler-builtins/libm/src/math/sqrtf.rs
+++ b/library/compiler-builtins/libm/src/math/sqrtf.rs
@@ -17,6 +17,18 @@ const TINY: f32 = 1.0e-30;
 
 #[inline]
 pub fn sqrtf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.sqrt` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return if x < 0.0 {
+                ::core::f32::NAN
+            } else {
+                unsafe { ::core::intrinsics::sqrtf32(x) }
+            }
+        }
+    }
     let mut z: f32;
     let sign: i32 = 0x80000000u32 as i32;
     let mut ix: i32;
diff --git a/library/compiler-builtins/libm/src/math/trunc.rs b/library/compiler-builtins/libm/src/math/trunc.rs
index 6bea67cbc165..8eecfcf538e5 100644
--- a/library/compiler-builtins/libm/src/math/trunc.rs
+++ b/library/compiler-builtins/libm/src/math/trunc.rs
@@ -2,6 +2,14 @@ use core::f64;
 
 #[inline]
 pub fn trunc(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.trunc` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::truncf64(x) }
+        }
+    }
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
     let mut i: u64 = x.to_bits();
diff --git a/library/compiler-builtins/libm/src/math/truncf.rs b/library/compiler-builtins/libm/src/math/truncf.rs
index 9d42620d9666..0d74fea9c9ee 100644
--- a/library/compiler-builtins/libm/src/math/truncf.rs
+++ b/library/compiler-builtins/libm/src/math/truncf.rs
@@ -2,6 +2,14 @@ use core::f32;
 
 #[inline]
 pub fn truncf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.trunc` native instruction, so we can leverage this for both code
+    // size and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::truncf32(x) }
+        }
+    }
    let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120

    let mut i: u32 = x.to_bits();
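
Note on the mechanics (a reviewer's sketch, not part of the patch): the `llvm_intrinsically_optimized!` macro added in `src/math/mod.rs` is a thin cfg wrapper, so each invocation expands to a block that exists only when the target matches and the default `stable` feature is disabled. Below is a minimal standalone version of the resulting pattern, using `ceil` as the example. The `fallback_ceil` helper is a hypothetical, naive stand-in for libm's real bit-twiddling implementation, and the crate is assumed to declare the same `stable` cargo feature this patch adds.

    #![no_std]
    #![cfg_attr(
        all(target_arch = "wasm32", not(feature = "stable")),
        feature(core_intrinsics)
    )]

    pub fn ceil(x: f64) -> f64 {
        // Compiled in only on wasm32 with the `stable` feature off; LLVM
        // lowers the intrinsic to a single `f64.ceil` instruction.
        #[cfg(all(not(feature = "stable"), target_arch = "wasm32"))]
        {
            if true {
                // `if true` thwarts the unreachable-code lint on the
                // fallback below (matching the macro's own comment).
                return unsafe { ::core::intrinsics::ceilf64(x) };
            }
        }
        // On every other target, or with default features, the portable
        // implementation runs instead.
        fallback_ceil(x)
    }

    // Hypothetical stand-in for the portable routine; only valid for
    // finite inputs with |x| < 2^63.
    fn fallback_ceil(x: f64) -> f64 {
        let t = x as i64 as f64; // truncate toward zero
        if t < x { t + 1.0 } else { t }
    }

Two details worth noting from the patch itself: `sqrt` and `sqrtf` additionally guard negative inputs and return NaN before reaching the intrinsic, and the new CI job exercises both paths via `rustup target add wasm32-unknown-unknown` followed by `cargo build --target $TARGET` and `cargo build --no-default-features --target $TARGET` on nightly, while the default build (with the `stable` feature) keeps the crate compiling on stable Rust.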