From 42fce292ab45c4d28de82799a83f3313b9ca9907 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:27:22 +0000
Subject: [PATCH 1/3] Add a generic version of `floor`

Additionally, make use of this version to implement `floor` and
`floorf`.

Similar to `ceil`, musl's `floorf` routine seems to work better for all
float widths than the `floor` algorithm. Trying with the `floor` (`f64`)
algorithm produced the following regressions:

    icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor()
    Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14064|13171                (+6.78005%) [+1.06780x]
      L1 Hits:                            16821|15802                (+6.44855%) [+1.06449x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               8|9                    (-11.1111%) [-1.12500x]
      Total read+write:                   16829|15811                (+6.43856%) [+1.06439x]
      Estimated Cycles:                   17101|16117                (+6.10535%) [+1.06105x]
    icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128()
      Baselines:                      softfloat|softfloat
      Instructions:                      166868|N/A                  (*********)
      L1 Hits:                           221429|N/A                  (*********)
      L2 Hits:                                1|N/A                  (*********)
      RAM Hits:                              34|N/A                  (*********)
      Total read+write:                  221464|N/A                  (*********)
      Estimated Cycles:                  222624|N/A                  (*********)
    icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16()
      Baselines:                      softfloat|softfloat
      Instructions:                      143029|N/A                  (*********)
      L1 Hits:                           176517|N/A                  (*********)
      L2 Hits:                                1|N/A                  (*********)
      RAM Hits:                              13|N/A                  (*********)
      Total read+write:                  176531|N/A                  (*********)
      Estimated Cycles:                  176977|N/A                  (*********)
    icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf()
    Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14732|10441                (+41.0976%) [+1.41098x]
      L1 Hits:                            17616|13027                (+35.2268%) [+1.35227x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               8|6                    (+33.3333%) [+1.33333x]
      Total read+write:                   17624|13033                (+35.2260%) [+1.35226x]
      Estimated Cycles:                   17896|13237                (+35.1968%) [+1.35197x]
---
 .../libm/etc/function-definitions.json        |   6 +-
 .../compiler-builtins/libm/src/math/floor.rs  |  41 +------
 .../compiler-builtins/libm/src/math/floorf.rs |  52 +--------
 .../libm/src/math/generic/floor.rs            | 106 ++++++++++++++++++
 .../libm/src/math/generic/mod.rs              |   2 +
 5 files changed, 114 insertions(+), 93 deletions(-)
 create mode 100644 library/compiler-builtins/libm/src/math/generic/floor.rs

diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json
index c75152f63019..6a865f42784d 100644
--- a/library/compiler-builtins/libm/etc/function-definitions.json
+++ b/library/compiler-builtins/libm/etc/function-definitions.json
@@ -336,14 +336,16 @@
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/floor.rs"
+            "src/math/floor.rs",
+            "src/math/generic/floor.rs"
         ],
         "type": "f64"
     },
     "floorf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/floorf.rs"
+            "src/math/floorf.rs",
+            "src/math/generic/floor.rs"
         ],
         "type": "f32"
     },
diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs
index 2823bf44d9c3..b4f02abc4911 100644
--- a/library/compiler-builtins/libm/src/math/floor.rs
+++ b/library/compiler-builtins/libm/src/math/floor.rs
@@ -1,8 +1,3 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
 /// Floor (f64)
 ///
 /// Finds the nearest integer less than or equal to `x`.
@@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 {
         args: x,
     }
 
-    let ui = x.to_bits();
-    let e = ((ui >> 52) & 0x7ff) as i32;
-
-    if (e >= 0x3ff + 52) || (x == 0.) {
-        return x;
-    }
-    /* y = int(x) - x, where int(x) is an integer neighbor of x */
-    let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    /* special case because of non-nearest rounding modes */
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (ui >> 63) != 0 { -1. } else { 0. };
-    }
-    if y > 0. { x + y - 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floor(1.1), 1.0);
-        assert_eq!(floor(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floor(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(floor(f), f);
-        }
-    }
+    return super::generic::floor(x);
 }
diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs
index 23a18c0f7376..16957b7f3557 100644
--- a/library/compiler-builtins/libm/src/math/floorf.rs
+++ b/library/compiler-builtins/libm/src/math/floorf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Floor (f32)
 ///
 /// Finds the nearest integer less than or equal to `x`.
@@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 {
         args: x,
     }
 
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m: u32 = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui = 0;
-        } else if ui << 1 != 0 {
-            return -1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floorf(0.5), 0.0);
-        assert_eq!(floorf(1.1), 1.0);
-        assert_eq!(floorf(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floorf(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(floorf(f), f);
-        }
-    }
+    return super::generic::floor(x);
 }
diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs
new file mode 100644
index 000000000000..6754c08f870b
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/generic/floor.rs
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/floorf.c */
+
+//! Generic `floor` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::{Float, Int, IntTy, MinInt};
+
+pub fn floor<F: Float>(x: F) -> F {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return x;
+    }
+
+    if e >= 0 {
+        // |x| >= 1.0
+
+        let m = F::SIG_MASK >> e.unsigned();
+        if ix & m == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return x;
+        }
+
+        // Otherwise, raise an inexact exception.
+        force_eval!(x + F::MAX);
+
+        if x.is_sign_negative() {
+            ix += m;
+        }
+
+        ix &= !m;
+        F::from_bits(ix)
+    } else {
+        // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
+        force_eval!(x + F::MAX);
+
+        if x.is_sign_positive() {
+            // 0.0 <= x < 1.0; rounding down goes toward +0.0.
+            F::ZERO
+        } else if ix << 1 != zero {
+            // -1.0 < x < 0.0; rounding down goes toward -1.0.
+            F::NEG_ONE
+        } else {
+            // -0.0 remains unchanged
+            x
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
+    fn spec_test<F: Float>() {
+        // Not Asserted: that the current rounding mode has no effect.
+        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
+            assert_biteq!(floor(f), f);
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(floor(0.5f32), 0.0);
+        assert_eq!(floor(1.1f32), 1.0);
+        assert_eq!(floor(2.9f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(floor(1.1f64), 1.0);
+        assert_eq!(floor(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs
index f8bb9fa6adb1..b08a77d5d649 100644
--- a/library/compiler-builtins/libm/src/math/generic/mod.rs
+++ b/library/compiler-builtins/libm/src/math/generic/mod.rs
@@ -2,6 +2,7 @@ mod ceil;
 mod copysign;
 mod fabs;
 mod fdim;
+mod floor;
 mod sqrt;
 mod trunc;
 
@@ -9,5 +10,6 @@ pub use ceil::ceil;
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
+pub use floor::floor;
 pub use sqrt::sqrt;
 pub use trunc::trunc;

From 6a8bb0fa80ba2e777dea045441a0a879c4247b93 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:33:24 +0000
Subject: [PATCH 2/3] Add `floorf16` and `floorf128`

Use the generic algorithms to provide implementations for these
routines.
---
 .../crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 .../libm/crates/libm-macros/src/shared.rs          |  4 ++--
 .../libm/crates/libm-test/benches/icount.rs        |  2 ++
 .../libm/crates/libm-test/benches/random.rs        |  4 +++-
 .../libm/crates/libm-test/src/mpfloat.rs           |  4 ++++
 .../crates/libm-test/tests/compare_built_musl.rs   |  2 ++
 .../compiler-builtins/libm/crates/util/src/main.rs |  2 ++
 .../libm/etc/function-definitions.json             | 14 ++++++++++++++
 .../compiler-builtins/libm/etc/function-list.txt   |  2 ++
 .../compiler-builtins/libm/src/math/floorf128.rs   |  7 +++++++
 .../compiler-builtins/libm/src/math/floorf16.rs    |  7 +++++++
 library/compiler-builtins/libm/src/math/mod.rs     |  4 ++++
 12 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 library/compiler-builtins/libm/src/math/floorf128.rs
 create mode 100644 library/compiler-builtins/libm/src/math/floorf16.rs

diff --git a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
index b9521eb07152..4834ba256eb2 100644
--- a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -97,6 +97,8 @@ no_mangle! {
     fdimf16(x: f16, y: f16) -> f16;
     floor(x: f64) -> f64;
     floorf(x: f32) -> f32;
+    floorf128(x: f128) -> f128;
+    floorf16(x: f16) -> f16;
     fma(x: f64, y: f64, z: f64) -> f64;
     fmaf(x: f32, y: f32, z: f32) -> f32;
     fmax(x: f64, y: f64) -> f64;
diff --git a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs
index e7d3d18d9a1b..64623658d655 100644
--- a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs
+++ b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["ceilf16", "fabsf16", "sqrtf16", "truncf16"],
+        &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["ceilf128", "fabsf128", "sqrtf128", "truncf128"],
+        &["ceilf128", "fabsf128", "floorf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs
index 84be3d5245b6..eae63619c579 100644
--- a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs
@@ -101,6 +101,8 @@ main!(
     icount_bench_fdimf16_group,
     icount_bench_fdimf_group,
     icount_bench_floor_group,
+    icount_bench_floorf128_group,
+    icount_bench_floorf16_group,
     icount_bench_floorf_group,
     icount_bench_fma_group,
     icount_bench_fmaf_group,
diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
index 511e26d91267..bd7b3597196c 100644
--- a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
@@ -125,8 +125,10 @@ libm_macros::for_each_function! {
         | fabsf16
         | fdimf128
         | fdimf16
-        | sqrtf16
+        | floorf128
+        | floorf16
         | sqrtf128
+        | sqrtf16
         | truncf128
         | truncf16 => (false, None),
 
diff --git a/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs b/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs
index bbd19dbb07ba..53fade7d0ef6 100644
--- a/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs
@@ -148,6 +148,8 @@ libm_macros::for_each_function! {
         fabsf128,
         fabsf16,floor,
         floorf,
+        floorf128,
+        floorf16,
         fmod,
         fmodf,
         frexp,
@@ -240,6 +242,7 @@ impl_no_round! {
 impl_no_round! {
     fabsf16 => abs_mut;
     ceilf16 => ceil_mut;
+    floorf16 => floor_mut;
     truncf16 => trunc_mut;
 }
 
@@ -247,6 +250,7 @@ impl_no_round! {
 impl_no_round! {
     fabsf128 => abs_mut;
     ceilf128 => ceil_mut;
+    floorf128 => floor_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs
index e13acf3de216..335496fce36c 100644
--- a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs
@@ -87,6 +87,8 @@ libm_macros::for_each_function! {
         fabsf16,
         fdimf128,
         fdimf16,
+        floorf128,
+        floorf16,
         truncf128,
         truncf16,
         sqrtf16,
diff --git a/library/compiler-builtins/libm/crates/util/src/main.rs b/library/compiler-builtins/libm/crates/util/src/main.rs
index 810919339ec6..988c01d07607 100644
--- a/library/compiler-builtins/libm/crates/util/src/main.rs
+++ b/library/compiler-builtins/libm/crates/util/src/main.rs
@@ -92,6 +92,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fabsf16
             | fdimf128
             | fdimf16
+            | floorf128
+            | floorf16
             | sqrtf128
             | sqrtf16
             | truncf128
diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json
index 6a865f42784d..eef176fb57c8 100644
--- a/library/compiler-builtins/libm/etc/function-definitions.json
+++ b/library/compiler-builtins/libm/etc/function-definitions.json
@@ -349,6 +349,20 @@
         ],
         "type": "f32"
     },
+    "floorf128": {
+        "sources": [
+            "src/math/floorf128.rs",
+            "src/math/generic/floor.rs"
+        ],
+        "type": "f128"
+    },
+    "floorf16": {
+        "sources": [
+            "src/math/floorf16.rs",
+            "src/math/generic/floor.rs"
+        ],
+        "type": "f16"
+    },
     "fma": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/library/compiler-builtins/libm/etc/function-list.txt b/library/compiler-builtins/libm/etc/function-list.txt
index 337e7e434e5f..3bb895f4a297 100644
--- a/library/compiler-builtins/libm/etc/function-list.txt
+++ b/library/compiler-builtins/libm/etc/function-list.txt
@@ -49,6 +49,8 @@ fdimf128
 fdimf16
 floor
 floorf
+floorf128
+floorf16
 fma
 fmaf
 fmax
diff --git a/library/compiler-builtins/libm/src/math/floorf128.rs b/library/compiler-builtins/libm/src/math/floorf128.rs
new file mode 100644
index 000000000000..9a9fe4151152
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/floorf128.rs
@@ -0,0 +1,7 @@
+/// Floor (f128)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    return super::generic::floor(x);
+}
diff --git a/library/compiler-builtins/libm/src/math/floorf16.rs b/library/compiler-builtins/libm/src/math/floorf16.rs
new file mode 100644
index 000000000000..f9b868e04109
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/floorf16.rs
@@ -0,0 +1,7 @@
+/// Floor (f16)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    return super::generic::floor(x);
+}
diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs
index 5228e78b7f33..68d201524b4b 100644
--- a/library/compiler-builtins/libm/src/math/mod.rs
+++ b/library/compiler-builtins/libm/src/math/mod.rs
@@ -345,6 +345,7 @@ cfg_if! {
         mod copysignf16;
         mod fabsf16;
         mod fdimf16;
+        mod floorf16;
         mod sqrtf16;
         mod truncf16;
 
@@ -352,6 +353,7 @@ cfg_if! {
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
+        pub use self::floorf16::floorf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
@@ -363,6 +365,7 @@ cfg_if! {
         mod copysignf128;
         mod fabsf128;
         mod fdimf128;
+        mod floorf128;
         mod sqrtf128;
         mod truncf128;
 
@@ -370,6 +373,7 @@ cfg_if! {
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
+        pub use self::floorf128::floorf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }

From 3ae70a4a6cdf1bd11be554ac3ba272707ce8da56 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 08:48:02 +0000
Subject: [PATCH 3/3] Adjust `ceil` style to be more similar to `floor`

---
 .../crates/compiler-builtins-smoke-test/src/lib.rs   |  2 ++
 .../compiler-builtins/libm/src/math/generic/ceil.rs  | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
index 4834ba256eb2..1a7aa983ee44 100644
--- a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -67,6 +67,8 @@ no_mangle! {
     cbrtf(x: f32) -> f32;
     ceil(x: f64) -> f64;
     ceilf(x: f32) -> f32;
+    ceilf128(x: f128) -> f128;
+    ceilf16(x: f16) -> f16;
     copysign(x: f64, y: f64) -> f64;
     copysignf(x: f32, y: f32) -> f32;
     copysignf128(x: f128, y: f128) -> f128;
diff --git a/library/compiler-builtins/libm/src/math/generic/ceil.rs b/library/compiler-builtins/libm/src/math/generic/ceil.rs
index 34261faf782f..971a4d3d8c5f 100644
--- a/library/compiler-builtins/libm/src/math/generic/ceil.rs
+++ b/library/compiler-builtins/libm/src/math/generic/ceil.rs
@@ -31,24 +31,28 @@ pub fn ceil<F: Float>(x: F) -> F {
 
         // Otherwise, raise an inexact exception.
         force_eval!(x + F::MAX);
+
         if x.is_sign_positive() {
             ix += m;
         }
+
         ix &= !m;
+        F::from_bits(ix)
     } else {
         // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
         force_eval!(x + F::MAX);
 
         if x.is_sign_negative() {
             // -1.0 < x <= -0.0; rounding up goes toward -0.0.
-            return F::NEG_ZERO;
+            F::NEG_ZERO
         } else if ix << 1 != zero {
             // 0.0 < x < 1.0; rounding up goes toward +1.0.
-            return F::ONE;
+            F::ONE
+        } else {
+            // +0.0 remains unchanged
+            x
         }
     }
-
-    F::from_bits(ix)
 }
 
 #[cfg(test)]