Merge compiler-builtins as a Josh subtree

Use the Josh [1] utility to add `compiler-builtins` as a subtree, which
will allow us to stop using crates.io for updates. This is intended to
help resolve some problems when unstable features change and require
code changes in `compiler-builtins`, which sometimes gets trapped in a
bootstrap cycle.

This was done using `josh-filter` built from the r24.10.04 tag:

    git fetch https://github.com/rust-lang/compiler-builtins.git 233434412fe7eced8f1ddbfeddabef1d55e493bd
    josh-filter ":prefix=library/compiler-builtins" FETCH_HEAD
    git merge --allow-unrelated FILTERED_HEAD

The HEAD in the `compiler-builtins` repository is 233434412f ("fix an if
statement that can be collapsed").

[1]: https://github.com/josh-project/josh
This commit is contained in:
Trevor Gross 2025-05-18 15:08:03 +00:00
commit fcb3000340
380 changed files with 52998 additions and 0 deletions

View file

@ -0,0 +1,16 @@
# EditorConfig helps developers define and maintain consistent
# coding styles between different editors and IDEs
# editorconfig.org
root = true
[*]
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
indent_style = space
indent_size = 4
[*.yml]
indent_size = 2

View file

@ -0,0 +1,6 @@
# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make
# `git blame` ignore the following commits.
# Reformat with a new `.rustfmt.toml`
# In rust-lang/libm this was 5882cabb83c30bf7c36023f9a55a80583636b0e8
4bb07a6275cc628ef81c65ac971dc6479963322f

View file

@ -0,0 +1,344 @@
name: CI
on:
push: { branches: [master] }
pull_request:
concurrency:
# Make sure that new pushes cancel running jobs
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
CARGO_TERM_COLOR: always
RUSTDOCFLAGS: -Dwarnings
RUSTFLAGS: -Dwarnings
RUST_BACKTRACE: full
BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
jobs:
# Determine which tests should be run based on changed files.
calculate_vars:
name: Calculate workflow variables
runs-on: ubuntu-24.04
timeout-minutes: 10
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
outputs:
extensive_matrix: ${{ steps.script.outputs.extensive_matrix }}
may_skip_libm_ci: ${{ steps.script.outputs.may_skip_libm_ci }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 500
- name: Fetch pull request ref
run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
if: github.event_name == 'pull_request'
- run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
id: script
test:
name: Build and test
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- target: aarch64-apple-darwin
os: macos-15
- target: aarch64-unknown-linux-gnu
os: ubuntu-24.04-arm
- target: aarch64-pc-windows-msvc
os: windows-2025
test_verbatim: 1
build_only: 1
- target: arm-unknown-linux-gnueabi
os: ubuntu-24.04
- target: arm-unknown-linux-gnueabihf
os: ubuntu-24.04
- target: armv7-unknown-linux-gnueabihf
os: ubuntu-24.04
- target: i586-unknown-linux-gnu
os: ubuntu-24.04
- target: i686-unknown-linux-gnu
os: ubuntu-24.04
- target: loongarch64-unknown-linux-gnu
os: ubuntu-24.04
- target: powerpc-unknown-linux-gnu
os: ubuntu-24.04
- target: powerpc64-unknown-linux-gnu
os: ubuntu-24.04
- target: powerpc64le-unknown-linux-gnu
os: ubuntu-24.04
- target: riscv64gc-unknown-linux-gnu
os: ubuntu-24.04
- target: thumbv6m-none-eabi
os: ubuntu-24.04
- target: thumbv7em-none-eabi
os: ubuntu-24.04
- target: thumbv7em-none-eabihf
os: ubuntu-24.04
- target: thumbv7m-none-eabi
os: ubuntu-24.04
- target: wasm32-unknown-unknown
os: ubuntu-24.04
- target: x86_64-unknown-linux-gnu
os: ubuntu-24.04
- target: x86_64-apple-darwin
os: macos-13
- target: i686-pc-windows-msvc
os: windows-2025
test_verbatim: 1
- target: x86_64-pc-windows-msvc
os: windows-2025
test_verbatim: 1
- target: i686-pc-windows-gnu
os: windows-2025
channel: nightly-i686-gnu
- target: x86_64-pc-windows-gnu
os: windows-2025
channel: nightly-x86_64-gnu
runs-on: ${{ matrix.os }}
needs: [calculate_vars]
env:
BUILD_ONLY: ${{ matrix.build_only }}
TEST_VERBATIM: ${{ matrix.test_verbatim }}
MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
steps:
- name: Print runner information
run: uname -a
- uses: actions/checkout@v4
with:
submodules: true
- name: Install Rust (rustup)
shell: bash
run: |
channel="nightly"
# Account for channels that have required components (MinGW)
[ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
rustup update "$channel" --no-self-update
rustup default "$channel"
rustup target add "${{ matrix.target }}"
rustup component add llvm-tools-preview
- uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2
with:
key: ${{ matrix.target }}
- name: Cache Docker layers
uses: actions/cache@v4
if: matrix.os == 'ubuntu-24.04'
with:
path: /tmp/.buildx-cache
key: ${{ matrix.target }}-buildx-${{ github.sha }}
restore-keys: ${{ matrix.target }}-buildx-
# Configure buildx to use Docker layer caching
- uses: docker/setup-buildx-action@v3
if: matrix.os == 'ubuntu-24.04'
- name: Cache compiler-rt
id: cache-compiler-rt
uses: actions/cache@v4
with:
path: compiler-rt
key: ${{ runner.os }}-compiler-rt-${{ hashFiles('ci/download-compiler-rt.sh') }}
- name: Download compiler-rt reference sources
if: steps.cache-compiler-rt.outputs.cache-hit != 'true'
run: ./ci/download-compiler-rt.sh
shell: bash
- run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV"
shell: bash
- name: Verify API list
if: matrix.os == 'ubuntu-24.04'
run: python3 etc/update-api-list.py --check
# Non-linux tests just use our raw script
- name: Run locally
if: matrix.os != 'ubuntu-24.04'
shell: bash
run: ./ci/run.sh ${{ matrix.target }}
# Otherwise we use our docker containers to run builds
- name: Run in Docker
if: matrix.os == 'ubuntu-24.04'
run: ./ci/run-docker.sh ${{ matrix.target }}
- name: Print test logs if available
if: always()
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
shell: bash
# Workaround to keep Docker cache smaller
# https://github.com/docker/build-push-action/issues/252
# https://github.com/moby/buildkit/issues/1896
- name: Move Docker cache
if: matrix.os == 'ubuntu-24.04'
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
clippy:
name: Clippy
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
submodules: true
# Unlike rustfmt, stable clippy does not work on code with nightly features.
- name: Install nightly `clippy`
run: |
rustup set profile minimal
rustup default nightly
rustup component add clippy
- uses: Swatinem/rust-cache@v2
- run: cargo clippy --workspace --all-targets
benchmarks:
name: Benchmarks
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- uses: actions/checkout@master
with:
submodules: true
- uses: taiki-e/install-action@cargo-binstall
- name: Set up dependencies
run: |
sudo apt-get update
sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
rustup update "$BENCHMARK_RUSTC" --no-self-update
rustup default "$BENCHMARK_RUSTC"
# Install the version of iai-callgrind-runner that is specified in Cargo.toml
iai_version="$(cargo metadata --format-version=1 --features icount |
jq -r '.packages[] | select(.name == "iai-callgrind").version')"
cargo binstall -y iai-callgrind-runner --version "$iai_version"
sudo apt-get install valgrind
- uses: Swatinem/rust-cache@v2
- name: Run icount benchmarks
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: ./ci/bench-icount.sh
- name: Upload the benchmark baseline
uses: actions/upload-artifact@v4
with:
name: ${{ env.BASELINE_NAME }}
path: ${{ env.BASELINE_NAME }}.tar.xz
- name: Run wall time benchmarks
run: |
# Always use the same seed for benchmarks. Ideally we should switch to a
# non-random generator.
export LIBM_SEED=benchesbenchesbenchesbencheswoo!
cargo bench --package libm-test \
--no-default-features \
--features short-benchmarks,build-musl,libm/force-soft-floats
- name: Print test logs if available
if: always()
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
shell: bash
miri:
name: Miri
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install Rust (rustup)
run: rustup update nightly --no-self-update && rustup default nightly
shell: bash
- run: rustup component add miri
- run: cargo miri setup
- uses: Swatinem/rust-cache@v2
- run: ./ci/miri.sh
msrv:
name: Check libm MSRV
runs-on: ubuntu-24.04
timeout-minutes: 10
env:
RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
steps:
- uses: actions/checkout@master
- name: Install Rust
run: |
msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
echo "MSRV: $msrv"
rustup update "$msrv" --no-self-update && rustup default "$msrv"
- uses: Swatinem/rust-cache@v2
- run: |
# FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see
# `edition = "2024"` and get spooked.
rm Cargo.toml
cargo build --manifest-path libm/Cargo.toml
rustfmt:
name: Rustfmt
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install stable `rustfmt`
run: rustup set profile minimal && rustup default stable && rustup component add rustfmt
- run: cargo fmt -- --check
extensive:
name: Extensive tests for ${{ matrix.ty }}
needs:
# Wait on `clippy` so we have some confidence that the crate will build
- clippy
- calculate_vars
runs-on: ubuntu-24.04
timeout-minutes: 240 # 4 hours
strategy:
matrix:
# Use the output from `calculate_vars` to create the matrix
# FIXME: it would be better to run all jobs (i.e. all types) but mark those that
# didn't change as skipped, rather than completely excluding the job. However,
# this is not currently possible https://github.com/actions/runner/issues/1985.
include: ${{ fromJSON(needs.calculate_vars.outputs.extensive_matrix).extensive_matrix }}
env:
TO_TEST: ${{ matrix.to_test }}
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
- uses: Swatinem/rust-cache@v2
- name: Run extensive tests
run: ./ci/run-extensive.sh
- name: Print test logs if available
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
shell: bash
success:
needs:
- benchmarks
- clippy
- extensive
- miri
- msrv
- rustfmt
- test
runs-on: ubuntu-24.04
timeout-minutes: 10
# GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
# failed" as success. So we have to do some contortions to ensure the job fails if any of its
# dependencies fails.
if: always() # make sure this is never "skipped"
steps:
# Manually check the status of all dependencies. `if: failure()` does not work.
- name: check if any dependency failed
run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'

View file

@ -0,0 +1,25 @@
name: Release-plz
permissions:
pull-requests: write
contents: write
on:
push: { branches: [master] }
jobs:
release-plz:
name: Release-plz
runs-on: ubuntu-24.04
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install Rust (rustup)
run: rustup update nightly --no-self-update && rustup default nightly
- name: Run release-plz
uses: MarcoIeni/release-plz-action@v0.5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}

16
library/compiler-builtins/.gitignore vendored Normal file
View file

@ -0,0 +1,16 @@
# Rust files
Cargo.lock
target
# Sources for external files
compiler-rt
*.tar.gz
# Benchmark cache
baseline-*
iai-home
# Temporary files
*.bk
*.rs.bk
.#*

4
library/compiler-builtins/.gitmodules vendored Normal file
View file

@ -0,0 +1,4 @@
[submodule "crates/musl-math-sys/musl"]
path = crates/musl-math-sys/musl
url = https://git.musl-libc.org/git/musl
shallow = true

View file

@ -0,0 +1,13 @@
[workspace]
# As part of the release process, we delete `libm/Cargo.toml`. Since
# this is only run in CI, we shouldn't need to worry about it.
allow_dirty = true
publish_allow_dirty = true
[[package]]
name = "compiler_builtins"
semver_check = false
changelog_include = ["libm"] # libm is included as part of builtins
[[package]]
name = "libm"

View file

@ -0,0 +1,4 @@
# This matches rustc
style_edition = "2024"
group_imports = "StdExternalCrate"
imports_granularity = "Module"

View file

@ -0,0 +1,167 @@
# How to contribute
## compiler-builtins
1. From the [pending list](compiler-builtins/README.md#progress), pick one or
more intrinsics.
2. Port the version from [`compiler-rt`] and, if applicable, their
[tests][rt-tests]. Note that this crate has generic implementations for a lot
of routines, which may be usable without porting the entire implementation.
3. Add a test to `builtins-test`, comparing the behavior of the ported
intrinsic(s) with their implementation on the testing host.
4. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it can
be linked on all targets.
5. Send a Pull Request (PR) :tada:.
[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/lib/builtins
[rt-tests]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/test/builtins
## Porting Reminders
1. [Rust][prec-rust] and [C][prec-c] have slightly different operator
precedence. C evaluates comparisons (`== !=`) before bitwise operations
(`& | ^`), while Rust evaluates the other way.
2. C assumes wrapping operations everywhere. Rust panics on overflow when in
debug mode. Consider using the [Wrapping][wrap-ty] type or the explicit
[wrapping_*][wrap-fn] functions where applicable.
3. Note [C implicit casts][casts], especially integer promotion. Rust is much
more explicit about casting, so be sure that any cast which affects the
output is ported to the Rust implementation.
4. Rust has [many functions][i32] for integer or floating point manipulation in
the standard library. Consider using one of these functions rather than
porting a new one.
[prec-rust]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence
[prec-c]: http://en.cppreference.com/w/c/language/operator_precedence
[wrap-ty]: https://doc.rust-lang.org/core/num/struct.Wrapping.html
[wrap-fn]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add
[casts]: http://en.cppreference.com/w/cpp/language/implicit_conversion
[i32]: https://doc.rust-lang.org/std/primitive.i32.html
## Tips and tricks
- _IMPORTANT_ The code in this crate will end up being used in the `core` crate
so it can **not** have any external dependencies (other than a subset of
`core` itself).
- Only use relative imports within the `math` directory / module, e.g.
`use self::fabs::fabs` or `use super::k_cos`. Absolute imports from core are
OK, e.g. `use core::u64`.
- To reinterpret a float as an integer use the `to_bits` method. The MUSL code
uses the `GET_FLOAT_WORD` macro, or a union, to do this operation.
- To reinterpret an integer as a float use the `f32::from_bits` constructor. The
MUSL code uses the `SET_FLOAT_WORD` macro, or a union, to do this operation.
- You may use other methods from core like `f64::is_nan`, etc. as appropriate.
- Rust does not have hex float literals. This crate provides the `hf16!`,
`hf32!`, `hf64!`, and `hf128!` macros, which convert string literals to floats at
compile time.
```rust
assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000);
assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000);
```
- Rust code panics on arithmetic overflows when not optimized. You may need to
use the [`Wrapping`] newtype to avoid this problem, or individual methods like
[`wrapping_add`].
[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html
[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add
## Testing
Testing for these crates can be somewhat complex, so feel free to rely on CI.
The easiest way to replicate CI testing is using Docker. This can be done by
running `./ci/run-docker.sh [target]`. If no target is specified, all targets
will be run.
Tests can also be run without Docker:
```sh
# Run basic tests
#
# --no-default-features always needs to be passed, an unfortunate limitation
# since the `#![compiler_builtins]` feature is enabled by default.
cargo test --workspace --no-default-features
# Test with all interesting features
cargo test --workspace --no-default-features \
--features arch,unstable-float,unstable-intrinsics,mem
# Run with more detailed tests for libm
cargo test --workspace --no-default-features \
--features arch,unstable-float,unstable-intrinsics,mem \
--features build-mpfr,build-musl \
--profile release-checked
```
The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can be
difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help.
`build-musl` does not build with MSVC, Wasm, or Thumb.
[`rug`]: https://docs.rs/rug/latest/rug/
[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/
In order to run all tests, some dependencies may be required:
```sh
# Allow testing compiler-builtins
./ci/download-compiler-rt.sh
# Optional, initialize musl for `--features build-musl`
git submodule init
git submodule update
# `--release` enables more test cases
cargo test --release
```
### Extensive tests
Libm also has tests that are exhaustive (for single-argument `f32` and 1- or 2-
argument `f16`) or extensive (for all other float and argument combinations).
These take quite a long time to run, but are launched in CI when relevant files
are changed.
Exhaustive tests can be selected by passing an environment variable:
```sh
LIBM_EXTENSIVE_TESTS=sqrt,sqrtf cargo test --features build-mpfr \
--test z_extensive \
--profile release-checked
# Run all tests for one type
LIBM_EXTENSIVE_TESTS=all_f16 cargo test ...
# Ensure `f64` tests can run exhaustively. The estimated completion time for a
# single test is 57306 years on my machine, so this may be worth skipping.
LIBM_EXTENSIVE_TESTS=all LIBM_EXTENSIVE_ITERATIONS=18446744073709551615 cargo test ...
```
## Benchmarking
Regular walltime benchmarks can be run with `cargo bench`:
```sh
cargo bench --no-default-features \
--features arch,unstable-float,unstable-intrinsics,mem \
--features benchmarking-reports
```
There are also benchmarks that check instruction count behind the `icount`
feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind]
to be installed, which means these only run on limited platforms.
Instruction count benchmarks are run as part of CI to flag performance
regressions.
```sh
cargo bench --no-default-features \
--features arch,unstable-float,unstable-intrinsics,mem \
--features icount \
--bench icount --bench mem_icount
```
[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
[Valgrind]: https://valgrind.org/

View file

@ -0,0 +1,50 @@
[workspace]
resolver = "2"
members = [
"builtins-test",
"compiler-builtins",
"crates/libm-macros",
"crates/musl-math-sys",
"crates/panic-handler",
"crates/util",
"libm",
"libm-test",
]
default-members = [
"builtins-test",
"compiler-builtins",
"crates/libm-macros",
"libm",
"libm-test",
]
exclude = [
# `builtins-test-intrinsics` needs the feature `compiler-builtins` enabled
# and `mangled-names` disabled, which is the opposite of what is needed for
# other tests, so it makes sense to keep it out of the workspace.
"builtins-test-intrinsics",
]
[profile.release]
panic = "abort"
[profile.dev]
panic = "abort"
# Release mode with debug assertions
[profile.release-checked]
inherits = "release"
debug-assertions = true
overflow-checks = true
# Release with maximum optimizations, which is very slow to build. This is also
# what is needed to check `no-panic`.
[profile.release-opt]
inherits = "release"
codegen-units = 1
lto = "fat"
[profile.bench]
# Required for iai-callgrind
debug = true

View file

@ -0,0 +1,275 @@
The compiler-builtins crate is available for use under both the MIT license
and the Apache-2.0 license with the LLVM exception (MIT AND Apache-2.0 WITH
LLVM-exception).
The libm crate is available for use under the MIT license.
As a contributor, you agree that your code may be used under any of the
following: the MIT license, the Apache-2.0 license, or the Apache-2.0 license
with the LLVM exception. In other words, original (non-derivative) work is
licensed under MIT OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception. This is
the default license for all other source in this repository.
Text of the relevant licenses is provided below:
------------------------------------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---- LLVM Exceptions to the Apache 2.0 License ----
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.
------------------------------------------------------------------------------
Portions of this software are derived from third-party works licensed under
terms compatible with the above Apache-2.0 WITH LLVM-exception AND MIT
license:
* compiler-builtins is derived from LLVM's compiler-rt (https://llvm.org/).
Work derived from compiler-rt prior to 2019-01-19 is usable under the MIT
license, with the following copyright:
Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
The relevant CREDITS.TXT is located at
https://github.com/llvm/llvm-project/blob/main/compiler-rt/CREDITS.TXT.
* Work derived from compiler-rt after 2019-01-19 is usable under the
Apache-2.0 license with the LLVM exception.
* The bundled `math` module is from the libm crate, usable under the MIT
license. For further details and copyrights, see libm/LICENSE.txt at
https://github.com/rust-lang/compiler-builtins.
Additionally, some source files may contain comments with specific copyrights
or licenses.

View file

@ -0,0 +1,16 @@
# Publishing to crates.io
Publishing `compiler-builtins` to crates.io takes a few steps unfortunately.
It's not great, but it works for now. PRs to improve this process would be
greatly appreciated!
1. Make sure you've got a clean working tree and it's updated with the latest
changes on `master`
2. Edit `Cargo.toml` to bump the version number
3. Commit this change
4. Run `git tag` to create a tag for this version
5. Delete the `libm/Cargo.toml` file
6. Run `cargo +nightly publish`
7. Push the tag
8. Push the commit
9. Undo changes to `Cargo.toml` and the `libm` submodule

View file

@ -0,0 +1,27 @@
# `compiler-builtins` and `libm`
This repository contains two main crates:
* `compiler-builtins`: symbols that the compiler expects to be available at
link time
* `libm`: a Rust implementation of C math libraries, used to provide
implementations in `core`.
More details are at [compiler-builtins/README.md](compiler-builtins/README.md)
and [libm/README.md](libm/README.md).
For instructions on contributing, see [CONTRIBUTING.md](CONTRIBUTING.md).
## License
* `libm` may be used under the [MIT License]
* `compiler-builtins` may be used under the [MIT License] and the
[Apache License, Version 2.0] with the LLVM exception.
* All original contributions must be under all of: the MIT license, the
Apache-2.0 license, and the Apache-2.0 license with the LLVM exception.
More details are in [LICENSE.txt](LICENSE.txt) and
[libm/LICENSE.txt](libm/LICENSE.txt).
[MIT License]: https://opensource.org/license/mit
[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0

View file

@ -0,0 +1,19 @@
[package]
name = "builtins-test-intrinsics"
version = "0.1.0"
edition = "2021"
publish = false
license = "MIT OR Apache-2.0"
[dependencies]
compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]}
panic-handler = { path = "../crates/panic-handler" }
[features]
c = ["compiler_builtins/c"]
[profile.release]
panic = "abort"
[profile.dev]
panic = "abort"

View file

@ -0,0 +1,11 @@
mod builtins_configure {
    include!("../compiler-builtins/configure.rs");
}

/// Build script: re-run when the shared configure script changes, then apply
/// the target-specific configuration (f16/f128 availability and aliases).
fn main() {
    // The rerun path must match the file pulled in by `include!` above;
    // `../configure.rs` pointed at a non-existent file, so changes to the real
    // configure script would never trigger a rebuild.
    println!("cargo::rerun-if-changed=../compiler-builtins/configure.rs");
    let target = builtins_configure::Target::from_env();
    builtins_configure::configure_f16_f128(&target);
    builtins_configure::configure_aliases(&target);
}

View file

@ -0,0 +1,697 @@
// By compiling this file we check that all the intrinsics we care about continue to be provided by
// the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop
// compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail
// to link due to the missing intrinsic (symbol).
#![allow(unused_features)]
#![allow(internal_features)]
#![deny(dead_code)]
#![feature(allocator_api)]
#![feature(f128)]
#![feature(f16)]
#![feature(lang_items)]
#![no_std]
#![no_main]
extern crate panic_handler;
#[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))]
#[link(name = "c")]
extern "C" {}
// Every function in this module will be lowered to an intrinsic by LLVM if the platform
// doesn't have native support for the operation used in the function. ARM has a naming
// convention for its intrinsics that's different from other architectures; that's why some
// functions have an additional comment: the function name is the ARM name for the intrinsic
// and the comment is the non-ARM name for the intrinsic.
mod intrinsics {
/* f16 operations */
#[cfg(f16_enabled)]
pub fn extendhfsf(x: f16) -> f32 {
x as f32
}
#[cfg(f16_enabled)]
pub fn extendhfdf(x: f16) -> f64 {
x as f64
}
#[cfg(all(
f16_enabled,
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn extendhftf(x: f16) -> f128 {
x as f128
}
/* f32 operations */
#[cfg(f16_enabled)]
pub fn truncsfhf(x: f32) -> f16 {
x as f16
}
// extendsfdf2
pub fn aeabi_f2d(x: f32) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn extendsftf(x: f32) -> f128 {
x as f128
}
// fixsfsi
pub fn aeabi_f2iz(x: f32) -> i32 {
x as i32
}
// fixsfdi
pub fn aeabi_f2lz(x: f32) -> i64 {
x as i64
}
pub fn fixsfti(x: f32) -> i128 {
x as i128
}
// fixunssfsi
pub fn aeabi_f2uiz(x: f32) -> u32 {
x as u32
}
// fixunssfdi
pub fn aeabi_f2ulz(x: f32) -> u64 {
x as u64
}
pub fn fixunssfti(x: f32) -> u128 {
x as u128
}
// addsf3
pub fn aeabi_fadd(a: f32, b: f32) -> f32 {
a + b
}
// eqsf2
pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool {
a == b
}
// gtsf2
pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool {
a > b
}
// ltsf2
pub fn aeabi_fcmplt(a: f32, b: f32) -> bool {
a < b
}
// divsf3
pub fn aeabi_fdiv(a: f32, b: f32) -> f32 {
a / b
}
// mulsf3
pub fn aeabi_fmul(a: f32, b: f32) -> f32 {
a * b
}
// subsf3
pub fn aeabi_fsub(a: f32, b: f32) -> f32 {
a - b
}
/* f64 operations */
// truncdfsf2
pub fn aeabi_d2f(x: f64) -> f32 {
x as f32
}
// fixdfsi
pub fn aeabi_d2i(x: f64) -> i32 {
x as i32
}
// fixdfdi
pub fn aeabi_d2l(x: f64) -> i64 {
x as i64
}
pub fn fixdfti(x: f64) -> i128 {
x as i128
}
// fixunsdfsi
pub fn aeabi_d2uiz(x: f64) -> u32 {
x as u32
}
// fixunsdfdi
pub fn aeabi_d2ulz(x: f64) -> u64 {
x as u64
}
pub fn fixunsdfti(x: f64) -> u128 {
x as u128
}
// adddf3
pub fn aeabi_dadd(a: f64, b: f64) -> f64 {
a + b
}
// eqdf2
pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool {
a == b
}
// gtdf2
pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool {
a > b
}
// ltdf2
pub fn aeabi_dcmplt(a: f64, b: f64) -> bool {
a < b
}
// divdf3
pub fn aeabi_ddiv(a: f64, b: f64) -> f64 {
a / b
}
// muldf3
pub fn aeabi_dmul(a: f64, b: f64) -> f64 {
a * b
}
// subdf3
pub fn aeabi_dsub(a: f64, b: f64) -> f64 {
a - b
}
/* f128 operations */
#[cfg(all(
f16_enabled,
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn trunctfhf(x: f128) -> f16 {
x as f16
}
#[cfg(f128_enabled)]
pub fn trunctfsf(x: f128) -> f32 {
x as f32
}
#[cfg(f128_enabled)]
pub fn trunctfdf(x: f128) -> f64 {
x as f64
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixtfsi(x: f128) -> i32 {
x as i32
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixtfdi(x: f128) -> i64 {
x as i64
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixtfti(x: f128) -> i128 {
x as i128
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixunstfsi(x: f128) -> u32 {
x as u32
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixunstfdi(x: f128) -> u64 {
x as u64
}
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
pub fn fixunstfti(x: f128) -> u128 {
x as u128
}
#[cfg(f128_enabled)]
pub fn addtf(a: f128, b: f128) -> f128 {
a + b
}
#[cfg(f128_enabled)]
pub fn eqtf(a: f128, b: f128) -> bool {
a == b
}
#[cfg(f128_enabled)]
pub fn gttf(a: f128, b: f128) -> bool {
a > b
}
#[cfg(f128_enabled)]
pub fn lttf(a: f128, b: f128) -> bool {
a < b
}
#[cfg(f128_enabled)]
pub fn multf(a: f128, b: f128) -> f128 {
a * b
}
#[cfg(f128_enabled)]
pub fn divtf(a: f128, b: f128) -> f128 {
a / b
}
#[cfg(f128_enabled)]
pub fn subtf(a: f128, b: f128) -> f128 {
a - b
}
/* i32 operations */
// floatsisf
pub fn aeabi_i2f(x: i32) -> f32 {
x as f32
}
// floatsidf
pub fn aeabi_i2d(x: i32) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floatsitf(x: i32) -> f128 {
x as f128
}
pub fn aeabi_idiv(a: i32, b: i32) -> i32 {
a.wrapping_div(b)
}
pub fn aeabi_idivmod(a: i32, b: i32) -> i32 {
a % b
}
/* i64 operations */
// floatdisf
pub fn aeabi_l2f(x: i64) -> f32 {
x as f32
}
// floatdidf
pub fn aeabi_l2d(x: i64) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floatditf(x: i64) -> f128 {
x as f128
}
pub fn mulodi4(a: i64, b: i64) -> i64 {
a * b
}
// divdi3
pub fn aeabi_ldivmod(a: i64, b: i64) -> i64 {
a / b
}
pub fn moddi3(a: i64, b: i64) -> i64 {
a % b
}
// muldi3
pub fn aeabi_lmul(a: i64, b: i64) -> i64 {
a.wrapping_mul(b)
}
/* i128 operations */
pub fn floattisf(x: i128) -> f32 {
x as f32
}
pub fn floattidf(x: i128) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floattitf(x: i128) -> f128 {
x as f128
}
pub fn lshrti3(a: i128, b: usize) -> i128 {
a >> b
}
pub fn divti3(a: i128, b: i128) -> i128 {
a / b
}
pub fn modti3(a: i128, b: i128) -> i128 {
a % b
}
/* u32 operations */
// floatunsisf
pub fn aeabi_ui2f(x: u32) -> f32 {
x as f32
}
// floatunsidf
pub fn aeabi_ui2d(x: u32) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floatunsitf(x: u32) -> f128 {
x as f128
}
pub fn aeabi_uidiv(a: u32, b: u32) -> u32 {
a / b
}
pub fn aeabi_uidivmod(a: u32, b: u32) -> u32 {
a % b
}
/* u64 operations */
// floatundisf
pub fn aeabi_ul2f(x: u64) -> f32 {
x as f32
}
// floatundidf
pub fn aeabi_ul2d(x: u64) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floatunditf(x: u64) -> f128 {
x as f128
}
// udivdi3
pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 {
a * b
}
pub fn umoddi3(a: u64, b: u64) -> u64 {
a % b
}
/* u128 operations */
pub fn floatuntisf(x: u128) -> f32 {
x as f32
}
pub fn floatuntidf(x: u128) -> f64 {
x as f64
}
#[cfg(f128_enabled)]
pub fn floatuntitf(x: u128) -> f128 {
x as f128
}
pub fn muloti4(a: u128, b: u128) -> Option<u128> {
a.checked_mul(b)
}
pub fn multi3(a: u128, b: u128) -> u128 {
a.wrapping_mul(b)
}
pub fn ashlti3(a: u128, b: usize) -> u128 {
a >> b
}
pub fn ashrti3(a: u128, b: usize) -> u128 {
a << b
}
pub fn udivti3(a: u128, b: u128) -> u128 {
a / b
}
pub fn umodti3(a: u128, b: u128) -> u128 {
a % b
}
}
fn run() {
use core::hint::black_box as bb;
use intrinsics::*;
// FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names
#[cfg(f128_enabled)]
bb(addtf(bb(2.), bb(2.)));
bb(aeabi_d2f(bb(2.)));
bb(aeabi_d2i(bb(2.)));
bb(aeabi_d2l(bb(2.)));
bb(aeabi_d2uiz(bb(2.)));
bb(aeabi_d2ulz(bb(2.)));
bb(aeabi_dadd(bb(2.), bb(3.)));
bb(aeabi_dcmpeq(bb(2.), bb(3.)));
bb(aeabi_dcmpgt(bb(2.), bb(3.)));
bb(aeabi_dcmplt(bb(2.), bb(3.)));
bb(aeabi_ddiv(bb(2.), bb(3.)));
bb(aeabi_dmul(bb(2.), bb(3.)));
bb(aeabi_dsub(bb(2.), bb(3.)));
bb(aeabi_f2d(bb(2.)));
bb(aeabi_f2iz(bb(2.)));
bb(aeabi_f2lz(bb(2.)));
bb(aeabi_f2uiz(bb(2.)));
bb(aeabi_f2ulz(bb(2.)));
bb(aeabi_fadd(bb(2.), bb(3.)));
bb(aeabi_fcmpeq(bb(2.), bb(3.)));
bb(aeabi_fcmpgt(bb(2.), bb(3.)));
bb(aeabi_fcmplt(bb(2.), bb(3.)));
bb(aeabi_fdiv(bb(2.), bb(3.)));
bb(aeabi_fmul(bb(2.), bb(3.)));
bb(aeabi_fsub(bb(2.), bb(3.)));
bb(aeabi_i2d(bb(2)));
bb(aeabi_i2f(bb(2)));
bb(aeabi_idiv(bb(2), bb(3)));
bb(aeabi_idivmod(bb(2), bb(3)));
bb(aeabi_l2d(bb(2)));
bb(aeabi_l2f(bb(2)));
bb(aeabi_ldivmod(bb(2), bb(3)));
bb(aeabi_lmul(bb(2), bb(3)));
bb(aeabi_ui2d(bb(2)));
bb(aeabi_ui2f(bb(2)));
bb(aeabi_uidiv(bb(2), bb(3)));
bb(aeabi_uidivmod(bb(2), bb(3)));
bb(aeabi_ul2d(bb(2)));
bb(aeabi_ul2f(bb(2)));
bb(aeabi_uldivmod(bb(2), bb(3)));
bb(ashlti3(bb(2), bb(2)));
bb(ashrti3(bb(2), bb(2)));
#[cfg(f128_enabled)]
bb(divtf(bb(2.), bb(2.)));
bb(divti3(bb(2), bb(2)));
#[cfg(f128_enabled)]
bb(eqtf(bb(2.), bb(2.)));
#[cfg(f16_enabled)]
bb(extendhfdf(bb(2.)));
#[cfg(f16_enabled)]
bb(extendhfsf(bb(2.)));
#[cfg(all(
f16_enabled,
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(extendhftf(bb(2.)));
#[cfg(f128_enabled)]
bb(extendsftf(bb(2.)));
bb(fixdfti(bb(2.)));
bb(fixsfti(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixtfdi(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixtfsi(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixtfti(bb(2.)));
bb(fixunsdfti(bb(2.)));
bb(fixunssfti(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixunstfdi(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixunstfsi(bb(2.)));
#[cfg(all(
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(fixunstfti(bb(2.)));
#[cfg(f128_enabled)]
bb(floatditf(bb(2)));
#[cfg(f128_enabled)]
bb(floatsitf(bb(2)));
bb(floattidf(bb(2)));
bb(floattisf(bb(2)));
#[cfg(f128_enabled)]
bb(floattitf(bb(2)));
#[cfg(f128_enabled)]
bb(floatunditf(bb(2)));
#[cfg(f128_enabled)]
bb(floatunsitf(bb(2)));
bb(floatuntidf(bb(2)));
bb(floatuntisf(bb(2)));
#[cfg(f128_enabled)]
bb(floatuntitf(bb(2)));
#[cfg(f128_enabled)]
bb(gttf(bb(2.), bb(2.)));
bb(lshrti3(bb(2), bb(2)));
#[cfg(f128_enabled)]
bb(lttf(bb(2.), bb(2.)));
bb(moddi3(bb(2), bb(3)));
bb(modti3(bb(2), bb(2)));
bb(mulodi4(bb(2), bb(3)));
bb(muloti4(bb(2), bb(2)));
#[cfg(f128_enabled)]
bb(multf(bb(2.), bb(2.)));
bb(multi3(bb(2), bb(2)));
#[cfg(f128_enabled)]
bb(subtf(bb(2.), bb(2.)));
#[cfg(f16_enabled)]
bb(truncsfhf(bb(2.)));
#[cfg(f128_enabled)]
bb(trunctfdf(bb(2.)));
#[cfg(all(
f16_enabled,
f128_enabled,
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
))]
bb(trunctfhf(bb(2.)));
#[cfg(f128_enabled)]
bb(trunctfsf(bb(2.)));
bb(udivti3(bb(2), bb(2)));
bb(umoddi3(bb(2), bb(3)));
bb(umodti3(bb(2), bb(2)));
something_with_a_dtor(&|| assert_eq!(bb(1), 1));
// FIXME(#802): This should be re-enabled once a workaround is found.
// extern "C" {
// fn rust_begin_unwind(x: usize);
// }
// unsafe {
// rust_begin_unwind(0);
// }
}
/// Runs `f` twice: once directly, and once more from a destructor, so that
/// the drop-glue code paths are linked into the binary as well.
fn something_with_a_dtor(f: &dyn Fn()) {
    // Guard type whose destructor invokes the wrapped closure.
    struct Guard<'a>(&'a (dyn Fn() + 'a));
    impl Drop for Guard<'_> {
        fn drop(&mut self) {
            (self.0)();
        }
    }
    let _guard = Guard(f);
    // Direct call happens first; the guard fires at end of scope.
    f();
}
// C-style entry point for non-thumb targets: exercise every intrinsic, then
// report success (exit code 0).
#[unsafe(no_mangle)]
#[cfg(not(thumb))]
fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int {
    run();
    0
}
// Bare-metal entry point for thumb targets: run the checks, then spin
// forever since there is no runtime to return to.
#[unsafe(no_mangle)]
#[cfg(thumb)]
pub fn _start() -> ! {
    run();
    loop {}
}
#[cfg(windows)]
#[link(name = "kernel32")]
#[link(name = "msvcrt")]
extern "C" {}
// ARM targets need these symbols
#[unsafe(no_mangle)]
pub fn __aeabi_unwind_cpp_pr0() {}
#[unsafe(no_mangle)]
pub fn __aeabi_unwind_cpp_pr1() {}
// Stub out the unwinder entry points; this test never actually unwinds, the
// symbols only need to exist so the binary links.
#[cfg(not(any(windows, target_os = "cygwin")))]
#[allow(non_snake_case)]
#[unsafe(no_mangle)]
pub fn _Unwind_Resume() {}
#[cfg(not(any(windows, target_os = "cygwin")))]
#[lang = "eh_personality"]
pub extern "C" fn eh_personality() {}
// Stub unwinding symbols for MinGW/Cygwin targets so linking succeeds
// without pulling in a real unwinder implementation.
#[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))]
mod mingw_unwinding {
    #[unsafe(no_mangle)]
    pub fn rust_eh_personality() {}
    #[unsafe(no_mangle)]
    pub fn rust_eh_unwind_resume() {}
    #[unsafe(no_mangle)]
    pub fn rust_eh_register_frames() {}
    #[unsafe(no_mangle)]
    pub fn rust_eh_unregister_frames() {}
}

View file

@ -0,0 +1,99 @@
[package]
name = "builtins-test"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
edition = "2024"
publish = false
license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
[dependencies]
# For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential
# problems with system RNGs on the variety of platforms this crate is tested on.
# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
rand_xoshiro = "0.6"
# To compare float builtins against
rustc_apfloat = "0.2.1"
# Really a dev dependency, but dev dependencies can't be optional
iai-callgrind = { version = "0.14.0", optional = true }
[dependencies.compiler_builtins]
path = "../compiler-builtins"
default-features = false
features = ["unstable-public-internals"]
[dev-dependencies]
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
paste = "1.0.15"
[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
test = { git = "https://github.com/japaric/utest" }
utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
utest-macros = { git = "https://github.com/japaric/utest" }
[features]
default = ["mangled-names"]
c = ["compiler_builtins/c"]
no-asm = ["compiler_builtins/no-asm"]
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
mem = ["compiler_builtins/mem"]
mangled-names = ["compiler_builtins/mangled-names"]
# Skip tests that rely on f128 symbols being available on the system
no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
# Some platforms have some f128 functions but everything except integer conversions
no-sys-f128-int-convert = []
no-sys-f16-f128-convert = []
no-sys-f16-f64-convert = []
# Skip tests that rely on f16 symbols being available on the system
no-sys-f16 = ["no-sys-f16-f64-convert"]
# Enable icount benchmarks (requires iai-callgrind and valgrind)
icount = ["dep:iai-callgrind"]
# Enable report generation without bringing in more dependencies by default
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
# NOTE: benchmarks must be run with `--no-default-features` or with
# `-p builtins-test`, otherwise the default `compiler-builtins` feature
# of the `compiler_builtins` crate gets activated, resulting in linker
# errors.
[[bench]]
name = "float_add"
harness = false
[[bench]]
name = "float_sub"
harness = false
[[bench]]
name = "float_mul"
harness = false
[[bench]]
name = "float_div"
harness = false
[[bench]]
name = "float_cmp"
harness = false
[[bench]]
name = "float_conv"
harness = false
[[bench]]
name = "float_extend"
harness = false
[[bench]]
name = "float_trunc"
harness = false
[[bench]]
name = "float_pow"
harness = false
[[bench]]
name = "mem_icount"
harness = false
required-features = ["icount"]

View file

@ -0,0 +1,93 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::add;
use criterion::{Criterion, criterion_main};
float_bench! {
name: add_f32,
sig: (a: f32, b: f32) -> f32,
crate_fn: add::__addsf3,
sys_fn: __addsf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"addss {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fadd {a:s}, {a:s}, {b:s}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: add_f64,
sig: (a: f64, b: f64) -> f64,
crate_fn: add::__adddf3,
sys_fn: __adddf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"addsd {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fadd {a:d}, {a:d}, {b:d}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
#[cfg(f128_enabled)]
float_bench! {
name: add_f128,
sig: (a: f128, b: f128) -> f128,
crate_fn: add::__addtf3,
crate_fn_ppc: add::__addkf3,
sys_fn: __addtf3,
sys_fn_ppc: __addkf3,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
// Run all addition benchmarks; the `f128` variant only when the type is
// enabled for the current target.
pub fn float_add() {
    let mut criterion = Criterion::default().configure_from_args();
    add_f32(&mut criterion);
    add_f64(&mut criterion);
    #[cfg(f128_enabled)]
    {
        add_f128(&mut criterion);
    }
}
criterion_main!(float_add);

View file

@ -0,0 +1,207 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::cmp;
use criterion::{Criterion, criterion_main};
/// `gt` symbols are allowed to return differing results, they just get compared
/// to 0.
fn gt_res_eq(a: i32, b: i32) -> bool {
    // Two results agree iff they fall on the same side of zero: both strictly
    // positive ("greater") or both non-positive ("not greater").
    (a > 0) == (b > 0)
}
float_bench! {
name: cmp_f32_gt,
sig: (a: f32, b: f32) -> i32,
crate_fn: cmp::__gtsf2,
sys_fn: __gtsf2,
sys_available: all(),
output_eq: gt_res_eq,
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomiss {a}, {b}",
"seta {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:s}, {b:s}",
"cset {ret:w}, gt",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem,nostack),
);
ret
};
],
}
float_bench! {
name: cmp_f32_unord,
sig: (a: f32, b: f32) -> i32,
crate_fn: cmp::__unordsf2,
sys_fn: __unordsf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomiss {a}, {b}",
"setp {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:s}, {b:s}",
"cset {ret:w}, vs",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f64_gt,
sig: (a: f64, b: f64) -> i32,
crate_fn: cmp::__gtdf2,
sys_fn: __gtdf2,
sys_available: all(),
output_eq: gt_res_eq,
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomisd {a}, {b}",
"seta {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:d}, {b:d}",
"cset {ret:w}, gt",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f64_unord,
sig: (a: f64, b: f64) -> i32,
crate_fn: cmp::__unorddf2,
sys_fn: __unorddf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomisd {a}, {b}",
"setp {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:d}, {b:d}",
"cset {ret:w}, vs",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f128_gt,
sig: (a: f128, b: f128) -> i32,
crate_fn: cmp::__gttf2,
crate_fn_ppc: cmp::__gtkf2,
sys_fn: __gttf2,
sys_fn_ppc: __gtkf2,
sys_available: not(feature = "no-sys-f128"),
output_eq: gt_res_eq,
asm: []
}
float_bench! {
name: cmp_f128_unord,
sig: (a: f128, b: f128) -> i32,
crate_fn: cmp::__unordtf2,
crate_fn_ppc: cmp::__unordkf2,
sys_fn: __unordtf2,
sys_fn_ppc: __unordkf2,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
// Run all comparison benchmarks; the `f128` variants only when the type is
// enabled for the current target.
pub fn float_cmp() {
    let mut criterion = Criterion::default().configure_from_args();
    cmp_f32_gt(&mut criterion);
    cmp_f32_unord(&mut criterion);
    cmp_f64_gt(&mut criterion);
    cmp_f64_unord(&mut criterion);
    #[cfg(f128_enabled)]
    {
        cmp_f128_gt(&mut criterion);
        cmp_f128_unord(&mut criterion);
    }
}
criterion_main!(float_cmp);

View file

@ -0,0 +1,688 @@
#![allow(improper_ctypes)]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::conv;
use criterion::{Criterion, criterion_main};
/* unsigned int -> float */
float_bench! {
name: conv_u32_f32,
sig: (a: u32) -> f32,
crate_fn: conv::__floatunsisf,
sys_fn: __floatunsisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"mov {tmp:e}, {a:e}",
"cvtsi2ss {ret}, {tmp}",
a = in(reg) a,
tmp = out(reg) _,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"ucvtf {ret:s}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_u32_f64,
sig: (a: u32) -> f64,
crate_fn: conv::__floatunsidf,
sys_fn: __floatunsidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
"mov {tmp:e}, {a:e}",
"cvtsi2sd {ret}, {tmp}",
a = in(reg) a,
tmp = out(reg) _,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"ucvtf {ret:d}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_u32_f128,
sig: (a: u32) -> f128,
crate_fn: conv::__floatunsitf,
crate_fn_ppc: conv::__floatunsikf,
sys_fn: __floatunsitf,
sys_fn_ppc: __floatunsikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
float_bench! {
name: conv_u64_f32,
sig: (a: u64) -> f32,
crate_fn: conv::__floatundisf,
sys_fn: __floatundisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"ucvtf {ret:s}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_u64_f64,
sig: (a: u64) -> f64,
crate_fn: conv::__floatundidf,
sys_fn: __floatundidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"ucvtf {ret:d}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_u64_f128,
sig: (a: u64) -> f128,
crate_fn: conv::__floatunditf,
crate_fn_ppc: conv::__floatundikf,
sys_fn: __floatunditf,
sys_fn_ppc: __floatundikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
float_bench! {
name: conv_u128_f32,
sig: (a: u128) -> f32,
crate_fn: conv::__floatuntisf,
sys_fn: __floatuntisf,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_u128_f64,
sig: (a: u128) -> f64,
crate_fn: conv::__floatuntidf,
sys_fn: __floatuntidf,
sys_available: all(),
asm: []
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_u128_f128,
sig: (a: u128) -> f128,
crate_fn: conv::__floatuntitf,
crate_fn_ppc: conv::__floatuntikf,
sys_fn: __floatuntitf,
sys_fn_ppc: __floatuntikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
/* signed int -> float */
float_bench! {
name: conv_i32_f32,
sig: (a: i32) -> f32,
crate_fn: conv::__floatsisf,
sys_fn: __floatsisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"cvtsi2ss {ret}, {a:e}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"scvtf {ret:s}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i32_f64,
sig: (a: i32) -> f64,
crate_fn: conv::__floatsidf,
sys_fn: __floatsidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
"cvtsi2sd {ret}, {a:e}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"scvtf {ret:d}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_i32_f128,
sig: (a: i32) -> f128,
crate_fn: conv::__floatsitf,
crate_fn_ppc: conv::__floatsikf,
sys_fn: __floatsitf,
sys_fn_ppc: __floatsikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
float_bench! {
name: conv_i64_f32,
sig: (a: i64) -> f32,
crate_fn: conv::__floatdisf,
sys_fn: __floatdisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"cvtsi2ss {ret}, {a:r}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"scvtf {ret:s}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i64_f64,
sig: (a: i64) -> f64,
crate_fn: conv::__floatdidf,
sys_fn: __floatdidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
"cvtsi2sd {ret}, {a:r}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"scvtf {ret:d}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_i64_f128,
sig: (a: i64) -> f128,
crate_fn: conv::__floatditf,
crate_fn_ppc: conv::__floatdikf,
sys_fn: __floatditf,
sys_fn_ppc: __floatdikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
float_bench! {
name: conv_i128_f32,
sig: (a: i128) -> f32,
crate_fn: conv::__floattisf,
sys_fn: __floattisf,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_i128_f64,
sig: (a: i128) -> f64,
crate_fn: conv::__floattidf,
sys_fn: __floattidf,
sys_available: all(),
asm: []
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_i128_f128,
sig: (a: i128) -> f128,
crate_fn: conv::__floattitf,
crate_fn_ppc: conv::__floattikf,
sys_fn: __floattitf,
sys_fn_ppc: __floattikf,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
/* float -> unsigned int */
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u32,
sig: (a: f32) -> u32,
crate_fn: conv::__fixunssfsi,
sys_fn: __fixunssfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u32;
asm!(
"fcvtzu {ret:w}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u64,
sig: (a: f32) -> u64,
crate_fn: conv::__fixunssfdi,
sys_fn: __fixunssfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u64;
asm!(
"fcvtzu {ret:x}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u128,
sig: (a: f32) -> u128,
crate_fn: conv::__fixunssfti,
sys_fn: __fixunssfti,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_f64_u32,
sig: (a: f64) -> u32,
crate_fn: conv::__fixunsdfsi,
sys_fn: __fixunsdfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u32;
asm!(
"fcvtzu {ret:w}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_u64,
sig: (a: f64) -> u64,
crate_fn: conv::__fixunsdfdi,
sys_fn: __fixunsdfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u64;
asm!(
"fcvtzu {ret:x}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_u128,
sig: (a: f64) -> u128,
crate_fn: conv::__fixunsdfti,
sys_fn: __fixunsdfti,
sys_available: all(),
asm: []
}
#[cfg(f128_enabled)]
float_bench! {
name: conv_f128_u32,
sig: (a: f128) -> u32,
crate_fn: conv::__fixunstfsi,
crate_fn_ppc: conv::__fixunskfsi,
sys_fn: __fixunstfsi,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: []
}
// Remaining `f128` -> unsigned int conversion benchmarks. The `crate_fn_ppc`
// entries select the `__*kf*` symbol spellings used for `f128` on PowerPC.
#[cfg(f128_enabled)]
float_bench! {
    name: conv_f128_u64,
    sig: (a: f128) -> u64,
    crate_fn: conv::__fixunstfdi,
    crate_fn_ppc: conv::__fixunskfdi,
    sys_fn: __fixunstfdi,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: []
}

#[cfg(f128_enabled)]
float_bench! {
    name: conv_f128_u128,
    sig: (a: f128) -> u128,
    crate_fn: conv::__fixunstfti,
    crate_fn_ppc: conv::__fixunskfti,
    sys_fn: __fixunstfti,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: []
}

/* float -> signed int */

// The `f32` -> signed benchmarks are skipped on ppc64le; see the FIXME on the
// matching gate in `float_conv` below.
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
    name: conv_f32_i32,
    sig: (a: f32) -> i32,
    crate_fn: conv::__fixsfsi,
    sys_fn: __fixsfsi,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: i32;
            asm!(
                "fcvtzs {ret:w}, {a:s}",
                a = in(vreg) a,
                ret = lateout(reg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
    name: conv_f32_i64,
    sig: (a: f32) -> i64,
    crate_fn: conv::__fixsfdi,
    sys_fn: __fixsfdi,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: i64;
            asm!(
                "fcvtzs {ret:x}, {a:s}",
                a = in(vreg) a,
                ret = lateout(reg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
    name: conv_f32_i128,
    sig: (a: f32) -> i128,
    crate_fn: conv::__fixsfti,
    sys_fn: __fixsfti,
    sys_available: all(),
    asm: []
}

float_bench! {
    name: conv_f64_i32,
    sig: (a: f64) -> i32,
    crate_fn: conv::__fixdfsi,
    sys_fn: __fixdfsi,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: i32;
            asm!(
                "fcvtzs {ret:w}, {a:d}",
                a = in(vreg) a,
                ret = lateout(reg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

float_bench! {
    name: conv_f64_i64,
    sig: (a: f64) -> i64,
    crate_fn: conv::__fixdfdi,
    sys_fn: __fixdfdi,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: i64;
            asm!(
                "fcvtzs {ret:x}, {a:d}",
                a = in(vreg) a,
                ret = lateout(reg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

float_bench! {
    name: conv_f64_i128,
    sig: (a: f64) -> i128,
    crate_fn: conv::__fixdfti,
    sys_fn: __fixdfti,
    sys_available: all(),
    asm: []
}

#[cfg(f128_enabled)]
float_bench! {
    name: conv_f128_i32,
    sig: (a: f128) -> i32,
    crate_fn: conv::__fixtfsi,
    crate_fn_ppc: conv::__fixkfsi,
    sys_fn: __fixtfsi,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: []
}

#[cfg(f128_enabled)]
float_bench! {
    name: conv_f128_i64,
    sig: (a: f128) -> i64,
    crate_fn: conv::__fixtfdi,
    crate_fn_ppc: conv::__fixkfdi,
    sys_fn: __fixtfdi,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: []
}

#[cfg(f128_enabled)]
float_bench! {
    name: conv_f128_i128,
    sig: (a: f128) -> i128,
    crate_fn: conv::__fixtfti,
    crate_fn_ppc: conv::__fixkfti,
    sys_fn: __fixtfti,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: []
}
pub fn float_conv() {
let mut criterion = Criterion::default().configure_from_args();
conv_u32_f32(&mut criterion);
conv_u32_f64(&mut criterion);
conv_u64_f32(&mut criterion);
conv_u64_f64(&mut criterion);
conv_u128_f32(&mut criterion);
conv_u128_f64(&mut criterion);
conv_i32_f32(&mut criterion);
conv_i32_f64(&mut criterion);
conv_i64_f32(&mut criterion);
conv_i64_f64(&mut criterion);
conv_i128_f32(&mut criterion);
conv_i128_f64(&mut criterion);
conv_f64_u32(&mut criterion);
conv_f64_u64(&mut criterion);
conv_f64_u128(&mut criterion);
conv_f64_i32(&mut criterion);
conv_f64_i64(&mut criterion);
conv_f64_i128(&mut criterion);
#[cfg(f128_enabled)]
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
{
conv_u32_f128(&mut criterion);
conv_u64_f128(&mut criterion);
conv_u128_f128(&mut criterion);
conv_i32_f128(&mut criterion);
conv_i64_f128(&mut criterion);
conv_i128_f128(&mut criterion);
conv_f128_u32(&mut criterion);
conv_f128_u64(&mut criterion);
conv_f128_u128(&mut criterion);
conv_f128_i32(&mut criterion);
conv_f128_i64(&mut criterion);
conv_f128_i128(&mut criterion);
}
}
criterion_main!(float_conv);

View file

@ -0,0 +1,93 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::div;
use criterion::{Criterion, criterion_main};
// Float division benchmarks: crate implementation vs. system builtin vs. a
// single hardware instruction where one exists.
float_bench! {
    name: div_f32,
    sig: (a: f32, b: f32) -> f32,
    crate_fn: div::__divsf3,
    sys_fn: __divsf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "divss {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fdiv {a:s}, {a:s}, {b:s}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

float_bench! {
    name: div_f64,
    sig: (a: f64, b: f64) -> f64,
    crate_fn: div::__divdf3,
    sys_fn: __divdf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "divsd {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fdiv {a:d}, {a:d}, {b:d}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

// No common hardware instruction for `f128` division, so no `asm` entry.
#[cfg(f128_enabled)]
float_bench! {
    name: div_f128,
    sig: (a: f128, b: f128) -> f128,
    crate_fn: div::__divtf3,
    crate_fn_ppc: div::__divkf3,
    sys_fn: __divtf3,
    sys_fn_ppc: __divkf3,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
/// Entry point: run every division benchmark registered above.
pub fn float_div() {
    let mut c = Criterion::default().configure_from_args();
    // Always-available widths.
    div_f32(&mut c);
    div_f64(&mut c);
    // `f128` support depends on the toolchain/target.
    #[cfg(f128_enabled)]
    div_f128(&mut c);
}

criterion_main!(float_div);

View file

@ -0,0 +1,133 @@
#![allow(unused_variables)] // "unused" f16 registers
#![cfg_attr(f128_enabled, feature(f128))]
#![cfg_attr(f16_enabled, feature(f16))]
use builtins_test::float_bench;
use compiler_builtins::float::extend;
use criterion::{Criterion, criterion_main};
// Float widening ("extend") benchmarks. `f16` sources use aarch64 `fcvt`
// where available; `f128` destinations have no single-instruction form.
#[cfg(f16_enabled)]
float_bench! {
    name: extend_f16_f32,
    sig: (a: f16) -> f32,
    crate_fn: extend::__extendhfsf2,
    sys_fn: __extendhfsf2,
    sys_available: not(feature = "no-sys-f16"),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: f32;
            asm!(
                "fcvt {ret:s}, {a:h}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(f16_enabled)]
float_bench! {
    name: extend_f16_f64,
    sig: (a: f16) -> f64,
    crate_fn: extend::__extendhfdf2,
    sys_fn: __extendhfdf2,
    sys_available: not(feature = "no-sys-f16-f64-convert"),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: f64;
            asm!(
                "fcvt {ret:d}, {a:h}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(all(f16_enabled, f128_enabled))]
float_bench! {
    name: extend_f16_f128,
    sig: (a: f16) -> f128,
    crate_fn: extend::__extendhftf2,
    crate_fn_ppc: extend::__extendhfkf2,
    sys_fn: __extendhftf2,
    sys_fn_ppc: __extendhfkf2,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: [],
}

float_bench! {
    name: extend_f32_f64,
    sig: (a: f32) -> f64,
    crate_fn: extend::__extendsfdf2,
    sys_fn: __extendsfdf2,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: f64;
            asm!(
                "fcvt {ret:d}, {a:s}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(f128_enabled)]
float_bench! {
    name: extend_f32_f128,
    sig: (a: f32) -> f128,
    crate_fn: extend::__extendsftf2,
    crate_fn_ppc: extend::__extendsfkf2,
    sys_fn: __extendsftf2,
    sys_fn_ppc: __extendsfkf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: [],
}

#[cfg(f128_enabled)]
float_bench! {
    name: extend_f64_f128,
    sig: (a: f64) -> f128,
    crate_fn: extend::__extenddftf2,
    crate_fn_ppc: extend::__extenddfkf2,
    sys_fn: __extenddftf2,
    sys_fn_ppc: __extenddfkf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: [],
}
/// Entry point: run every widening benchmark registered above.
pub fn float_extend() {
    let mut c = Criterion::default().configure_from_args();

    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
    #[cfg(f16_enabled)]
    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
    {
        extend_f16_f32(&mut c);
        extend_f16_f64(&mut c);
        #[cfg(f128_enabled)]
        extend_f16_f128(&mut c);
    }

    extend_f32_f64(&mut c);

    // `f128` destinations depend on toolchain support.
    #[cfg(f128_enabled)]
    {
        extend_f32_f128(&mut c);
        extend_f64_f128(&mut c);
    }
}

criterion_main!(float_extend);

View file

@ -0,0 +1,93 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::mul;
use criterion::{Criterion, criterion_main};
// Float multiplication benchmarks: crate implementation vs. system builtin
// vs. a single hardware instruction where one exists.
float_bench! {
    name: mul_f32,
    sig: (a: f32, b: f32) -> f32,
    crate_fn: mul::__mulsf3,
    sys_fn: __mulsf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "mulss {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fmul {a:s}, {a:s}, {b:s}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

float_bench! {
    name: mul_f64,
    sig: (a: f64, b: f64) -> f64,
    crate_fn: mul::__muldf3,
    sys_fn: __muldf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "mulsd {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fmul {a:d}, {a:d}, {b:d}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

// No common hardware instruction for `f128` multiplication.
#[cfg(f128_enabled)]
float_bench! {
    name: mul_f128,
    sig: (a: f128, b: f128) -> f128,
    crate_fn: mul::__multf3,
    crate_fn_ppc: mul::__mulkf3,
    sys_fn: __multf3,
    sys_fn_ppc: __mulkf3,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
/// Entry point: run every multiplication benchmark registered above.
pub fn float_mul() {
    let mut c = Criterion::default().configure_from_args();
    // Always-available widths.
    mul_f32(&mut c);
    mul_f64(&mut c);
    // `f128` support depends on the toolchain/target.
    #[cfg(f128_enabled)]
    mul_f128(&mut c);
}

criterion_main!(float_mul);

View file

@ -0,0 +1,49 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::pow;
use criterion::{Criterion, criterion_main};
// `powi` (float raised to an integer power) benchmarks. There is no hardware
// instruction for these, so no `asm` entries.
float_bench! {
    name: powi_f32,
    sig: (a: f32, b: i32) -> f32,
    crate_fn: pow::__powisf2,
    sys_fn: __powisf2,
    sys_available: all(),
    asm: [],
}

float_bench! {
    name: powi_f64,
    sig: (a: f64, b: i32) -> f64,
    crate_fn: pow::__powidf2,
    sys_fn: __powidf2,
    sys_available: all(),
    asm: [],
}

// FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are
// distributed by nightly.
#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
float_bench! {
    name: powi_f128,
    sig: (a: f128, b: i32) -> f128,
    crate_fn: pow::__powitf2,
    crate_fn_ppc: pow::__powikf2,
    sys_fn: __powitf2,
    sys_fn_ppc: __powikf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
/// Entry point: run every `powi` benchmark registered above.
pub fn float_pow() {
    let mut c = Criterion::default().configure_from_args();
    powi_f32(&mut c);
    powi_f64(&mut c);
    // Gate matches the benchmark's own definition (needs system `f128` math).
    #[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
    powi_f128(&mut c);
}

criterion_main!(float_pow);

View file

@ -0,0 +1,93 @@
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::float_bench;
use compiler_builtins::float::sub;
use criterion::{Criterion, criterion_main};
// Float subtraction benchmarks: crate implementation vs. system builtin vs.
// a single hardware instruction where one exists.
float_bench! {
    name: sub_f32,
    sig: (a: f32, b: f32) -> f32,
    crate_fn: sub::__subsf3,
    sys_fn: __subsf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "subss {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fsub {a:s}, {a:s}, {b:s}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

float_bench! {
    name: sub_f64,
    sig: (a: f64, b: f64) -> f64,
    crate_fn: sub::__subdf3,
    sys_fn: __subdf3,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "subsd {a}, {b}",
                a = inout(xmm_reg) a,
                b = in(xmm_reg) b,
                options(nomem, nostack, pure)
            );
            a
        };

        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fsub {a:d}, {a:d}, {b:d}",
                a = inout(vreg) a,
                b = in(vreg) b,
                options(nomem, nostack, pure)
            );
            a
        };
    ],
}

// No common hardware instruction for `f128` subtraction.
#[cfg(f128_enabled)]
float_bench! {
    name: sub_f128,
    sig: (a: f128, b: f128) -> f128,
    crate_fn: sub::__subtf3,
    crate_fn_ppc: sub::__subkf3,
    sys_fn: __subtf3,
    sys_fn_ppc: __subkf3,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
/// Entry point: run every subtraction benchmark registered above.
pub fn float_sub() {
    let mut c = Criterion::default().configure_from_args();
    // Always-available widths.
    sub_f32(&mut c);
    sub_f64(&mut c);
    // `f128` support depends on the toolchain/target.
    #[cfg(f128_enabled)]
    sub_f128(&mut c);
}

criterion_main!(float_sub);

View file

@ -0,0 +1,146 @@
#![cfg_attr(f128_enabled, feature(f128))]
#![cfg_attr(f16_enabled, feature(f16))]
use builtins_test::float_bench;
use compiler_builtins::float::trunc;
use criterion::{Criterion, criterion_main};
// Float narrowing ("trunc") benchmarks. `f16` destinations use aarch64 `fcvt`
// where available; `f128` sources have no single-instruction form.
#[cfg(f16_enabled)]
float_bench! {
    name: trunc_f32_f16,
    sig: (a: f32) -> f16,
    crate_fn: trunc::__truncsfhf2,
    sys_fn: __truncsfhf2,
    sys_available: not(feature = "no-sys-f16"),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: f16;
            asm!(
                "fcvt {ret:h}, {a:s}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(f16_enabled)]
float_bench! {
    name: trunc_f64_f16,
    sig: (a: f64) -> f16,
    crate_fn: trunc::__truncdfhf2,
    sys_fn: __truncdfhf2,
    sys_available: not(feature = "no-sys-f16-f64-convert"),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let ret: f16;
            asm!(
                "fcvt {ret:h}, {a:d}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

float_bench! {
    name: trunc_f64_f32,
    sig: (a: f64) -> f32,
    crate_fn: trunc::__truncdfsf2,
    sys_fn: __truncdfsf2,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let ret: f32;
            asm!(
                "cvtsd2ss {ret}, {a}",
                a = in(xmm_reg) a,
                ret = lateout(xmm_reg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };

        #[cfg(target_arch = "aarch64")] {
            let ret: f32;
            asm!(
                "fcvt {ret:s}, {a:d}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );
            ret
        };
    ],
}

#[cfg(all(f16_enabled, f128_enabled))]
float_bench! {
    name: trunc_f128_f16,
    sig: (a: f128) -> f16,
    crate_fn: trunc::__trunctfhf2,
    crate_fn_ppc: trunc::__trunckfhf2,
    sys_fn: __trunctfhf2,
    sys_fn_ppc: __trunckfhf2,
    sys_available: not(feature = "no-sys-f16-f128-convert"),
    asm: [],
}

#[cfg(f128_enabled)]
float_bench! {
    name: trunc_f128_f32,
    sig: (a: f128) -> f32,
    crate_fn: trunc::__trunctfsf2,
    crate_fn_ppc: trunc::__trunckfsf2,
    sys_fn: __trunctfsf2,
    sys_fn_ppc: __trunckfsf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: [],
}

#[cfg(f128_enabled)]
float_bench! {
    name: trunc_f128_f64,
    sig: (a: f128) -> f64,
    crate_fn: trunc::__trunctfdf2,
    crate_fn_ppc: trunc::__trunckfdf2,
    sys_fn: __trunctfdf2,
    sys_fn_ppc: __trunckfdf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: [],
}
/// Entry point: run every narrowing benchmark registered above.
pub fn float_trunc() {
    let mut criterion = Criterion::default().configure_from_args();

    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
    #[cfg(f16_enabled)]
    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
    {
        trunc_f32_f16(&mut criterion);
        trunc_f64_f16(&mut criterion);
    }

    trunc_f64_f32(&mut criterion);

    // `f128` sources depend on toolchain support.
    #[cfg(f128_enabled)]
    {
        // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
        #[cfg(f16_enabled)]
        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
        trunc_f128_f16(&mut criterion);

        trunc_f128_f32(&mut criterion);
        trunc_f128_f64(&mut criterion);
    }
}

criterion_main!(float_trunc);

View file

@ -0,0 +1,364 @@
#![feature(test)]
extern crate test;
use test::{Bencher, black_box};
extern crate compiler_builtins;
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
const WORD_SIZE: usize = core::mem::size_of::<usize>();

/// A byte buffer backed by a `Vec<usize>`, so its start is always aligned to
/// at least `WORD_SIZE`.
struct AlignedVec {
    vec: Vec<usize>,
    // Length in *bytes* exposed through `Deref`; the backing allocation is
    // rounded up to a whole number of words.
    size: usize,
}

impl AlignedVec {
    /// Create a buffer of `size` bytes, every byte initialized to `fill`.
    fn new(fill: u8, size: usize) -> Self {
        // Broadcast the fill byte into every byte of a `usize` word.
        let mut broadcast = fill as usize;
        let mut bits = 8;
        while bits < WORD_SIZE * 8 {
            broadcast |= broadcast << bits;
            bits *= 2;
        }
        // Allocate ceil(size / WORD_SIZE) words so that `size` bytes are
        // always backed by the allocation. The previous expression,
        // `(size + WORD_SIZE - 1) & !WORD_SIZE`, only cleared a single bit:
        // it over-allocated by roughly a factor of WORD_SIZE in general and
        // allocated *zero* words for `size == 1`, which would make `deref`
        // produce a slice over unallocated memory.
        let words = (size + WORD_SIZE - 1) / WORD_SIZE;
        let vec = vec![broadcast; words];
        AlignedVec { vec, size }
    }
}

impl core::ops::Deref for AlignedVec {
    type Target = [u8];
    // View the word-backed storage as exactly `size` bytes.
    fn deref(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) }
    }
}

impl core::ops::DerefMut for AlignedVec {
    fn deref_mut(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) }
    }
}
/// Benchmark the standard-library copy (`copy_from_slice`) of `n` bytes, with
/// `src`/`dst` placed `offset1`/`offset2` bytes past word alignment.
fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
    let v1 = AlignedVec::new(1, n + offset1);
    let mut v2 = AlignedVec::new(0, n + offset2);
    b.bytes = n as u64;
    b.iter(|| {
        let src: &[u8] = black_box(&v1[offset1..]);
        let dst: &mut [u8] = black_box(&mut v2[offset2..]);
        dst.copy_from_slice(src);
    })
}

/// Benchmark this crate's `memcpy` with the same layout as `memcpy_builtin`.
fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
    let v1 = AlignedVec::new(1, n + offset1);
    let mut v2 = AlignedVec::new(0, n + offset2);
    b.bytes = n as u64;
    b.iter(|| {
        let src: &[u8] = black_box(&v1[offset1..]);
        let dst: &mut [u8] = black_box(&mut v2[offset2..]);
        unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) }
    })
}
/// Benchmark a plain byte-store loop over `n` bytes, `offset` past word
/// alignment (the "builtin" baseline).
fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) {
    let mut buf = AlignedVec::new(0, n + offset);
    b.bytes = n as u64;
    b.iter(|| {
        let dst: &mut [u8] = black_box(&mut buf[offset..]);
        let val: u8 = black_box(27);
        for byte in dst.iter_mut() {
            *byte = val;
        }
    })
}

/// Benchmark this crate's `memset` with the same layout as `memset_builtin`.
fn memset_rust(b: &mut Bencher, n: usize, offset: usize) {
    let mut buf = AlignedVec::new(0, n + offset);
    b.bytes = n as u64;
    b.iter(|| {
        let dst: &mut [u8] = black_box(&mut buf[offset..]);
        let val = black_box(27);
        unsafe { memset(dst.as_mut_ptr(), val, n) }
    })
}
/// Benchmark slice comparison of two aligned `n`-byte buffers that differ
/// only in the final byte (worst case: the whole buffer is scanned).
fn memcmp_builtin(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    v2[n - 1] = 1;
    b.bytes = n as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1);
        let s2: &[u8] = black_box(&v2);
        s1.cmp(s2)
    })
}

/// Same as `memcmp_builtin` but with `s2` shifted one byte, so the pointers
/// have different alignments.
// NOTE(review): here `s1` has `n` bytes and `s2` has `n - 1`; `Ord::cmp`
// handles the length mismatch lexicographically.
fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    v2[n - 1] = 1;
    b.bytes = n as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1[0..]);
        let s2: &[u8] = black_box(&v2[1..]);
        s1.cmp(s2)
    })
}

/// Benchmark this crate's `memcmp` with the same layout as `memcmp_builtin`.
fn memcmp_rust(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    v2[n - 1] = 1;
    b.bytes = n as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1);
        let s2: &[u8] = black_box(&v2);
        unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) }
    })
}

/// Benchmark this crate's `memcmp` on misaligned pointers; compares `n - 1`
/// bytes so the shifted buffer stays in bounds.
fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    v2[n - 1] = 1;
    b.bytes = n as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1[0..]);
        let s2: &[u8] = black_box(&v2[1..]);
        unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) }
    })
}
/// Benchmark the standard-library overlapping copy (`copy_within`) of `n`
/// bytes shifted forward by `n / 2 + offset`.
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
    let mut v = AlignedVec::new(0, n + n / 2 + offset);
    b.bytes = n as u64;
    b.iter(|| {
        let s: &mut [u8] = black_box(&mut v);
        s.copy_within(0..n, n / 2 + offset);
    })
}

/// Benchmark this crate's `memmove` with the same overlapping layout as
/// `memmove_builtin`.
fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) {
    let mut v = AlignedVec::new(0, n + n / 2 + offset);
    b.bytes = n as u64;
    b.iter(|| {
        let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr();
        let src: *const u8 = black_box(&v).as_ptr();
        unsafe { memmove(dst, src, n) };
    })
}
/* `memcpy`: aligned, equally-offset, and differently-offset (misaligned) */

#[bench]
fn memcpy_builtin_4096(b: &mut Bencher) {
    memcpy_builtin(b, 4096, 0, 0)
}
#[bench]
fn memcpy_rust_4096(b: &mut Bencher) {
    memcpy_rust(b, 4096, 0, 0)
}
#[bench]
fn memcpy_builtin_1048576(b: &mut Bencher) {
    memcpy_builtin(b, 1048576, 0, 0)
}
#[bench]
fn memcpy_rust_1048576(b: &mut Bencher) {
    memcpy_rust(b, 1048576, 0, 0)
}
#[bench]
fn memcpy_builtin_4096_offset(b: &mut Bencher) {
    memcpy_builtin(b, 4096, 65, 65)
}
#[bench]
fn memcpy_rust_4096_offset(b: &mut Bencher) {
    memcpy_rust(b, 4096, 65, 65)
}
#[bench]
fn memcpy_builtin_1048576_offset(b: &mut Bencher) {
    memcpy_builtin(b, 1048576, 65, 65)
}
#[bench]
fn memcpy_rust_1048576_offset(b: &mut Bencher) {
    memcpy_rust(b, 1048576, 65, 65)
}
#[bench]
fn memcpy_builtin_4096_misalign(b: &mut Bencher) {
    memcpy_builtin(b, 4096, 65, 66)
}
#[bench]
fn memcpy_rust_4096_misalign(b: &mut Bencher) {
    memcpy_rust(b, 4096, 65, 66)
}
#[bench]
fn memcpy_builtin_1048576_misalign(b: &mut Bencher) {
    memcpy_builtin(b, 1048576, 65, 66)
}
#[bench]
fn memcpy_rust_1048576_misalign(b: &mut Bencher) {
    memcpy_rust(b, 1048576, 65, 66)
}

/* `memset`: aligned and offset */

#[bench]
fn memset_builtin_4096(b: &mut Bencher) {
    memset_builtin(b, 4096, 0)
}
#[bench]
fn memset_rust_4096(b: &mut Bencher) {
    memset_rust(b, 4096, 0)
}
#[bench]
fn memset_builtin_1048576(b: &mut Bencher) {
    memset_builtin(b, 1048576, 0)
}
#[bench]
fn memset_rust_1048576(b: &mut Bencher) {
    memset_rust(b, 1048576, 0)
}
#[bench]
fn memset_builtin_4096_offset(b: &mut Bencher) {
    memset_builtin(b, 4096, 65)
}
#[bench]
fn memset_rust_4096_offset(b: &mut Bencher) {
    memset_rust(b, 4096, 65)
}
#[bench]
fn memset_builtin_1048576_offset(b: &mut Bencher) {
    memset_builtin(b, 1048576, 65)
}
#[bench]
fn memset_rust_1048576_offset(b: &mut Bencher) {
    memset_rust(b, 1048576, 65)
}

/* `memcmp`: aligned, then unaligned (names give the compared length) */

#[bench]
fn memcmp_builtin_8(b: &mut Bencher) {
    memcmp_builtin(b, 8)
}
#[bench]
fn memcmp_rust_8(b: &mut Bencher) {
    memcmp_rust(b, 8)
}
#[bench]
fn memcmp_builtin_16(b: &mut Bencher) {
    memcmp_builtin(b, 16)
}
#[bench]
fn memcmp_rust_16(b: &mut Bencher) {
    memcmp_rust(b, 16)
}
#[bench]
fn memcmp_builtin_32(b: &mut Bencher) {
    memcmp_builtin(b, 32)
}
#[bench]
fn memcmp_rust_32(b: &mut Bencher) {
    memcmp_rust(b, 32)
}
#[bench]
fn memcmp_builtin_64(b: &mut Bencher) {
    memcmp_builtin(b, 64)
}
#[bench]
fn memcmp_rust_64(b: &mut Bencher) {
    memcmp_rust(b, 64)
}
#[bench]
fn memcmp_builtin_4096(b: &mut Bencher) {
    memcmp_builtin(b, 4096)
}
#[bench]
fn memcmp_rust_4096(b: &mut Bencher) {
    memcmp_rust(b, 4096)
}
#[bench]
fn memcmp_builtin_1048576(b: &mut Bencher) {
    memcmp_builtin(b, 1048576)
}
#[bench]
fn memcmp_rust_1048576(b: &mut Bencher) {
    memcmp_rust(b, 1048576)
}
#[bench]
fn memcmp_builtin_unaligned_7(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 8)
}
#[bench]
fn memcmp_rust_unaligned_7(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 8)
}
#[bench]
fn memcmp_builtin_unaligned_15(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 16)
}
#[bench]
fn memcmp_rust_unaligned_15(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 16)
}
#[bench]
fn memcmp_builtin_unaligned_31(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 32)
}
#[bench]
fn memcmp_rust_unaligned_31(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 32)
}
#[bench]
fn memcmp_builtin_unaligned_63(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 64)
}
#[bench]
fn memcmp_rust_unaligned_63(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 64)
}
#[bench]
fn memcmp_builtin_unaligned_4095(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 4096)
}
#[bench]
fn memcmp_rust_unaligned_4095(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 4096)
}
#[bench]
fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) {
    memcmp_builtin_unaligned(b, 1048576)
}
#[bench]
fn memcmp_rust_unaligned_1048575(b: &mut Bencher) {
    memcmp_rust_unaligned(b, 1048576)
}

/* `memmove`: aligned and misaligned overlapping copies */

#[bench]
fn memmove_builtin_4096(b: &mut Bencher) {
    memmove_builtin(b, 4096, 0)
}
#[bench]
fn memmove_rust_4096(b: &mut Bencher) {
    memmove_rust(b, 4096, 0)
}
#[bench]
fn memmove_builtin_1048576(b: &mut Bencher) {
    memmove_builtin(b, 1048576, 0)
}
#[bench]
fn memmove_rust_1048576(b: &mut Bencher) {
    memmove_rust(b, 1048576, 0)
}
#[bench]
fn memmove_builtin_4096_misalign(b: &mut Bencher) {
    memmove_builtin(b, 4096, 1)
}
#[bench]
fn memmove_rust_4096_misalign(b: &mut Bencher) {
    memmove_rust(b, 4096, 1)
}
#[bench]
fn memmove_builtin_1048576_misalign(b: &mut Bencher) {
    memmove_builtin(b, 1048576, 1)
}
#[bench]
fn memmove_rust_1048576_misalign(b: &mut Bencher) {
    memmove_rust(b, 1048576, 1)
}

View file

@ -0,0 +1,500 @@
//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
//! is stable enough to be tested in CI.
use std::hint::black_box;
use std::{ops, slice};
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
use iai_callgrind::{library_benchmark, library_benchmark_group, main};
const PAGE_SIZE: usize = 0x1000; // 4 kiB
const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day
const MEG1: usize = 1 << 20; // 1 MiB

// A page-sized, page-aligned unit of storage; used so boxed buffers start on
// a page boundary.
#[derive(Clone)]
#[repr(C, align(0x1000))]
struct Page([u8; PAGE_SIZE]);

/// A buffer that is page-aligned by default, with an optional offset to create a
/// misalignment.
struct AlignedSlice {
    buf: Box<[Page]>,
    // Length in bytes exposed through `Deref`.
    len: usize,
    // Byte offset from the page boundary at which the slice starts.
    offset: usize,
}
impl AlignedSlice {
    /// Allocate a page-aligned, zeroed buffer with at least `len` bytes,
    /// starting `offset` bytes past page alignment.
    fn new_zeroed(len: usize, offset: usize) -> Self {
        assert!(offset < PAGE_SIZE);
        // Round the total byte count up to whole pages.
        let total_len = len + offset;
        let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 };
        let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice();
        AlignedSlice { buf, len, offset }
    }
}
impl ops::Deref for AlignedSlice {
    type Target = [u8];
    // View `len` bytes starting `offset` past the page-aligned allocation.
    fn deref(&self) -> &Self::Target {
        unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::<u8>().add(self.offset), self.len) }
    }
}

impl ops::DerefMut for AlignedSlice {
    fn deref_mut(&mut self) -> &mut Self::Target {
        unsafe {
            slice::from_raw_parts_mut(
                self.buf.as_mut_ptr().cast::<u8>().add(self.offset),
                self.len,
            )
        }
    }
}
mod mcpy {
    use super::*;

    /// Copy length plus source/destination offsets from page alignment.
    struct Cfg {
        len: usize,
        s_off: usize,
        d_off: usize,
    }

    /// Build a 1-filled source and a zeroed destination of `len` bytes each.
    fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
        let Cfg { len, s_off, d_off } = cfg;
        println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}");
        let mut src = AlignedSlice::new_zeroed(len, s_off);
        let dst = AlignedSlice::new_zeroed(len, d_off);
        src.fill(1);
        (len, src, dst)
    }

    #[library_benchmark]
    #[benches::aligned(
        // Both aligned
        args = [
            Cfg { len: 16, s_off: 0, d_off: 0 },
            Cfg { len: 32, s_off: 0, d_off: 0 },
            Cfg { len: 64, s_off: 0, d_off: 0 },
            Cfg { len: 512, s_off: 0, d_off: 0 },
            Cfg { len: 4096, s_off: 0, d_off: 0 },
            Cfg { len: MEG1, s_off: 0, d_off: 0 },
        ],
        setup = setup,
    )]
    #[benches::offset(
        // Both at the same offset
        args = [
            Cfg { len: 16, s_off: 65, d_off: 65 },
            Cfg { len: 32, s_off: 65, d_off: 65 },
            Cfg { len: 64, s_off: 65, d_off: 65 },
            Cfg { len: 512, s_off: 65, d_off: 65 },
            Cfg { len: 4096, s_off: 65, d_off: 65 },
            Cfg { len: MEG1, s_off: 65, d_off: 65 },
        ],
        setup = setup,
    )]
    #[benches::misaligned(
        // `src` and `dst` both misaligned by different amounts
        args = [
            Cfg { len: 16, s_off: 65, d_off: 66 },
            Cfg { len: 32, s_off: 65, d_off: 66 },
            Cfg { len: 64, s_off: 65, d_off: 66 },
            Cfg { len: 512, s_off: 65, d_off: 66 },
            Cfg { len: 4096, s_off: 65, d_off: 66 },
            Cfg { len: MEG1, s_off: 65, d_off: 66 },
        ],
        setup = setup,
    )]
    // NOTE(review): `setup` returns `(len, src, dst)`. The previous pattern,
    // `(len, mut dst, src)`, silently swapped the buffers, so the filled
    // "source" was written over and the printed src/dst offsets described the
    // opposite buffers.
    fn bench((len, src, mut dst): (usize, AlignedSlice, AlignedSlice)) {
        unsafe {
            black_box(memcpy(
                black_box(dst.as_mut_ptr()),
                black_box(src.as_ptr()),
                black_box(len),
            ));
        }
    }

    library_benchmark_group!(name = memcpy; benchmarks = bench);
}
mod mset {
    use super::*;

    // Fill length plus offset from page alignment.
    struct Cfg {
        len: usize,
        offset: usize,
    }

    /// Build a zeroed destination buffer for a `memset` of `len` bytes.
    fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) {
        println!("bytes: {len}, offset: {offset}");
        (len, AlignedSlice::new_zeroed(len, offset))
    }

    #[library_benchmark]
    #[benches::aligned(
        args = [
            Cfg { len: 16, offset: 0 },
            Cfg { len: 32, offset: 0 },
            Cfg { len: 64, offset: 0 },
            Cfg { len: 512, offset: 0 },
            Cfg { len: 4096, offset: 0 },
            Cfg { len: MEG1, offset: 0 },
        ],
        setup = setup,
    )]
    #[benches::offset(
        args = [
            Cfg { len: 16, offset: 65 },
            Cfg { len: 32, offset: 65 },
            Cfg { len: 64, offset: 65 },
            Cfg { len: 512, offset: 65 },
            Cfg { len: 4096, offset: 65 },
            Cfg { len: MEG1, offset: 65 },
        ],
        setup = setup,
    )]
    fn bench((len, mut dst): (usize, AlignedSlice)) {
        unsafe {
            black_box(memset(
                black_box(dst.as_mut_ptr()),
                black_box(27),
                black_box(len),
            ));
        }
    }

    library_benchmark_group!(name = memset; benchmarks = bench);
}
mod mcmp {
    use super::*;

    // Compare length plus per-buffer offsets from page alignment.
    struct Cfg {
        len: usize,
        s_off: usize,
        d_off: usize,
    }

    /// Build two zeroed buffers that differ only in the final byte of the
    /// second, so the comparison scans the full length.
    fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
        let Cfg { len, s_off, d_off } = cfg;
        println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}");
        let b1 = AlignedSlice::new_zeroed(len, s_off);
        let mut b2 = AlignedSlice::new_zeroed(len, d_off);
        b2[len - 1] = 1;
        (len, b1, b2)
    }

    #[library_benchmark]
    #[benches::aligned(
        // Both aligned
        args = [
            Cfg { len: 16, s_off: 0, d_off: 0 },
            Cfg { len: 32, s_off: 0, d_off: 0 },
            Cfg { len: 64, s_off: 0, d_off: 0 },
            Cfg { len: 512, s_off: 0, d_off: 0 },
            Cfg { len: 4096, s_off: 0, d_off: 0 },
            Cfg { len: MEG1, s_off: 0, d_off: 0 },
        ],
        setup = setup
    )]
    #[benches::offset(
        // Both at the same offset
        args = [
            Cfg { len: 16, s_off: 65, d_off: 65 },
            Cfg { len: 32, s_off: 65, d_off: 65 },
            Cfg { len: 64, s_off: 65, d_off: 65 },
            Cfg { len: 512, s_off: 65, d_off: 65 },
            Cfg { len: 4096, s_off: 65, d_off: 65 },
            Cfg { len: MEG1, s_off: 65, d_off: 65 },
        ],
        setup = setup
    )]
    #[benches::misaligned(
        // `src` and `dst` both misaligned by different amounts
        args = [
            Cfg { len: 16, s_off: 65, d_off: 66 },
            Cfg { len: 32, s_off: 65, d_off: 66 },
            Cfg { len: 64, s_off: 65, d_off: 66 },
            Cfg { len: 512, s_off: 65, d_off: 66 },
            Cfg { len: 4096, s_off: 65, d_off: 66 },
            Cfg { len: MEG1, s_off: 65, d_off: 66 },
        ],
        setup = setup
    )]
    // NOTE(review): the `dst`/`src` names are borrowed from the memcpy bench;
    // `memcmp` only reads both buffers, so the swap relative to `setup`'s
    // `(len, b1, b2)` tuple does not affect what is measured.
    fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) {
        unsafe {
            black_box(memcmp(
                black_box(dst.as_mut_ptr()),
                black_box(src.as_ptr()),
                black_box(len),
            ));
        }
    }

    library_benchmark_group!(name = memcmp; benchmarks = bench);
}
mod mmove {
    use Spread::{Aligned, Large, Medium, Small};

    use super::*;

    // Move length, distance between `src` and `dst`, and offset from page
    // alignment.
    struct Cfg {
        len: usize,
        spread: Spread,
        off: usize,
    }

    enum Spread {
        /// `src` and `dst` are close and have the same alignment (or offset).
        Aligned,
        /// `src` and `dst` are close.
        Small,
        /// `src` and `dst` are halfway offset in the buffer.
        Medium,
        /// `src` and `dst` only overlap by a single byte.
        Large,
    }

    /// Turn a `Spread` variant into a concrete byte distance for a move of
    /// `len` bytes. `Small` and `Large` use fixed relationships (`1` and
    /// `len - 1`) and are therefore misaligned regardless of `len`.
    fn calculate_spread(len: usize, spread: Spread) -> usize {
        match spread {
            // Note that this test doesn't make sense for lengths less than len=128
            Aligned => {
                assert!(len > MAX_ALIGN, "aligned memset would have no overlap");
                MAX_ALIGN
            }
            Small => 1,
            Medium => (len / 2) + 1, // add 1 so all are misaligned
            Large => len - 1,
        }
    }

    /// Build a buffer whose *first* `len` bytes hold a counting pattern, for
    /// a move toward the end of the buffer.
    fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
        let Cfg { len, spread, off } = cfg;
        let spread = calculate_spread(len, spread);
        println!("bytes: {len}, spread: {spread}, offset: {off}, forward");
        assert!(spread < len, "memmove tests should have some overlap");
        let mut buf = AlignedSlice::new_zeroed(len + spread, off);
        let mut fill: usize = 0;
        buf[..len].fill_with(|| {
            fill += 1;
            fill as u8
        });
        (len, spread, buf)
    }

    /// Build a buffer whose *last* `len` bytes hold a counting pattern, for a
    /// move toward the start of the buffer.
    fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
        let Cfg { len, spread, off } = cfg;
        let spread = calculate_spread(len, spread);
        println!("bytes: {len}, spread: {spread}, offset: {off}, backward");
        assert!(spread < len, "memmove tests should have some overlap");
        let mut buf = AlignedSlice::new_zeroed(len + spread, off);
        let mut fill: usize = 0;
        buf[spread..].fill_with(|| {
            fill += 1;
            fill as u8
        });
        (len, spread, buf)
    }

    #[library_benchmark]
    #[benches::aligned(
        args = [
            // Don't test small spreads since there is no overlap
            Cfg { len: 4096, spread: Aligned, off: 0 },
            Cfg { len: MEG1, spread: Aligned, off: 0 },
        ],
        setup = setup_forward
    )]
    #[benches::small_spread(
        args = [
            Cfg { len: 16, spread: Small, off: 0 },
            Cfg { len: 32, spread: Small, off: 0 },
            Cfg { len: 64, spread: Small, off: 0 },
            Cfg { len: 512, spread: Small, off: 0 },
            Cfg { len: 4096, spread: Small, off: 0 },
            Cfg { len: MEG1, spread: Small, off: 0 },
        ],
        setup = setup_forward
    )]
    #[benches::medium_spread(
        args = [
            Cfg { len: 16, spread: Medium, off: 0 },
            Cfg { len: 32, spread: Medium, off: 0 },
            Cfg { len: 64, spread: Medium, off: 0 },
            Cfg { len: 512, spread: Medium, off: 0 },
            Cfg { len: 4096, spread: Medium, off: 0 },
            Cfg { len: MEG1, spread: Medium, off: 0 },
        ],
        setup = setup_forward
    )]
    #[benches::large_spread(
        args = [
            Cfg { len: 16, spread: Large, off: 0 },
            Cfg { len: 32, spread: Large, off: 0 },
            Cfg { len: 64, spread: Large, off: 0 },
            Cfg { len: 512, spread: Large, off: 0 },
            Cfg { len: 4096, spread: Large, off: 0 },
            Cfg { len: MEG1, spread: Large, off: 0 },
        ],
        setup = setup_forward
    )]
    #[benches::aligned_off(
        args = [
            Cfg { len: 4096, spread: Aligned, off: 65 },
            Cfg { len: MEG1, spread: Aligned, off: 65 },
        ],
        setup = setup_forward
    )]
    #[benches::small_spread_off(
        args = [
            Cfg { len: 16, spread: Small, off: 65 },
            Cfg { len: 32, spread: Small, off: 65 },
            Cfg { len: 64, spread: Small, off: 65 },
            Cfg { len: 512, spread: Small, off: 65 },
            Cfg { len: 4096, spread: Small, off: 65 },
            Cfg { len: MEG1, spread: Small, off: 65 },
        ],
        setup = setup_forward
    )]
    #[benches::medium_spread_off(
        args = [
            Cfg { len: 16, spread: Medium, off: 65 },
            Cfg { len: 32, spread: Medium, off: 65 },
            Cfg { len: 64, spread: Medium, off: 65 },
            Cfg { len: 512, spread: Medium, off: 65 },
            Cfg { len: 4096, spread: Medium, off: 65 },
            Cfg { len: MEG1, spread: Medium, off: 65 },
        ],
        setup = setup_forward
    )]
    #[benches::large_spread_off(
        args = [
            Cfg { len: 16, spread: Large, off: 65 },
            Cfg { len: 32, spread: Large, off: 65 },
            Cfg { len: 64, spread: Large, off: 65 },
            Cfg { len: 512, spread: Large, off: 65 },
            Cfg { len: 4096, spread: Large, off: 65 },
            Cfg { len: MEG1, spread: Large, off: 65 },
        ],
        setup = setup_forward
    )]
    fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
        // Test moving from the start of the buffer toward the end
        unsafe {
            black_box(memmove(
                black_box(buf[spread..].as_mut_ptr()),
                black_box(buf.as_ptr()),
                black_box(len),
            ));
        }
    }

    #[library_benchmark]
    #[benches::aligned(
        args = [
            // Don't test small spreads since there is no overlap
            Cfg { len: 4096, spread: Aligned, off: 0 },
            Cfg { len: MEG1, spread: Aligned, off: 0 },
        ],
        setup = setup_backward
    )]
    #[benches::small_spread(
        args = [
            Cfg { len: 16, spread: Small, off: 0 },
            Cfg { len: 32, spread: Small, off: 0 },
            Cfg { len: 64, spread: Small, off: 0 },
            Cfg { len: 512, spread: Small, off: 0 },
            Cfg { len: 4096, spread: Small, off: 0 },
            Cfg { len: MEG1, spread: Small, off: 0 },
        ],
        setup = setup_backward
    )]
    #[benches::medium_spread(
        args = [
            Cfg { len: 16, spread: Medium, off: 0 },
            Cfg { len: 32, spread: Medium, off: 0 },
            Cfg { len: 64, spread: Medium, off: 0 },
            Cfg { len: 512, spread: Medium, off: 0 },
            Cfg { len: 4096, spread: Medium, off: 0 },
            Cfg { len: MEG1, spread: Medium, off: 0 },
        ],
        setup = setup_backward
    )]
    #[benches::large_spread(
        args = [
            Cfg { len: 16, spread: Large, off: 0 },
            Cfg { len: 32, spread: Large, off: 0 },
            Cfg { len: 64, spread: Large, off: 0 },
            Cfg { len: 512, spread: Large, off: 0 },
            Cfg { len: 4096, spread: Large, off: 0 },
            Cfg { len: MEG1, spread: Large, off: 0 },
        ],
        setup = setup_backward
    )]
    #[benches::aligned_off(
        args = [
            // Don't test small spreads since there is no overlap
            Cfg { len: 4096, spread: Aligned, off: 65 },
            Cfg { len: MEG1, spread: Aligned, off: 65 },
        ],
        setup = setup_backward
    )]
    #[benches::small_spread_off(
        args = [
            Cfg { len: 16, spread: Small, off: 65 },
            Cfg { len: 32, spread: Small, off: 65 },
            Cfg { len: 64, spread: Small, off: 65 },
            Cfg { len: 512, spread: Small, off: 65 },
            Cfg { len: 4096, spread: Small, off: 65 },
            Cfg { len: MEG1, spread: Small, off: 65 },
        ],
        setup = setup_backward
    )]
    #[benches::medium_spread_off(
        args = [
            Cfg { len: 16, spread: Medium, off: 65 },
            Cfg { len: 32, spread: Medium, off: 65 },
            Cfg { len: 64, spread: Medium, off: 65 },
            Cfg { len: 512, spread: Medium, off: 65 },
            Cfg { len: 4096, spread: Medium, off: 65 },
            Cfg { len: MEG1, spread: Medium, off: 65 },
        ],
        setup = setup_backward
    )]
    #[benches::large_spread_off(
        args = [
            Cfg { len: 16, spread: Large, off: 65 },
            Cfg { len: 32, spread: Large, off: 65 },
            Cfg { len: 64, spread: Large, off: 65 },
            Cfg { len: 512, spread: Large, off: 65 },
            Cfg { len: 4096, spread: Large, off: 65 },
            Cfg { len: MEG1, spread: Large, off: 65 },
        ],
        setup = setup_backward
    )]
    fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
        // Test moving from the end of the buffer toward the start
        unsafe {
            black_box(memmove(
                black_box(buf.as_mut_ptr()),
                black_box(buf[spread..].as_ptr()),
                black_box(len),
            ));
        }
    }

    library_benchmark_group!(name = memmove; benchmarks = forward, backward);
}
use mcmp::memcmp;
use mcpy::memcpy;
use mmove::memmove;
use mset::memset;
// iai-callgrind entry point; the names listed here are the
// `library_benchmark_group!` groups declared in the modules above.
main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);

View file

@ -0,0 +1,120 @@
use std::collections::HashSet;
// Textually include the target-detection helpers from the `compiler-builtins`
// crate's `configure.rs` so this build script can reuse them.
mod builtins_configure {
    include!("../compiler-builtins/configure.rs");
}
/// Features to enable when the target's system libraries are missing the
/// corresponding symbols (tests then fall back to apfloat-based results).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum Feature {
    // `f128` symbols are unavailable.
    NoSysF128,
    // `f128 <-> int` conversion symbols are unavailable.
    NoSysF128IntConvert,
    // `f16` symbols are unavailable.
    NoSysF16,
    // `f16 <-> f64` conversion symbols are unavailable.
    NoSysF16F64Convert,
    // `f16 <-> f128` conversion symbols are unavailable.
    NoSysF16F128Convert,
}
impl Feature {
    /// Features that are automatically implied when `self` is enabled.
    ///
    /// Missing `f128` (or `f16`) symbols necessarily mean the related
    /// conversion symbols are missing as well.
    fn implies(self) -> &'static [Self] {
        match self {
            Self::NoSysF128 => &[Self::NoSysF128IntConvert, Self::NoSysF16F128Convert],
            Self::NoSysF16 => &[Self::NoSysF16F64Convert, Self::NoSysF16F128Convert],
            Self::NoSysF128IntConvert | Self::NoSysF16F64Convert | Self::NoSysF16F128Convert => {
                &[]
            }
        }
    }
}
fn main() {
    println!("cargo::rerun-if-changed=../configure.rs");
    let target = builtins_configure::Target::from_env();
    let mut features = HashSet::new();
    // These platforms do not have f128 symbols available in their system libraries, so
    // skip related tests.
    if target.arch == "arm"
        || target.vendor == "apple"
        || target.env == "msvc"
        // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See
        // <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>.
        || (target.os == "windows" && target.env == "gnu")
        // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86.
        // See <https://github.com/llvm/llvm-project/issues/77401>.
        || target.arch == "x86"
        // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See
        // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105635926>.
        || target.arch == "powerpc"
        // NOTE(review): this assumes `Target::arch` carries the raw triple component
        // (where little-endian is spelled `powerpc64le`), not cfg-style `target_arch`
        // (which is `powerpc64` for both endians) — confirm against configure.rs.
        || target.arch == "powerpc64le"
        // FIXME: We get different results from the builtin functions. See
        // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287>.
        || target.arch == "powerpc64"
    {
        features.insert(Feature::NoSysF128);
    }
    if target.arch == "x86" {
        // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else
        features.insert(Feature::NoSysF128IntConvert);
        // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries
        features.insert(Feature::NoSysF16F128Convert);
    }
    // These platforms do not have f16 symbols available in their system libraries, so
    // skip related tests. Most of these are missing `f16 <-> f32` conversion routines.
    if (target.arch == "aarch64" && target.os == "linux")
        || target.arch.starts_with("arm")
        || target.arch == "powerpc"
        || target.arch == "powerpc64"
        || target.arch == "powerpc64le"
        || target.arch == "loongarch64"
        || (target.arch == "x86" && !target.has_feature("sse"))
        || target.os == "windows"
        // Linking says "error: function signature mismatch: __extendhfsf2" and seems to
        // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See
        // <https://github.com/llvm/llvm-project/issues/96438>.
        || target.arch == "wasm32"
        || target.arch == "wasm64"
    {
        features.insert(Feature::NoSysF16);
    }
    // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
    if target.vendor == "apple" || target.os == "windows" {
        features.insert(Feature::NoSysF16F64Convert);
    }
    // Add implied features. Collection is required for borrows.
    features.extend(
        features
            .iter()
            .flat_map(|x| x.implies())
            .copied()
            .collect::<Vec<_>>(),
    );
    // Emit a cargo cfg (and a visible build warning) for every fallback selected above.
    for feature in features {
        let (name, warning) = match feature {
            Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
            Feature::NoSysF128IntConvert => (
                "no-sys-f128-int-convert",
                "using apfloat fallback for f128 <-> int conversions",
            ),
            Feature::NoSysF16F64Convert => (
                "no-sys-f16-f64-convert",
                "using apfloat fallback for f16 <-> f64 conversions",
            ),
            Feature::NoSysF16F128Convert => (
                "no-sys-f16-f128-convert",
                "using apfloat fallback for f16 <-> f128 conversions",
            ),
            Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"),
        };
        println!("cargo:warning={warning}");
        println!("cargo:rustc-cfg=feature=\"{name}\"");
    }
    builtins_configure::configure_aliases(&target);
    builtins_configure::configure_f16_f128(&target);
}

View file

@ -0,0 +1,366 @@
use alloc::vec::Vec;
use core::cell::RefCell;
use compiler_builtins::float::Float;
/// Number of fuzzed inputs used when checking that two implementations agree
pub const CHECK_ITER_ITEMS: u32 = 10_000;
/// Number of inputs fed to each benchmark iteration, enough to get a variety
pub const BENCH_ITER_ITEMS: u32 = 500;
/// Still run benchmarks/tests but don't check correctness between compiler-builtins and
/// builtin system functions
pub fn skip_sys_checks(test_name: &str) -> bool {
    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617>
    // FIXME(#616): `mul_*` disabled until the fix is in nightly
    // <https://github.com/rust-lang/compiler-builtins/issues/616>
    const ALWAYS_SKIPPED: &[&str] = &[
        "extend_f16_f32",
        "trunc_f32_f16",
        "trunc_f64_f16",
        "mul_f32",
        "mul_f64",
    ];
    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
    // in their benchmark modules due to runtime panics.
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];
    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const X86_NO_SSE_SKIPPED: &[&str] = &[
        "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
    ];
    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
    // uses `compiler-rt` version.
    // <https://github.com/llvm/llvm-project/issues/91840>
    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
    // FIXME(llvm): system symbols have incorrect results on Windows
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
    const WINDOWS_SKIPPED: &[&str] = &[
        "conv_f32_u128",
        "conv_f32_i128",
        "conv_f64_u128",
        "conv_f64_i128",
    ];
    // The Arm symbols need a different ABI that our macro doesn't handle, just skip it
    if cfg!(target_arch = "arm") {
        return true;
    }
    // Everything else reduces to "is this test on the skip list for the current target?".
    ALWAYS_SKIPPED.contains(&test_name)
        || (cfg!(all(target_arch = "powerpc64", target_endian = "little"))
            && PPC64LE_SKIPPED.contains(&test_name))
        || (cfg!(all(target_arch = "x86", not(target_feature = "sse")))
            && X86_NO_SSE_SKIPPED.contains(&test_name))
        || (cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name))
        || (cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name))
}
/// Still run benchmarks/tests but don't check correctness between compiler-builtins and
/// assembly functions
pub fn skip_asm_checks(_test_name: &str) -> bool {
    // No assembly comparisons need to be skipped at the moment.
    false
}
/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten
/// assembly.
///
/// Expands to a Criterion benchmark function named `$name` that first cross-checks the
/// implementations against each other on a fuzzed test vector, then times each of them.
///
/// # Safety
///
/// The signature must be correct and any assembly must be sound.
#[macro_export]
macro_rules! float_bench {
    (
        // Name of this benchmark
        name: $name:ident,
        // The function signature to be tested
        sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty,
        // Path to the crate in compiler_builtins
        crate_fn: $crate_fn:path,
        // Optional alias on ppc
        $( crate_fn_ppc: $crate_fn_ppc:path, )?
        // Name of the system symbol
        sys_fn: $sys_fn:ident,
        // Optional alias on ppc
        $( sys_fn_ppc: $sys_fn_ppc:path, )?
        // Meta saying whether the system symbol is available
        sys_available: $sys_available:meta,
        // An optional function to validate the results of two functions are equal, if not
        // just `$ret_ty::check_eq`
        $( output_eq: $output_eq:expr, )?
        // Assembly implementations, if any.
        asm: [
            $(
                #[cfg($asm_meta:meta)] {
                    $($asm_tt:tt)*
                }
            );*
            $(;)?
        ]
        $(,)?
    ) => {paste::paste! {
        // SAFETY: macro invocation must use the correct signature
        #[cfg($sys_available)]
        unsafe extern "C" {
            /// Binding for the system function
            #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
            fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
            #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
            float_bench! { @coalesce_fn $($sys_fn_ppc)? =>
                fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
            }
        }
        // The generated Criterion harness.
        fn $name(c: &mut Criterion) {
            use core::hint::black_box;
            use compiler_builtins::float::Float;
            use $crate::bench::TestIO;
            // Thin wrappers give all three candidates comparable call overhead.
            #[inline(never)] // equalize with external calls
            fn crate_fn($($arg: $arg_ty),*) -> $ret_ty {
                #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
                let target_crate_fn = $crate_fn;
                // On PPC, use an alias if specified
                #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
                let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn);
                target_crate_fn( $($arg),* )
            }
            #[inline(always)] // already a branch
            #[cfg($sys_available)]
            fn sys_fn($($arg: $arg_ty),*) -> $ret_ty {
                #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
                let target_sys_fn = $sys_fn;
                // On PPC, use an alias if specified
                #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
                let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn);
                unsafe { target_sys_fn( $($arg),* ) }
            }
            #[inline(never)] // equalize with external calls
            #[cfg(any( $($asm_meta),* ))]
            fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty {
                use core::arch::asm;
                $(
                    #[cfg($asm_meta)]
                    unsafe { $($asm_tt)* }
                )*
            }
            // Deterministic input vectors built via the `TestIO` impls.
            let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS);
            let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS);
            let test_name = stringify!($name);
            let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq);
            // Verify math lines up. We run the crate functions even if we don't validate the
            // output here to make sure there are no panics or crashes.
            #[cfg($sys_available)]
            for ($($arg),*) in testvec.iter().copied() {
                let crate_res = crate_fn($($arg),*);
                let sys_res = sys_fn($($arg),*);
                if $crate::bench::skip_sys_checks(test_name) {
                    continue;
                }
                assert!(
                    check_eq(crate_res, sys_res),
                    "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}",
                    ($($arg),* ,)
                );
            }
            #[cfg(any( $($asm_meta),* ))]
            {
                for ($($arg),*) in testvec.iter().copied() {
                    let crate_res = crate_fn($($arg),*);
                    let asm_res = asm_fn($($arg),*);
                    if $crate::bench::skip_asm_checks(test_name) {
                        continue;
                    }
                    assert!(
                        check_eq(crate_res, asm_res),
                        "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}",
                        ($($arg),* ,)
                    );
                }
            }
            // Timing runs: each candidate processes the same benchmark vector.
            let mut group = c.benchmark_group(test_name);
            group.bench_function("compiler-builtins", |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(crate_fn( $(black_box($arg)),* ));
                }
            }));
            #[cfg($sys_available)]
            group.bench_function("system", |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(sys_fn( $(black_box($arg)),* ));
                }
            }));
            #[cfg(any( $($asm_meta),* ))]
            group.bench_function(&format!(
                "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY
            ), |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(asm_fn( $(black_box($arg)),* ));
                }
            }));
            group.finish();
        }
    }};
    // Allow overriding a default
    (@coalesce $specified:expr, $default:expr) => { $specified };
    (@coalesce, $default:expr) => { $default };
    // Allow overriding a function name
    (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => {
        fn $specified $($tt)+
    };
    (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => {
        fn $default_name $($tt)+
    };
}
/// A type used as either an input or output to/from a benchmark function.
pub trait TestIO: Sized {
    /// Build a deterministic vector of fuzzed values of this type, sized by `len`.
    fn make_testvec(len: u32) -> Vec<Self>;
    /// Whether two results should be considered equal for verification purposes.
    fn check_eq(a: Self, b: Self) -> bool;
}
// Implements `TestIO` for floats, integers, same-type pairs, and (float, int)
// pairs. For the pair impls `check_eq` is `unimplemented!()` — presumably
// because tuples are only ever used as benchmark *inputs*, never as outputs.
macro_rules! impl_testio {
    (float $($f_ty:ty),+) => {$(
        impl TestIO for $f_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_float(len, |a| ret.borrow_mut().push(a));
                ret.into_inner()
            }
            fn check_eq(a: Self, b: Self) -> bool {
                // bitwise comparison so NaNs and signed zeros are distinguished
                Float::eq_repr(a, b)
            }
        }
        impl TestIO for ($f_ty, $f_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b)));
                ret.into_inner()
            }
            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )*};
    (int $($i_ty:ty),+) => {$(
        impl TestIO for $i_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz(len, |a| ret.borrow_mut().push(a));
                ret.into_inner()
            }
            fn check_eq(a: Self, b: Self) -> bool {
                a == b
            }
        }
        impl TestIO for ($i_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b)));
                ret.into_inner()
            }
            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )*};
    ((float, int) ($f_ty:ty, $i_ty:ty)) => {
        impl TestIO for ($f_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ivec = RefCell::new(Vec::new());
                let fvec = RefCell::new(Vec::new());
                // cross product of ~sqrt(len) values of each kind keeps the
                // total vector near `len` entries
                crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a));
                crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a));
                let mut ret = Vec::new();
                let ivec = ivec.into_inner();
                let fvec = fvec.into_inner();
                for f in fvec {
                    for i in &ivec {
                        ret.push((f, *i));
                    }
                }
                ret
            }
            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    }
}
// Instantiate `TestIO` for every input/output type the benchmarks use;
// `f16`/`f128` impls are gated on the build-script-provided cfgs.
#[cfg(f16_enabled)]
impl_testio!(float f16);
impl_testio!(float f32, f64);
#[cfg(f128_enabled)]
impl_testio!(float f128);
impl_testio!(int i16, i32, i64, i128);
impl_testio!(int u16, u32, u64, u128);
impl_testio!((float, int)(f32, i32));
impl_testio!((float, int)(f64, i32));
#[cfg(f128_enabled)]
impl_testio!((float, int)(f128, i32));

View file

@ -0,0 +1,337 @@
//! This crate is for integration testing and fuzz testing of functions in `compiler-builtins`. This
//! includes publicly documented intrinsics and some internal alternative implementation functions
//! such as `usize_leading_zeros_riscv` (which are tested because they are configured for
//! architectures not tested by the CI).
//!
//! The general idea is to use a combination of edge case testing and randomized fuzz testing. The
//! edge case testing is crucial for checking cases like where both inputs are equal or equal to
//! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to
//! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search
//! space in as few iterations as possible. See `fuzz_values` in `builtins-test/tests/misc.rs` for
//! an example.
//!
//! Some floating point tests are disabled for specific architectures, because they do not have
//! correct rounding.
#![no_std]
#![cfg_attr(f128_enabled, feature(f128))]
#![cfg_attr(f16_enabled, feature(f16))]
pub mod bench;
extern crate alloc;
use compiler_builtins::float::Float;
use compiler_builtins::int::{Int, MinInt};
use rand_xoshiro::Xoshiro128StarStar;
use rand_xoshiro::rand_core::{RngCore, SeedableRng};
/// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs
/// are caught by the edge case testers. Most of the remaining bugs triggered by more complex
/// sequences are caught well within 10_000 fuzz iterations. For classes of algorithms like division
/// that are vulnerable to rare edge cases, we want 1_000_000 iterations to be more confident. In
/// practical CI, however, we only want to run the more strenuous test once to catch algorithmic
/// level bugs, and run the 10_000 iteration test on most targets. Target-dependent bugs are likely
/// to involve miscompilation and misconfiguration that is likely to break algorithms in quickly
/// caught ways. We choose to configure `N = 1_000_000` iterations for `x86_64` targets (and if
/// debug assertions are disabled. Tests without `--release` would take too long) which are likely
/// to have fast hardware, and run `N = 10_000` for all other targets.
pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) {
    1_000_000 // strenuous run for fast hardware in release mode
} else {
    10_000 // everything else, including debug builds on x86_64
};
/// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as:
/// 11110101010101011110111110011111
/// 10110101010100001011101011001010
/// 1000000000000000
/// 10000000000000110111110000001010
/// 1111011111111101010101111110101
/// 101111111110100000000101000000
/// 10000000110100000000100010101
/// 1010101010101000
fn fuzz_step<I: Int>(rng: &mut Xoshiro128StarStar, x: &mut I) {
    let ones = !I::ZERO;
    // `I::BITS` is a power of two, so this is a valid mask for shift amounts
    let bit_indexing_mask: u32 = I::BITS - 1;
    // It happens that all the RNG we need can come from one call. 7 bits are needed to index a
    // worst case 128 bit integer, and there are 4 indexes that need to be made plus 4 bits for
    // selecting operations
    let rng32 = rng.next_u32();
    // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of
    // ones with `lhs` and `rhs`.
    let r0 = bit_indexing_mask & rng32;
    let r1 = bit_indexing_mask & (rng32 >> 7);
    let mask = ones.wrapping_shl(r0).rotate_left(r1);
    match (rng32 >> 14) % 4 {
        0 => *x |= mask,
        1 => *x &= mask,
        // both 2 and 3 to make XORs as common as ORs and ANDs combined
        _ => *x ^= mask,
    }
    // Alternating ones and zeros (e.x. 0b1010101010101010). This catches second-order
    // problems that might occur for algorithms with two modes of operation (potentially
    // there is some invariant that can be broken and maintained via alternating between modes,
    // breaking the algorithm when it reaches the end).
    let mut alt_ones = I::ONE;
    for _ in 0..(I::BITS / 2) {
        alt_ones <<= 2;
        alt_ones |= I::ONE;
    }
    let r0 = bit_indexing_mask & (rng32 >> 16);
    let r1 = bit_indexing_mask & (rng32 >> 23);
    let mask = alt_ones.wrapping_shl(r0).rotate_left(r1);
    // top two bits of `rng32`; as above, XOR is selected twice as often
    match rng32 >> 30 {
        0 => *x |= mask,
        1 => *x &= mask,
        _ => *x ^= mask,
    }
}
// We need macros like this, because `#![no_std]` prevents us from using iterators
//
// Sweeps all pairs of fuzz lengths, producing values that consist of a single
// contiguous run of ones (`mask_lo & mask_hi`) of varying width and position.
//
// Fixed: the body previously used a bare `I` in three places instead of the
// `$I` metavariable, silently requiring every caller to name its type
// parameter exactly `I`. All uses now go through `$I`; behavior is unchanged
// for existing call sites.
macro_rules! edge_cases {
    ($I:ident, $case:ident, $inner:block) => {
        for i0 in 0..$I::FUZZ_NUM {
            let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32);
            for i1 in i0..$I::FUZZ_NUM {
                let mask_hi =
                    (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32);
                let $case = $I::from_unsigned(mask_lo & mask_hi);
                $inner
            }
        }
    };
}
/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find
/// edge cases, followed by a more random fuzzer that runs `n` times.
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F)
where
    <I as MinInt>::UnsignedInt: Int,
{
    // edge case tester. Calls `f` 210 times for u128.
    // zero gets skipped by the loop
    f(I::ZERO);
    edge_cases!(I, case, {
        f(case);
    });
    // random fuzzer; the fixed seed keeps runs reproducible
    let mut rng = Xoshiro128StarStar::seed_from_u64(0);
    let mut x: I = MinInt::ZERO;
    for _ in 0..n {
        // each step mutates `x` in place rather than generating a fresh value
        fuzz_step(&mut rng, &mut x);
        f(x)
    }
}
/// The same as `fuzz`, except `f` has two inputs.
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F)
where
    <I as MinInt>::UnsignedInt: Int,
{
    // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`.
    edge_cases!(I, case, {
        f(I::ZERO, case);
    });
    edge_cases!(I, case, {
        f(case, I::ZERO);
    });
    // Nested edge tester. Calls `f` 44100 times for `u128`.
    edge_cases!(I, case0, {
        edge_cases!(I, case1, {
            f(case0, case1);
        })
    });
    // random fuzzer; the fixed seed keeps runs reproducible
    let mut rng = Xoshiro128StarStar::seed_from_u64(0);
    let mut x: I = I::ZERO;
    let mut y: I = I::ZERO;
    for _ in 0..n {
        fuzz_step(&mut rng, &mut x);
        fuzz_step(&mut rng, &mut y);
        f(x, y)
    }
}
/// Tester for shift functions
pub fn fuzz_shift<I: Int, F: Fn(I, u32)>(f: F) {
    // Shift functions are very simple and do not need anything other than shifting a small
    // set of random patterns for every fuzz length.
    let mut rng = Xoshiro128StarStar::seed_from_u64(0);
    let mut x: I = MinInt::ZERO;
    for i in 0..I::FUZZ_NUM {
        fuzz_step(&mut rng, &mut x);
        // exercise both shift-by-zero and each canonical fuzz length
        f(x, MinInt::ZERO);
        f(x, I::FUZZ_LENGTHS[i] as u32);
    }
}
/// One randomized mutation step for a float's sign, exponent, and significand.
fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
    let rng32 = rng.next_u32();
    // we need to fuzz the different parts of the float separately, because the masking on larger
    // significands will tend to set the exponent to all ones or all zeros frequently
    // sign bit fuzzing
    let sign = (rng32 & 1) != 0;
    // exponent fuzzing. Only 4 bits for the selector needed.
    let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE;
    let r0 = (rng32 >> 1) % F::EXP_BITS;
    let r1 = (rng32 >> 5) % F::EXP_BITS;
    // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing
    // the sign bit.
    let mask = if r1 == 0 {
        ones.wrapping_shr(r0)
    } else {
        let tmp = ones.wrapping_shr(r0);
        (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones
    };
    let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS;
    // 2 and 3 both select XOR, biasing toward it as in `fuzz_step`
    match (rng32 >> 9) % 4 {
        0 => exp |= mask,
        1 => exp &= mask,
        _ => exp ^= mask,
    }
    // significand fuzzing; reuse the integer fuzzer, then mask back into range
    let mut sig = f.to_bits() & F::SIG_MASK;
    fuzz_step(rng, &mut sig);
    sig &= F::SIG_MASK;
    *f = F::from_parts(sign, exp, sig);
}
// Cross product of boundary exponents × boundary significands × both signs.
//
// Fixed: the body previously used a bare `F` everywhere and never referenced
// the captured `$F` metavariable, silently requiring every caller to name its
// type parameter exactly `F`. All uses now go through `$F`; behavior is
// unchanged for existing call sites.
macro_rules! float_edge_cases {
    ($F:ident, $case:ident, $inner:block) => {
        for exponent in [
            $F::Int::ZERO,
            $F::Int::ONE,
            $F::Int::ONE << ($F::EXP_BITS / 2),
            ($F::Int::ONE << ($F::EXP_BITS - 1)) - $F::Int::ONE,
            $F::Int::ONE << ($F::EXP_BITS - 1),
            ($F::Int::ONE << ($F::EXP_BITS - 1)) + $F::Int::ONE,
            ($F::Int::ONE << $F::EXP_BITS) - $F::Int::ONE,
        ]
        .iter()
        {
            for significand in [
                $F::Int::ZERO,
                $F::Int::ONE,
                $F::Int::ONE << ($F::SIG_BITS / 2),
                ($F::Int::ONE << ($F::SIG_BITS - 1)) - $F::Int::ONE,
                $F::Int::ONE << ($F::SIG_BITS - 1),
                ($F::Int::ONE << ($F::SIG_BITS - 1)) + $F::Int::ONE,
                ($F::Int::ONE << $F::SIG_BITS) - $F::Int::ONE,
            ]
            .iter()
            {
                for sign in [false, true].iter() {
                    let $case = $F::from_parts(*sign, *exponent, *significand);
                    $inner
                }
            }
        }
    };
}
/// Feeds float fuzzing inputs to `f`: a deterministic edge-case sweep first,
/// then `n` rounds of randomized mutation.
pub fn fuzz_float<F: Float, E: Fn(F)>(n: u32, f: E) {
    // Deterministic sweep over float edge cases.
    float_edge_cases!(F, case, {
        f(case);
    });
    // Randomized rounds; the fixed seed keeps runs reproducible.
    let mut state = Xoshiro128StarStar::seed_from_u64(0);
    let mut value = F::ZERO;
    for _ in 0..n {
        fuzz_float_step(&mut state, &mut value);
        f(value);
    }
}
/// The same as `fuzz_float`, except `f` takes two float inputs.
pub fn fuzz_float_2<F: Float, E: Fn(F, F)>(n: u32, f: E) {
    // Deterministic sweep over all pairs of float edge cases.
    float_edge_cases!(F, case0, {
        float_edge_cases!(F, case1, {
            f(case0, case1);
        });
    });
    // Randomized rounds; the fixed seed keeps runs reproducible.
    let mut state = Xoshiro128StarStar::seed_from_u64(0);
    let mut a = F::ZERO;
    let mut b = F::ZERO;
    for _ in 0..n {
        fuzz_float_step(&mut state, &mut a);
        fuzz_float_step(&mut state, &mut b);
        f(a, b);
    }
}
/// Perform an operation using builtin types if available, falling back to apfloat if not.
#[macro_export]
macro_rules! apfloat_fallback {
    (
        $float_ty:ty,
        // Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix.
        $apfloat_ty:ident,
        // Cfg expression for when builtin system operations should be used
        $sys_available:meta,
        // The expression to run. This expression may use `FloatTy` for its signature.
        // Optionally, the final conversion back to a float can be suppressed using
        // `=> no_convert` (for e.g. operations that return a bool).
        //
        // If the apfloat needs a different operation, it can be provided here.
        $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?,
        // Arguments that get passed to `$op` after converting to a float
        $($arg:expr),+
        $(,)?
    ) => {{
        // Native path: `FloatTy` is the builtin float and `$op` runs directly.
        #[cfg($sys_available)]
        let ret = {
            type FloatTy = $float_ty;
            $op( $($arg),+ )
        };
        // Fallback path: `FloatTy` becomes the apfloat software type and the
        // args are transported through their bit representation.
        #[cfg(not($sys_available))]
        let ret = {
            use rustc_apfloat::Float;
            type FloatTy = rustc_apfloat::ieee::$apfloat_ty;
            apfloat_fallback!(@inner
                fty: $float_ty,
                // Apply a conversion to `FloatTy` to each arg, then pass all args to `$op`
                op_res: $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ),
                $(apfloat_op: $apfloat_op, )?
                $(conv_opts: $convert,)?
                args: $($arg),+
            )
        };
        ret
    }};
    // Operations that do not need converting back to a float
    (@inner fty: $float_ty:ty, op_res: $val:expr, conv_opts: no_convert, args: $($_arg:expr),+) => {
        $val
    };
    // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This
    // is the default.
    (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{
        // ignore the status, just get the value
        let unwrapped = $val.value;
        <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap())
    }};
    // This is the case where we can't use the same expression for the default builtin and
    // nonstandard apfloat fallback (e.g. `as` casts in std are normal functions in apfloat, so
    // two separate expressions must be specified.
    (@inner
        fty: $float_ty:ty, op_res: $_val:expr,
        apfloat_op: $apfloat_op:expr, args: $($arg:expr),+
    ) => {{
        $apfloat_op($($arg),+)
    }};
}

View file

@ -0,0 +1,143 @@
#![allow(unused_macros)]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::*;
mod int_addsub {
    use super::*;
    // Compares the plain builtins (`__rust_*_add`/`__rust_*_sub`) against the
    // standard library's wrapping arithmetic.
    macro_rules! sum {
        ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => {
            $(
                #[test]
                fn $fn_add() {
                    use compiler_builtins::int::addsub::{$fn_add, $fn_sub};
                    fuzz_2(N, |x: $i, y: $i| {
                        let add0 = x.wrapping_add(y);
                        let sub0 = x.wrapping_sub(y);
                        let add1: $i = $fn_add(x, y);
                        let sub1: $i = $fn_sub(x, y);
                        if add0 != add1 {
                            panic!(
                                "{}({}, {}): std: {}, builtins: {}",
                                stringify!($fn_add), x, y, add0, add1
                            );
                        }
                        if sub0 != sub1 {
                            panic!(
                                "{}({}, {}): std: {}, builtins: {}",
                                stringify!($fn_sub), x, y, sub0, sub1
                            );
                        }
                    });
                }
            )*
        };
    }
    // Compares the overflow-reporting builtins (`__rust_*_addo`/`__rust_*_subo`)
    // against the standard library's `overflowing_*` operations. The builtins
    // report overflow through an `i32` out-parameter (0 or 1), which is why the
    // flags are compared via `i32::from(bool)`.
    macro_rules! overflowing_sum {
        ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => {
            $(
                #[test]
                fn $fn_add() {
                    use compiler_builtins::int::addsub::{$fn_add, $fn_sub};
                    fuzz_2(N, |x: $i, y: $i| {
                        let (add0, add_o0)= x.overflowing_add(y);
                        let (sub0, sub_o0)= x.overflowing_sub(y);
                        let mut add_o1 = 0;
                        let mut sub_o1 = 0;
                        let add1: $i = $fn_add(x, y, &mut add_o1);
                        let sub1: $i = $fn_sub(x, y, &mut sub_o1);
                        if add0 != add1 || i32::from(add_o0) != add_o1 {
                            panic!(
                                "{}({}, {}): std: {:?}, builtins: {:?}",
                                stringify!($fn_add), x, y, (add0, add_o0) , (add1, add_o1)
                            );
                        }
                        if sub0 != sub1 || i32::from(sub_o0) != sub_o1 {
                            panic!(
                                "{}({}, {}): std: {:?}, builtins: {:?}",
                                stringify!($fn_sub), x, y, (sub0, sub_o0) , (sub1, sub_o1)
                            );
                        }
                    });
                }
            )*
        };
    }
    // Integer addition and subtraction is very simple, so the `N` fuzzing passes
    // run by `fuzz_2` above are plenty.
    sum! {
        u128, __rust_u128_add, __rust_u128_sub;
        i128, __rust_i128_add, __rust_i128_sub;
    }
    overflowing_sum! {
        u128, __rust_u128_addo, __rust_u128_subo;
        i128, __rust_i128_addo, __rust_i128_subo;
    }
}
// Generates a test comparing a pair of float add/sub builtins against the
// reference result, which is either the native operator or an apfloat
// emulation depending on the `$sys_available` cfg. Results are compared
// bitwise via `Float::eq_repr`.
macro_rules! float_sum {
    ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            fn $fn_add() {
                use core::ops::{Add, Sub};
                use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float};
                fuzz_float_2(N, |x: $f, y: $f| {
                    let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y);
                    let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y);
                    let add1: $f = $fn_add(x, y);
                    let sub1: $f = $fn_sub(x, y);
                    if !Float::eq_repr(add0, add1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn_add), x, y, add0, add1
                        );
                    }
                    if !Float::eq_repr(sub0, sub1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn_sub), x, y, sub0, sub1
                        );
                    }
                });
            }
        )*
    }
}
// NOTE(review): skipped on 32-bit x86 without SSE, presumably because x87
// excess precision makes results differ — confirm.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_addsub {
    use super::*;
    // `all()` is always true: `f32`/`f64` system references are available everywhere.
    float_sum! {
        f32, __addsf3, __subsf3, Single, all();
        f64, __adddf3, __subdf3, Double, all();
    }
}
#[cfg(f128_enabled)]
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod float_addsub_f128 {
    use super::*;
    // Compare against system symbols only when they exist, i.e. unless the
    // build script set the `no-sys-f128` feature for this target.
    float_sum! {
        f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
    }
}
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod float_addsub_f128_ppc {
    use super::*;
    // PowerPC spells the binary128 intrinsics with `kf` instead of the `tf`
    // used by the sibling module above.
    float_sum! {
        f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
    }
}

View file

@ -0,0 +1,60 @@
#![cfg(all(
target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"
))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Route `panic!` through utest's `upanic!` so failures are reported via the
// QEMU-based test harness (`utest_cortex_m_qemu`).
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
// AEABI intrinsics under test; implementations are provided by
// `compiler_builtins`. The `4` suffix marks the variants that require
// 4-byte-aligned destinations.
extern "C" {
    fn __aeabi_memclr4(dest: *mut u8, n: usize);
    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
/// An 8-byte buffer whose zero-length `u32` member forces 4-byte alignment,
/// as the `__aeabi_*4` intrinsics require.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}
impl Aligned {
    /// Creates a zero-filled, 4-byte-aligned buffer.
    fn new() -> Self {
        Self {
            array: [0u8; 8],
            _alignment: [],
        }
    }
}
// For every length 0..=8: paint the first `n` bytes with 0xff, clear them
// again, and verify the cleared prefix is all zeros.
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    for n in 0..9 {
        unsafe {
            __aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
            __aeabi_memclr4(xs.as_mut_ptr(), n);
        }
        assert!(xs[0..n].iter().all(|x| *x == 0));
    }
}

View file

@ -0,0 +1,71 @@
#![cfg(all(
target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"
))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
// Route `panic!` through utest's `upanic!` so failures are reported via the
// QEMU-based test harness (`utest_cortex_m_qemu`).
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
// AEABI memcpy intrinsics under test; implementations are provided by
// `compiler_builtins`. The `4` suffix marks the 4-byte-aligned variant.
extern "C" {
    fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
    fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}
/// An 8-byte buffer whose zero-length `u32` member forces 4-byte alignment,
/// as the `__aeabi_*4` intrinsics require.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}
impl Aligned {
    /// Wraps `array` in a 4-byte-aligned container.
    fn new(array: [u8; 8]) -> Self {
        Self {
            array,
            _alignment: [],
        }
    }
}
// Copy prefixes of `src` into a zeroed `dest` and check the copied bytes match.
// NOTE(review): the loop bound `0..dest.len()` never exercises a full-length
// copy, unlike the memclr test's inclusive `0..9` — confirm whether intentional.
#[test]
fn memcpy() {
    let mut dest = [0; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];
    for n in 0..dest.len() {
        dest.copy_from_slice(&[0; 4]);
        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
        assert_eq!(&dest[0..n], &src[0..n])
    }
}
// Same check through the 4-byte-aligned entry point, using the `Aligned` buffer.
#[test]
fn memcpy4() {
    let mut aligned = Aligned::new([0; 8]);
    let dest = &mut aligned.array;
    let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d];
    for n in 0..dest.len() {
        dest.copy_from_slice(&[0; 8]);
        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }
        assert_eq!(&dest[0..n], &src[0..n])
    }
}

View file

@ -0,0 +1,240 @@
#![cfg(all(
target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"
))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Route `panic!` through utest's `upanic!` so failures are reported via the
// QEMU-based test harness (`utest_cortex_m_qemu`).
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
// AEABI memset intrinsic under test (4-byte-aligned variant); the
// implementation is provided by `compiler_builtins`.
extern "C" {
    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
/// An 8-byte buffer whose zero-length `u32` member forces 4-byte alignment,
/// as the `__aeabi_*4` intrinsics require.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}
impl Aligned {
    /// Wraps `array` in a 4-byte-aligned container.
    fn new(array: [u8; 8]) -> Self {
        Self {
            array,
            _alignment: [],
        }
    }
}
// `n == 0` must leave the destination completely untouched (checked against
// both an all-zero and an all-one buffer).
#[test]
fn zero() {
    let mut aligned = Aligned::new([0u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) }
    assert_eq!(*xs, [0; 8]);
    let mut aligned = Aligned::new([1u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) }
    assert_eq!(*xs, [1; 8]);
}
// For the nonzero lengths below: exactly the first `n` bytes become 0xef (the
// low byte of `c = 0xdeadbeef`); the remaining bytes keep their prior values.
#[test]
fn one() {
    let mut aligned = Aligned::new([0u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let n = 1;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]);
    let mut aligned = Aligned::new([1u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]);
}
#[test]
fn two() {
    let mut aligned = Aligned::new([0u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let n = 2;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]);
    let mut aligned = Aligned::new([1u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 1, 1]);
}
#[test]
fn three() {
    let mut aligned = Aligned::new([0u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let n = 3;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]);
    let mut aligned = Aligned::new([1u8; 8]);
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;
    let c = 0xdeadbeef;
    unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
    assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]);
}
#[test]
fn four() {
let mut aligned = Aligned::new([0u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let n = 4;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]);
let mut aligned = Aligned::new([1u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]);
}
#[test]
fn five() {
let mut aligned = Aligned::new([0u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let n = 5;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]);
let mut aligned = Aligned::new([1u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]);
}
#[test]
fn six() {
let mut aligned = Aligned::new([0u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let n = 6;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]);
let mut aligned = Aligned::new([1u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]);
}
#[test]
fn seven() {
let mut aligned = Aligned::new([0u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let n = 7;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]);
let mut aligned = Aligned::new([1u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]);
}
#[test]
fn eight() {
let mut aligned = Aligned::new([0u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let n = 8;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]);
let mut aligned = Aligned::new([1u8; 8]);
assert_eq!(mem::align_of_val(&aligned), 4);
let xs = &mut aligned.array;
let c = 0xdeadbeef;
unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }
assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]);
}

View file

@ -0,0 +1,134 @@
use compiler_builtins::int::{HInt, MinInt, i256, u256};

// Low half all ones, high half an alternating bit pattern — exercises the
// lo/hi limb split in the widening operations below.
const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;

/// Print a `u256` as hex since we can't add format implementations
fn hexu(v: u256) -> String {
    // Limbs are stored little-endian, so print them highest-first.
    let mut out = String::from("0x");
    for limb in v.0.iter().rev() {
        out.push_str(&format!("{:016x}", limb));
    }
    out
}
#[test]
fn widen_u128() {
    // Zero-extension: the low two limbs carry the 128-bit value and the
    // high two limbs are zero (limbs are little-endian).
    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
    assert_eq!(
        LOHI_SPLIT.widen(),
        u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])
    );
}
#[test]
fn widen_i128() {
    // Sign-extension: a negative value fills the high limbs with ones.
    assert_eq!((-1i128).widen(), u256::MAX.signed());
    assert_eq!(
        (LOHI_SPLIT as i128).widen(),
        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
    );
    // `zero_widen` must ignore the sign bit entirely.
    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
}
#[test]
fn widen_mul_u128() {
    // (lhs, rhs, expected full 256-bit product); limbs little-endian.
    let tests = [
        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
        (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
        (u128::MIN, u128::MIN, u256::ZERO),
        (1234, 0, u256::ZERO),
        (0, 1234, u256::ZERO),
    ];
    let mut errors = Vec::new();
    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
        let res = a.widen_mul(b);
        let res_z = a.zero_widen_mul(b);
        // For unsigned operands the sign-aware and zero-extending
        // multiplications must agree.
        assert_eq!(res, res_z);
        if res != exp {
            errors.push((i, a, b, exp, res));
        }
    }
    // Collect all mismatches before asserting so a single run reports
    // every failing case.
    for (i, a, b, exp, res) in &errors {
        eprintln!(
            "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}",
            hexu(*exp),
            hexu(*res)
        );
    }
    assert!(errors.is_empty());
}
#[test]
fn not_u128() {
    // Bitwise NOT of all-zeros must give all-ones.
    assert_eq!(u256::MAX, !u256::ZERO);
}
#[test]
fn shr_u128() {
    // Values that fit entirely in the low 128 bits: shifting the widened
    // value must match shifting narrowly and then widening.
    let only_low = [
        1,
        u16::MAX.into(),
        u32::MAX.into(),
        u64::MAX.into(),
        u128::MAX,
    ];
    let mut errors = Vec::new();
    for a in only_low {
        // Perturb each base value slightly to cover nearby bit patterns.
        for perturb in 0..10 {
            let a = a.saturating_add(perturb);
            for shift in 0..128 {
                let res = a.widen() >> shift;
                let expected = (a >> shift).widen();
                if res != expected {
                    errors.push((a.widen(), shift, res, expected));
                }
            }
        }
    }
    // Hand-picked shifts of an all-ones value around every 64-bit limb
    // boundary (1, 64, 128, 192, 255, ...).
    let check = [
        (
            u256::MAX,
            1,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
        ),
        (
            u256::MAX,
            5,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
        ),
        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
        // NOTE(review): duplicate of the `191` case above — harmless, but
        // could be removed.
        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
        (u256::MAX, 255, u256([1, 0, 0, 0])),
    ];
    for (input, shift, expected) in check {
        let res = input >> shift;
        if res != expected {
            errors.push((input, shift, res, expected));
        }
    }
    // Report all failures at once, then assert.
    for (a, b, res, expected) in &errors {
        eprintln!(
            "FAILURE: {} >> {b} = {} got {}",
            hexu(*a),
            hexu(*expected),
            hexu(*res),
        );
    }
    assert!(errors.is_empty());
}

View file

@ -0,0 +1,184 @@
#![allow(unused_macros)]
#![allow(unreachable_code)]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::*;
mod float_comparisons {
    use super::*;

    /// Compute a reference three-way comparison of `$x` and `$y` and check
    /// each builtin comparison `$fn` against it.
    ///
    /// For every `($unordered_val, $fn)` pair, `$unordered_val` is the
    /// value the builtin must return when the operands are unordered
    /// (either one is NaN). `$sys_available` says whether the host has
    /// native support for `$f`; when it does not, `apfloat_fallback!`
    /// routes the reference computation through `rustc_apfloat`'s
    /// `$apfloat_ty`.
    macro_rules! cmp {
        (
            $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta,
            $($unordered_val:expr, $fn:ident);*;
        ) => {
            $(
                // Reference result: $unordered_val for NaNs, else -1/0/1.
                let cmp0 = if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x: FloatTy| x.is_nan() => no_convert,
                    $x
                ) || apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |y: FloatTy| y.is_nan() => no_convert,
                    $y
                )
                {
                    $unordered_val
                } else if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x, y| x < y => no_convert,
                    $x, $y
                ) {
                    -1
                } else if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x, y| x == y => no_convert,
                    $x, $y
                ) {
                    0
                } else {
                    1
                };
                let cmp1 = $fn($x, $y);
                if cmp0 != cmp1 {
                    panic!(
                        "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                        stringify!($fn), $x, $y, cmp0, cmp1
                    );
                }
            )*
        };
    }

    #[test]
    fn cmp_f32() {
        use compiler_builtins::float::cmp::{
            __eqsf2, __gesf2, __gtsf2, __lesf2, __ltsf2, __nesf2, __unordsf2,
        };
        fuzz_float_2(N, |x: f32, y: f32| {
            // `__unordsf2` returns nonzero iff either operand is NaN.
            assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan());
            cmp!(f32, x, y, Single, all(),
                1, __ltsf2;
                1, __lesf2;
                1, __eqsf2;
                -1, __gesf2;
                -1, __gtsf2;
                1, __nesf2;
            );
        });
    }

    #[test]
    fn cmp_f64() {
        use compiler_builtins::float::cmp::{
            __eqdf2, __gedf2, __gtdf2, __ledf2, __ltdf2, __nedf2, __unorddf2,
        };
        fuzz_float_2(N, |x: f64, y: f64| {
            // `__unorddf2` returns nonzero iff either operand is NaN.
            assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan());
            cmp!(f64, x, y, Double, all(),
                1, __ltdf2;
                1, __ledf2;
                1, __eqdf2;
                -1, __gedf2;
                -1, __gtdf2;
                1, __nedf2;
            );
        });
    }

    #[test]
    #[cfg(f128_enabled)]
    fn cmp_f128() {
        // PowerPC uses `kf`-suffixed symbol names for IEEE binary128, so
        // alias them to the generic `tf` names used below.
        #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
        use compiler_builtins::float::cmp::{
            __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2,
            __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2,
        };
        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
        use compiler_builtins::float::cmp::{
            __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2,
        };
        fuzz_float_2(N, |x: f128, y: f128| {
            // `f128::is_nan` may not exist natively; go through the
            // apfloat fallback for the NaN checks as well.
            let x_is_nan = apfloat_fallback!(
                f128, Quad, not(feature = "no-sys-f128"),
                |x: FloatTy| x.is_nan() => no_convert,
                x
            );
            let y_is_nan = apfloat_fallback!(
                f128, Quad, not(feature = "no-sys-f128"),
                |x: FloatTy| x.is_nan() => no_convert,
                y
            );
            assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan);
            cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"),
                1, __lttf2;
                1, __letf2;
                1, __eqtf2;
                -1, __getf2;
                -1, __gttf2;
                1, __netf2;
            );
        });
    }
}
#[cfg(target_arch = "arm")]
mod float_comparisons_arm {
    use super::*;

    /// Check the AEABI comparison wrappers, which return a boolean-style
    /// `i32` rather than a three-way ordering. `$fn_std` is the native
    /// comparison used as reference and `$unordered_val` is the expected
    /// result when either input is NaN.
    macro_rules! cmp2 {
        ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => {
            $(
                let cmp0: i32 = if $x.is_nan() || $y.is_nan() {
                    $unordered_val
                } else {
                    $fn_std as i32
                };
                let cmp1: i32 = $fn_builtins($x, $y);
                if cmp0 != cmp1 {
                    panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1);
                }
            )*
        };
    }

    #[test]
    fn cmp_f32() {
        use compiler_builtins::float::cmp::{
            __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt,
        };
        fuzz_float_2(N, |x: f32, y: f32| {
            cmp2!(x, y,
                0, x < y, __aeabi_fcmplt;
                0, x <= y, __aeabi_fcmple;
                0, x == y, __aeabi_fcmpeq;
                0, x >= y, __aeabi_fcmpge;
                0, x > y, __aeabi_fcmpgt;
            );
        });
    }

    #[test]
    fn cmp_f64() {
        use compiler_builtins::float::cmp::{
            __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt,
        };
        fuzz_float_2(N, |x: f64, y: f64| {
            cmp2!(x, y,
                0, x < y, __aeabi_dcmplt;
                0, x <= y, __aeabi_dcmple;
                0, x == y, __aeabi_dcmpeq;
                0, x >= y, __aeabi_dcmpge;
                0, x > y, __aeabi_dcmpgt;
            );
        });
    }
}

View file

@ -0,0 +1,364 @@
#![cfg_attr(f128_enabled, feature(f128))]
#![cfg_attr(f16_enabled, feature(f16))]
// makes configuration easier
#![allow(unused_macros)]
#![allow(unused_imports)]
use builtins_test::*;
use compiler_builtins::float::Float;
use rustc_apfloat::{Float as _, FloatConvert as _};
mod i_to_f {
    use super::*;

    /// Generate one test per `($i_ty, $fn)` pair that converts an integer
    /// to `$f_ty` via the builtin and checks the result two ways:
    /// 1. a round-trip rounding check (the chosen float must be at least
    ///    as close to the integer as both of its neighbors), and
    /// 2. a direct comparison against the native `as` cast (or a
    ///    `rustc_apfloat` reference when `$sys_available` is false).
    macro_rules! i_to_f {
        ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
            $(
                #[test]
                fn $fn() {
                    use compiler_builtins::float::conv::$fn;
                    use compiler_builtins::int::Int;
                    fuzz(N, |x: $i_ty| {
                        let f0 = apfloat_fallback!(
                            $f_ty, $apfloat_ty, $sys_available,
                            |x| x as $f_ty;
                            // When the builtin is not available, we need to use a different conversion
                            // method (since apfloat doesn't support `as` casting).
                            |x: $i_ty| {
                                use compiler_builtins::int::MinInt;
                                let apf = if <$i_ty>::SIGNED {
                                    FloatTy::from_i128(x.try_into().unwrap()).value
                                } else {
                                    FloatTy::from_u128(x.try_into().unwrap()).value
                                };
                                <$f_ty>::from_bits(apf.to_bits())
                            },
                            x
                        );
                        let f1: $f_ty = $fn(x);
                        #[cfg($sys_available)] {
                            // This makes sure that the conversion produced the best rounding possible, and does
                            // this independent of `x as $into` rounding correctly.
                            // This assumes that float to integer conversion is correct.
                            let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty;
                            let y = f1 as $i_ty;
                            let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty;
                            let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x);
                            let error = <$i_ty as Int>::abs_diff(y, x);
                            let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x);
                            // The first two conditions check that none of the two closest float values are
                            // strictly closer in representation to `x`. The second makes sure that rounding is
                            // towards even significand if two float values are equally close to the integer.
                            if error_minus < error
                                || error_plus < error
                                || ((error_minus == error || error_plus == error)
                                    && ((f0.to_bits() & 1) != 0))
                            {
                                if !cfg!(any(
                                    target_arch = "powerpc",
                                    target_arch = "powerpc64"
                                )) {
                                    panic!(
                                        "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
                                        stringify!($fn),
                                        x,
                                        f1.to_bits(),
                                        y_minus_ulp,
                                        y,
                                        y_plus_ulp,
                                        error_minus,
                                        error,
                                        error_plus,
                                    );
                                }
                            }
                        }
                        // Test against native conversion. We disable testing on all `x86` because of
                        // rounding bugs with `i686`. `powerpc` also has the same rounding bug.
                        if !Float::eq_repr(f0, f1) && !cfg!(any(
                            target_arch = "x86",
                            target_arch = "powerpc",
                            target_arch = "powerpc64"
                        )) {
                            panic!(
                                "{}({}): std: {:?}, builtins: {:?}",
                                stringify!($fn),
                                x,
                                f0,
                                f1,
                            );
                        }
                    });
                }
            )*
        };
    }

    i_to_f! { f32, Single, all(),
        u32, __floatunsisf;
        i32, __floatsisf;
        u64, __floatundisf;
        i64, __floatdisf;
        u128, __floatuntisf;
        i128, __floattisf;
    }
    i_to_f! { f64, Double, all(),
        u32, __floatunsidf;
        i32, __floatsidf;
        u64, __floatundidf;
        i64, __floatdidf;
        u128, __floatuntidf;
        i128, __floattidf;
    }
    #[cfg(not(feature = "no-f16-f128"))]
    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
    i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
        u32, __floatunsitf;
        i32, __floatsitf;
        u64, __floatunditf;
        i64, __floatditf;
        u128, __floatuntitf;
        i128, __floattitf;
    }
    // PowerPC uses `kf`-suffixed names for IEEE binary128.
    #[cfg(not(feature = "no-f16-f128"))]
    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
    i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
        u32, __floatunsikf;
        i32, __floatsikf;
        u64, __floatundikf;
        i64, __floatdikf;
        u128, __floatuntikf;
        i128, __floattikf;
    }
}
mod f_to_i {
    use super::*;

    /// For each `($i_ty, $fn)` pair, convert `$x` to the integer type via
    /// the builtin and compare against a reference conversion (native `as`
    /// cast, or `rustc_apfloat` when `$sys_available` is false). NaN
    /// inputs are skipped since the conversion is UB for them.
    macro_rules! f_to_i {
        ($x:ident, $f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
            $(
                // it is undefined behavior in the first place to do conversions with NaNs
                if !apfloat_fallback!(
                    $f_ty, $apfloat_ty, $sys_available, |x: FloatTy| x.is_nan() => no_convert, $x
                ) {
                    let conv0 = apfloat_fallback!(
                        $f_ty, $apfloat_ty, $sys_available,
                        // Use an `as` cast when the builtin is available on the system.
                        |x| x as $i_ty;
                        // When the builtin is not available, we need to use a different conversion
                        // method (since apfloat doesn't support `as` casting).
                        |x: $f_ty| {
                            use compiler_builtins::int::MinInt;
                            let apf = FloatTy::from_bits(x.to_bits().into());
                            let bits: usize = <$i_ty>::BITS.try_into().unwrap();
                            let err_fn = || panic!(
                                "Unable to convert value {x:?} to type {}:", stringify!($i_ty)
                            );
                            if <$i_ty>::SIGNED {
                                <$i_ty>::try_from(apf.to_i128(bits).value).ok().unwrap_or_else(err_fn)
                            } else {
                                <$i_ty>::try_from(apf.to_u128(bits).value).ok().unwrap_or_else(err_fn)
                            }
                        },
                        $x
                    );
                    let conv1: $i_ty = $fn($x);
                    if conv0 != conv1 {
                        panic!("{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), $x, conv0, conv1);
                    }
                }
            )*
        };
    }

    #[test]
    fn f32_to_int() {
        use compiler_builtins::float::conv::{
            __fixsfdi, __fixsfsi, __fixsfti, __fixunssfdi, __fixunssfsi, __fixunssfti,
        };
        fuzz_float(N, |x: f32| {
            f_to_i!(x, f32, Single, all(),
                u32, __fixunssfsi;
                u64, __fixunssfdi;
                u128, __fixunssfti;
                i32, __fixsfsi;
                i64, __fixsfdi;
                i128, __fixsfti;
            );
        });
    }

    #[test]
    fn f64_to_int() {
        use compiler_builtins::float::conv::{
            __fixdfdi, __fixdfsi, __fixdfti, __fixunsdfdi, __fixunsdfsi, __fixunsdfti,
        };
        fuzz_float(N, |x: f64| {
            f_to_i!(x, f64, Double, all(),
                u32, __fixunsdfsi;
                u64, __fixunsdfdi;
                u128, __fixunsdfti;
                i32, __fixdfsi;
                i64, __fixdfdi;
                i128, __fixdfti;
            );
        });
    }

    #[test]
    #[cfg(f128_enabled)]
    fn f128_to_int() {
        // PowerPC uses `kf`-suffixed symbol names for IEEE binary128.
        #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
        use compiler_builtins::float::conv::{
            __fixkfdi as __fixtfdi, __fixkfsi as __fixtfsi, __fixkfti as __fixtfti,
            __fixunskfdi as __fixunstfdi, __fixunskfsi as __fixunstfsi,
            __fixunskfti as __fixunstfti,
        };
        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
        use compiler_builtins::float::conv::{
            __fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti,
        };
        fuzz_float(N, |x: f128| {
            f_to_i!(
                x,
                f128,
                Quad,
                not(feature = "no-sys-f128-int-convert"),
                u32, __fixunstfsi;
                u64, __fixunstfdi;
                u128, __fixunstfti;
                i32, __fixtfsi;
                i64, __fixtfdi;
                i128, __fixtfti;
            );
        });
    }
}
/// Generate a test per float-to-float conversion builtin `$fn` living in
/// module `$mod` (`extend` or `trunc`). The reference result is the native
/// `as` cast when `$sys_available` holds, otherwise a `rustc_apfloat`
/// conversion between `$from_ap_ty` and `$to_ap_ty`.
macro_rules! f_to_f {
    (
        $mod:ident,
        $(
            $from_ty:ty => $to_ty:ty,
            $from_ap_ty:ident => $to_ap_ty:ident,
            $fn:ident, $sys_available:meta
        );+;
    ) => {$(
        #[test]
        fn $fn() {
            use compiler_builtins::float::{$mod::$fn, Float};
            use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty};
            fuzz_float(N, |x: $from_ty| {
                let tmp0: $to_ty = apfloat_fallback!(
                    $from_ty,
                    $from_ap_ty,
                    $sys_available,
                    |x: $from_ty| x as $to_ty;
                    |x: $from_ty| {
                        let from_apf = FloatTy::from_bits(x.to_bits().into());
                        // Get `value` directly to ignore INVALID_OP
                        let to_apf: $to_ap_ty = from_apf.convert(&mut false).value;
                        <$to_ty>::from_bits(to_apf.to_bits().try_into().unwrap())
                    },
                    x
                );
                let tmp1: $to_ty = $fn(x);
                // Compare bit representations so NaNs and signed zeros
                // are checked exactly.
                if !Float::eq_repr(tmp0, tmp1) {
                    panic!(
                        "{}({:?}): std: {:?}, builtins: {:?}",
                        stringify!($fn),
                        x,
                        tmp0,
                        tmp1
                    );
                }
            })
        }
    )+};
}
// Widening conversions (smaller float -> larger float).
mod extend {
    use super::*;

    f_to_f! {
        extend,
        f32 => f64, Single => Double, __extendsfdf2, all();
    }

    #[cfg(all(f16_enabled, f128_enabled))]
    #[cfg(not(any(
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "loongarch64"
    )))]
    f_to_f! {
        extend,
        f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
        f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
        f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
        f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
        f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
        f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
    }

    // PowerPC uses `kf`-suffixed names for IEEE binary128.
    #[cfg(f128_enabled)]
    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
    f_to_f! {
        extend,
        // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
        f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128");
        f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128");
    }
}
// Narrowing conversions (larger float -> smaller float).
mod trunc {
    use super::*;

    f_to_f! {
        trunc,
        f64 => f32, Double => Single, __truncdfsf2, all();
    }

    #[cfg(all(f16_enabled, f128_enabled))]
    #[cfg(not(any(
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "loongarch64"
    )))]
    f_to_f! {
        trunc,
        f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
        f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
        f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
        f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
        f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
        f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");
    }

    // PowerPC uses `kf`-suffixed names for IEEE binary128.
    #[cfg(f128_enabled)]
    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
    f_to_f! {
        trunc,
        // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
        f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128");
        f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128");
    }
}

View file

@ -0,0 +1,164 @@
#![feature(f128)]
#![allow(unused_macros)]
use builtins_test::*;
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc};
// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows
// that sometimes 100_000 iterations of the random fuzzer is needed.
/// Creates intensive test functions for division functions of a certain size
macro_rules! test {
    (
        $n:expr, // the number of bits in a $iX or $uX
        $uX:ident, // unsigned integer that will be shifted
        $iX:ident, // signed version of $uX
        $test_name:ident, // name of the test function
        $unsigned_name:ident, // unsigned division function
        $signed_name:ident // signed division function
    ) => {
        #[test]
        fn $test_name() {
            fuzz_2(N, |lhs, rhs| {
                // Division by zero is not checked here.
                if rhs == 0 {
                    return;
                }
                let mut rem: $uX = 0;
                let quo: $uX = $unsigned_name(lhs, rhs, Some(&mut rem));
                // Validate via the Euclidean identity and `rem < rhs`
                // rather than comparing against `/` and `%` directly.
                if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) {
                    panic!(
                        "unsigned division function failed with lhs:{} rhs:{} \
                        std:({}, {}) builtins:({}, {})",
                        lhs,
                        rhs,
                        lhs.wrapping_div(rhs),
                        lhs.wrapping_rem(rhs),
                        quo,
                        rem
                    );
                }
                // test the signed division function also
                let lhs = lhs as $iX;
                let rhs = rhs as $iX;
                let mut rem: $iX = 0;
                let quo: $iX = $signed_name(lhs, rhs, &mut rem);
                // We cannot just test that
                // `lhs == rhs.wrapping_mul(quo).wrapping_add(rem)`, but also
                // need to make sure the remainder isn't larger than the divisor
                // and has the correct sign.
                let incorrect_rem = if rem == 0 {
                    false
                } else if rhs == $iX::MIN {
                    // `rhs.wrapping_abs()` would overflow, so handle this case
                    // separately.
                    (lhs.is_negative() != rem.is_negative()) || (rem == $iX::MIN)
                } else {
                    (lhs.is_negative() != rem.is_negative())
                        || (rhs.wrapping_abs() <= rem.wrapping_abs())
                };
                if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) {
                    panic!(
                        "signed division function failed with lhs:{} rhs:{} \
                        std:({}, {}) builtins:({}, {})",
                        lhs,
                        rhs,
                        lhs.wrapping_div(rhs),
                        lhs.wrapping_rem(rhs),
                        quo,
                        rem
                    );
                }
            });
        }
    };
}

test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4);
test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4);
test!(128, u128, i128, div_rem_ti4, __udivmodti4, __divmodti4);
#[test]
fn divide_sparc() {
    fuzz_2(N, |numer, denom| {
        // Division by zero is not checked here.
        if denom == 0 {
            return;
        }
        let mut remainder: u128 = 0;
        let quotient: u128 = u128_divide_sparc(numer, denom, &mut remainder);
        // The remainder must be strictly smaller than the divisor and the
        // Euclidean identity must hold (with wrapping arithmetic).
        let inconsistent =
            remainder >= denom || numer != denom.wrapping_mul(quotient).wrapping_add(remainder);
        if inconsistent {
            panic!(
                "u128_divide_sparc({}, {}): \
                std:({}, {}), builtins:({}, {})",
                numer,
                denom,
                numer.wrapping_div(denom),
                numer.wrapping_rem(denom),
                quotient,
                remainder
            );
        }
    });
}
/// Generate a test for each float division builtin `$fn`, comparing its
/// result against the host `Div` implementation (or the apfloat fallback
/// when `$sys_available` is false).
macro_rules! float {
    ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            fn $fn() {
                use compiler_builtins::float::{div::$fn, Float};
                use core::ops::Div;
                fuzz_float_2(N, |x: $f, y: $f| {
                    let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
                    let quo1: $f = $fn(x, y);
                    // ARM SIMD instructions always flush subnormals to zero
                    if cfg!(target_arch = "arm") &&
                        ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
                        return;
                    }
                    // Compare bit representations so NaNs and signed
                    // zeros are checked exactly.
                    if !Float::eq_repr(quo0, quo1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn),
                            x,
                            y,
                            quo0,
                            quo1
                        );
                    }
                });
            }
        )*
    };
}

// Skipped on x87-only x86, where excess precision breaks comparisons.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_div {
    use super::*;

    float! {
        f32, __divsf3, Single, all();
        f64, __divdf3, Double, all();
    }

    #[cfg(not(feature = "no-f16-f128"))]
    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
    float! {
        f128, __divtf3, Quad,
        // FIXME(llvm): there is a bug in LLVM rt.
        // See <https://github.com/llvm/llvm-project/issues/91840>.
        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
    }

    // PowerPC uses the `kf` symbol name for IEEE binary128 division.
    #[cfg(not(feature = "no-f16-f128"))]
    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
    float! {
        f128, __divkf3, Quad, not(feature = "no-sys-f128");
    }
}

View file

@ -0,0 +1,72 @@
#![allow(unused_macros)]
#![cfg_attr(f128_enabled, feature(f128))]
#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
use builtins_test::*;
// This is approximate because of issues related to
// https://github.com/rust-lang/rust/issues/73920.
// TODO how do we resolve this indeterminacy?
/// Generate a test for each `powi`-style builtin `$fn`, comparing against
/// `std::powi` within a relative tolerance `$tolerance` (the result is
/// inherently approximate, see the issue referenced above).
macro_rules! pow {
    ($($f:ty, $tolerance:expr, $fn:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            // FIXME(apfloat): We skip tests if system symbols aren't available rather
            // than providing a fallback, since `rustc_apfloat` does not provide `pow`.
            #[cfg($sys_available)]
            fn $fn() {
                use compiler_builtins::float::pow::$fn;
                use compiler_builtins::float::Float;
                fuzz_float_2(N, |x: $f, y: $f| {
                    if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) {
                        // Derive an integer exponent from `y`'s exponent bits.
                        let n = y.to_bits() & !<$f as Float>::SIG_MASK;
                        let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS;
                        let n = n as i32;
                        let tmp0: $f = x.powi(n);
                        let tmp1: $f = $fn(x, n);
                        // Order the two results so the ratio below is >= 1.
                        let (a, b) = if tmp0 < tmp1 {
                            (tmp0, tmp1)
                        } else {
                            (tmp1, tmp0)
                        };
                        // Accept exact equality (covers infinities), both
                        // tiny, or a ratio within the tolerance band.
                        let good = if a == b {
                            // handles infinity equality
                            true
                        } else if a < $tolerance {
                            b < $tolerance
                        } else {
                            let quo = b / a;
                            (quo < (1. + $tolerance)) && (quo > (1. - $tolerance))
                        };
                        assert!(
                            good,
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn), x, n, tmp0, tmp1
                        );
                    }
                });
            }
        )*
    };
}

pow! {
    f32, 1e-4, __powisf2, all();
    f64, 1e-12, __powidf2, all();
}

#[cfg(f128_enabled)]
// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
#[cfg(not(target_env = "msvc"))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
pow! {
    f128, 1e-36, __powitf2, not(feature = "no-sys-f128");
}

// PowerPC uses the `kf` symbol name for IEEE binary128.
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
pow! {
    f128, 1e-36, __powikf2, not(feature = "no-sys-f128");
}

View file

@ -0,0 +1,97 @@
#![feature(decl_macro)] // so we can use pub(super)
#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))]
/// Translate a byte size to a Rust type.
macro int_ty {
(1) => { i8 },
(2) => { i16 },
(4) => { i32 },
(8) => { i64 },
(16) => { i128 }
}
mod cas {
    /// Generate a test for one compare-and-swap outline-atomics helper.
    ///
    /// Checks the failure path (current value differs from `expected`, so
    /// the target must be left untouched) and the success path (target is
    /// replaced by `new`); in both cases the return value must be the
    /// previous value stored at the target.
    pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
        #[test]
        fn $name() {
            builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| {
                // `target` deliberately differs from `expected` so the CAS fails.
                let mut target = expected.wrapping_add(10);
                assert_eq!(
                    unsafe {
                        compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target)
                    },
                    expected.wrapping_add(10),
                    "return value should always be the previous value",
                );
                assert_eq!(
                    target,
                    expected.wrapping_add(10),
                    "shouldn't have changed target"
                );
                // Now make the CAS succeed.
                target = expected;
                assert_eq!(
                    unsafe {
                        compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target)
                    },
                    expected
                );
                assert_eq!(target, new, "should have updated target");
            });
        }
    }
}

/// `foreach_cas16!` passes no byte-size argument, so adapt the plain CAS
/// test macro with a fixed 16-byte width.
macro test_cas16($_ordering:ident, $name:ident) {
    cas::test!($_ordering, 16, $name);
}
mod swap {
    /// Generate a test for one atomic-swap outline-atomics helper: the
    /// helper must return the previous target value and store `left`.
    pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
        #[test]
        fn $name() {
            builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| {
                let orig_right = right;
                assert_eq!(
                    unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) },
                    orig_right
                );
                assert_eq!(left, right);
            });
        }
    }
}
/// Build a module `$mod` containing a test-generation macro for one
/// read-modify-write atomic family; `$op` is the reference binary
/// operation the helper is expected to apply to the stored value.
macro_rules! test_op {
    ($mod:ident, $( $op:tt )* ) => {
        mod $mod {
            pub(super) macro test {
                ($_ordering:ident, $bytes:tt, $name:ident) => {
                    #[test]
                    fn $name() {
                        builtins_test::fuzz_2(10000, |old, val| {
                            let mut target = old;
                            let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*;
                            let expected = op(old, val);
                            assert_eq!(old, unsafe { compiler_builtins::aarch64_linux::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name));
                            assert_eq!(expected, target, "{} should store to target", stringify!($name));
                        });
                    }
                }
            }
        }
    };
}

// Reference semantics for each LSE read-modify-write family.
test_op!(add, |left, right| left.wrapping_add(right));
test_op!(clr, |left, right| left & !right);
test_op!(xor, std::ops::BitXor::bitxor);
test_op!(or, std::ops::BitOr::bitor);

// Instantiate one test per width/ordering combination for every family.
compiler_builtins::foreach_cas!(cas::test);
compiler_builtins::foreach_cas16!(test_cas16);
compiler_builtins::foreach_swp!(swap::test);
compiler_builtins::foreach_ldadd!(add::test);
compiler_builtins::foreach_ldclr!(clr::test);
compiler_builtins::foreach_ldeor!(xor::test);
compiler_builtins::foreach_ldset!(or::test);

View file

@ -0,0 +1,286 @@
extern crate compiler_builtins;
// The Rust implementations of the C `mem*` family under test.
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
// Native pointer width in bytes; used to construct word-aligned and
// deliberately misaligned offsets in the tests below.
const WORD_SIZE: usize = core::mem::size_of::<usize>();
#[test]
fn memcpy_3() {
    // Copy three bytes toward the front of the buffer (ranges disjoint).
    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    unsafe {
        let dst = arr.as_mut_ptr().add(1);
        let src = arr.as_ptr().add(9);
        assert_eq!(memcpy(dst, src, 3), dst);
        assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]);
    }
    // Reset and copy the other direction.
    arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    unsafe {
        let dst = arr.as_mut_ptr().add(9);
        let src = arr.as_ptr().add(1);
        assert_eq!(memcpy(dst, src, 3), dst);
        assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]);
    }
}
#[test]
fn memcpy_10() {
    let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
    let mut dst: [u8; 12] = [0; 12];
    // Ten-byte copies from two source offsets; the final two bytes of
    // `dst` must remain untouched both times.
    unsafe {
        assert_eq!(memcpy(dst.as_mut_ptr(), arr.as_ptr().add(1), 10), dst.as_mut_ptr());
        assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]);
    }
    unsafe {
        assert_eq!(memcpy(dst.as_mut_ptr(), arr.as_ptr().add(8), 10), dst.as_mut_ptr());
        assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]);
    }
}
#[test]
fn memcpy_big() {
    // Make the arrays cross 3 pages
    const SIZE: usize = 8193;
    let src: [u8; SIZE] = [22; SIZE];
    // Guard fields before and after the buffer: if `memcpy` writes out of
    // bounds they would likely be clobbered.
    struct Dst {
        start: usize,
        buf: [u8; SIZE],
        end: usize,
    }
    let mut dst = Dst {
        start: 0,
        buf: [0; SIZE],
        end: 0,
    };
    unsafe {
        assert_eq!(
            memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE),
            dst.buf.as_mut_ptr()
        );
        // Buffer fully copied, guards untouched.
        assert_eq!(dst.start, 0);
        assert_eq!(dst.buf, [22; SIZE]);
        assert_eq!(dst.end, 0);
    }
}
#[test]
fn memmove_forward() {
    // Overlapping move where the destination precedes the source.
    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    unsafe {
        let dst = arr.as_mut_ptr().add(3);
        let src = arr.as_ptr().add(6);
        assert_eq!(memmove(dst, src, 5), dst);
    }
    assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]);
}
#[test]
fn memmove_backward() {
    // Overlapping move where the destination follows the source.
    let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    unsafe {
        let dst = arr.as_mut_ptr().add(6);
        let src = arr.as_ptr().add(3);
        assert_eq!(memmove(dst, src, 5), dst);
    }
    assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]);
}
#[test]
fn memset_zero() {
    let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    // Clear two bytes in the middle of the buffer.
    let tail = unsafe { arr.as_mut_ptr().add(5) };
    unsafe {
        assert_eq!(memset(tail, 0, 2), tail);
    }
    assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]);
    // Only the LSB matters for a memset
    let base = arr.as_mut_ptr();
    unsafe {
        assert_eq!(memset(base, 0x2000, 8), base);
    }
    assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]);
}
#[test]
fn memset_nonzero() {
    let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    // Fill three bytes in the middle with 22.
    let mid = unsafe { arr.as_mut_ptr().add(2) };
    unsafe {
        assert_eq!(memset(mid, 22, 3), mid);
    }
    assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]);
    // Only the LSB matters for a memset
    let base = arr.as_mut_ptr();
    unsafe {
        assert_eq!(memset(base, 0x2009, 8), base);
    }
    assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]);
}
#[test]
fn memcmp_eq() {
    // Two identical buffers compare equal at every prefix length, in
    // both argument orders.
    let first = gen_arr::<256>();
    let second = first;
    for len in 0..256 {
        unsafe {
            assert_eq!(memcmp(first.0.as_ptr(), second.0.as_ptr(), len), 0);
            assert_eq!(memcmp(second.0.as_ptr(), first.0.as_ptr(), len), 0);
        }
    }
}
#[test]
fn memcmp_ne() {
    // The `@` pattern binds two independent copies of the same array.
    let arr1 @ arr2 = gen_arr::<256>();
    // Reduce iteration count in Miri as it is too slow otherwise.
    let limit = if cfg!(miri) { 64 } else { 256 };
    for i in 0..limit {
        // Make position `i` differ, then compare every prefix length that
        // includes it; only the sign of the result is specified.
        let mut diff_arr = arr1;
        diff_arr.0[i] = 127;
        let expect = diff_arr.0[i].cmp(&arr2.0[i]);
        for k in i + 1..limit {
            let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
            assert_eq!(expect, result.cmp(&0));
        }
    }
}
/// Byte buffer whose zero-sized `[usize; 0]` member forces word alignment
/// of field 0, so tests can build precisely aligned and misaligned
/// pointers from it.
#[derive(Clone, Copy)]
struct AlignedStorage<const N: usize>([u8; N], [usize; 0]);

/// Build an aligned buffer filled with 0, 1, 2, … (wrapping at 256).
fn gen_arr<const N: usize>() -> AlignedStorage<N> {
    let mut storage = AlignedStorage::<N>([0; N], []);
    for (idx, byte) in storage.0.iter_mut().enumerate() {
        *byte = idx as u8;
    }
    storage
}
#[test]
fn memmove_forward_misaligned_nonaligned_start() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // Overlapping forward move where neither pointer is word aligned.
        let src = arr.0.as_ptr().add(6);
        let dst = arr.0.as_mut_ptr().add(3);
        assert_eq!(memmove(dst, src, 17), dst);
    }
    // `copy_within` is the safe oracle for an overlapping move.
    reference.0.copy_within(6..6 + 17, 3);
    assert_eq!(arr.0, reference.0);
}
#[test]
fn memmove_forward_misaligned_aligned_start() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // Misaligned source, word-aligned destination at the buffer start.
        let src = arr.0.as_ptr().add(6);
        let dst = arr.0.as_mut_ptr();
        assert_eq!(memmove(dst, src, 17), dst);
    }
    reference.0.copy_within(6..6 + 17, 0);
    assert_eq!(arr.0, reference.0);
}
#[test]
fn memmove_forward_aligned() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // src and dst differ by exactly WORD_SIZE, so both sides share the
        // same alignment throughout the copy.
        let src = arr.0.as_ptr().add(3 + WORD_SIZE);
        let dst = arr.0.as_mut_ptr().add(3);
        assert_eq!(memmove(dst, src, 17), dst);
        // Mirror the move with the known-good `copy_within` and compare.
        reference
            .0
            .copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3);
        assert_eq!(arr.0, reference.0);
    }
}
#[test]
fn memmove_backward_misaligned_nonaligned_start() {
    // Overlapping move toward higher addresses (dst > src), checked against
    // the known-good `copy_within`.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let src = buf.0.as_ptr().add(3);
        let dst = buf.0.as_mut_ptr().add(6);
        assert_eq!(memmove(dst, src, 17), dst);
        expected.0.copy_within(3..3 + 17, 6);
        assert_eq!(buf.0, expected.0);
    }
}
#[test]
fn memmove_backward_misaligned_aligned_start() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // dst > src (backward copy); dst sits at a word-sized offset while
        // the src/dst distance is not a word multiple.
        let src = arr.0.as_ptr().offset(3);
        let dst = arr.0.as_mut_ptr().add(WORD_SIZE);
        assert_eq!(memmove(dst, src, 17), dst);
        // Mirror the move with the known-good `copy_within` and compare.
        reference.0.copy_within(3..3 + 17, WORD_SIZE);
        assert_eq!(arr.0, reference.0);
    }
}
#[test]
fn memmove_backward_aligned() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // dst > src (backward copy) with a src/dst distance of exactly
        // WORD_SIZE, so both sides share the same alignment.
        let src = arr.0.as_ptr().add(3);
        let dst = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
        assert_eq!(memmove(dst, src, 17), dst);
        // Mirror the move with the known-good `copy_within` and compare.
        reference.0.copy_within(3..3 + 17, 3 + WORD_SIZE);
        assert_eq!(arr.0, reference.0);
    }
}
#[test]
fn memmove_misaligned_bounds() {
    // The above tests have the downside that the addresses surrounding the range-to-copy are all
    // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
    // an array that has just the right size. We test a few times to avoid it being accidentally
    // aligned.
    for _ in 0..8 {
        // 17 is deliberately not a multiple of the word size.
        let mut arr1 = [0u8; 17];
        let mut arr2 = [0u8; 17];
        unsafe {
            // Copy both ways so we hit both the forward and backward cases.
            memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
            memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
        }
    }
}
#[test]
fn memset_backward_misaligned_nonaligned_start() {
    // Fill 17 bytes starting at a non-word offset; verify against the
    // known-good `core::ptr::write_bytes`.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let dst = buf.0.as_mut_ptr().add(6);
        assert_eq!(memset(dst, 0xCC, 17), dst);
        core::ptr::write_bytes(expected.0.as_mut_ptr().add(6), 0xCC, 17);
        assert_eq!(buf.0, expected.0);
    }
}
#[test]
fn memset_backward_misaligned_aligned_start() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // Fill starts at a word-sized offset into the aligned storage.
        let ptr = arr.0.as_mut_ptr().add(WORD_SIZE);
        assert_eq!(memset(ptr, 0xCC, 17), ptr);
        // Mirror the fill with the known-good `write_bytes` and compare.
        core::ptr::write_bytes(reference.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17);
        assert_eq!(arr.0, reference.0);
    }
}
#[test]
fn memset_backward_aligned() {
    let mut arr = gen_arr::<32>();
    let mut reference = arr;
    unsafe {
        // Fill starts WORD_SIZE past an odd base offset of 3.
        let ptr = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
        assert_eq!(memset(ptr, 0xCC, 17), ptr);
        // Mirror the fill with the known-good `write_bytes` and compare.
        core::ptr::write_bytes(reference.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17);
        assert_eq!(arr.0, reference.0);
    }
}

View file

@ -0,0 +1,202 @@
// makes configuration easier
#![allow(unused_macros)]
use builtins_test::*;
/// Make sure that the edge case tester and randomized tester don't break, and list examples of
/// fuzz values for documentation purposes.
#[test]
fn fuzz_values() {
    // Expected sequence: hand-picked edge-case bit patterns first, followed by
    // the values the fuzzer is observed to produce afterward.
    const VALS: [u16; 47] = [
        0b0, // edge cases
        0b1111111111111111,
        0b1111111111111110,
        0b1111111111111100,
        0b1111111110000000,
        0b1111111100000000,
        0b1110000000000000,
        0b1100000000000000,
        0b1000000000000000,
        0b111111111111111,
        0b111111111111110,
        0b111111111111100,
        0b111111110000000,
        0b111111100000000,
        0b110000000000000,
        0b100000000000000,
        0b11111111111111,
        0b11111111111110,
        0b11111111111100,
        0b11111110000000,
        0b11111100000000,
        0b10000000000000,
        0b111111111,
        0b111111110,
        0b111111100,
        0b110000000,
        0b100000000,
        0b11111111,
        0b11111110,
        0b11111100,
        0b10000000,
        0b111,
        0b110,
        0b100,
        0b11,
        0b10,
        0b1,
        0b1010110100000, // beginning of random fuzzing
        0b1100011001011010,
        0b1001100101001111,
        0b1101010100011010,
        0b100010001,
        0b1000000000000000,
        0b1100000000000101,
        0b1100111101010101,
        0b1100010111111111,
        0b1111110101111111,
    ];
    // Cursor into `VALS`, advanced once per value the fuzzer hands us.
    let mut i = 0;
    fuzz(10, |x: u16| {
        assert_eq!(x, VALS[i]);
        i += 1;
    });
}
/// Compare every leading-zeros implementation against `u*::leading_zeros`.
#[test]
fn leading_zeros() {
    use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
    // 32-bit variants.
    {
        use compiler_builtins::int::leading_zeros::__clzsi2;
        fuzz(N, |x: u32| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzsi2(x);
            let lz1 = leading_zeros_default(x);
            let lz2 = leading_zeros_riscv(x);
            if lz0 != lz {
                panic!("__clzsi2({x}): std: {lz}, builtins: {lz0}");
            }
            if lz1 != lz {
                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
            }
            if lz2 != lz {
                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
            }
        });
    }
    // 64-bit variants.
    {
        use compiler_builtins::int::leading_zeros::__clzdi2;
        fuzz(N, |x: u64| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzdi2(x);
            let lz1 = leading_zeros_default(x);
            let lz2 = leading_zeros_riscv(x);
            if lz0 != lz {
                panic!("__clzdi2({x}): std: {lz}, builtins: {lz0}");
            }
            if lz1 != lz {
                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
            }
            if lz2 != lz {
                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
            }
        });
    }
    // 128-bit: only the intrinsic is exercised here.
    {
        use compiler_builtins::int::leading_zeros::__clzti2;
        fuzz(N, |x: u128| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzti2(x);
            if lz0 != lz {
                panic!("__clzti2({x}): std: {lz}, builtins: {lz0}");
            }
        });
    }
}
/// Compare every trailing-zeros implementation against `u*::trailing_zeros`.
#[test]
fn trailing_zeros() {
    use compiler_builtins::int::trailing_zeros::{__ctzdi2, __ctzsi2, __ctzti2, trailing_zeros};
    // 32-bit variants.
    fuzz(N, |x: u32| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzsi2(x);
        let tz1 = trailing_zeros(x);
        if tz0 != tz {
            panic!("__ctzsi2({x}): std: {tz}, builtins: {tz0}");
        }
        if tz1 != tz {
            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
        }
    });
    // 64-bit variants.
    fuzz(N, |x: u64| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzdi2(x);
        let tz1 = trailing_zeros(x);
        if tz0 != tz {
            panic!("__ctzdi2({x}): std: {tz}, builtins: {tz0}");
        }
        if tz1 != tz {
            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
        }
    });
    // 128-bit: only the intrinsic is exercised here.
    fuzz(N, |x: u128| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzti2(x);
        if tz0 != tz {
            panic!("__ctzti2({x}): std: {tz}, builtins: {tz0}");
        }
    });
}
/// Compare the byte-swap intrinsics against `u*::swap_bytes`, with a few
/// fixed-value sanity checks as well.
#[test]
fn bswap() {
    use compiler_builtins::int::bswap::{__bswapdi2, __bswapsi2};
    fuzz(N, |x: u32| {
        assert_eq!(x.swap_bytes(), __bswapsi2(x));
    });
    fuzz(N, |x: u64| {
        assert_eq!(x.swap_bytes(), __bswapdi2(x));
    });
    // Known-answer checks so a broken fuzzer can't mask a broken intrinsic.
    assert_eq!(__bswapsi2(0x12345678u32), 0x78563412u32);
    assert_eq!(__bswapsi2(0x00000001u32), 0x01000000u32);
    assert_eq!(__bswapdi2(0x123456789ABCDEF0u64), 0xF0DEBC9A78563412u64);
    assert_eq!(__bswapdi2(0x0200000001000000u64), 0x0000000100000002u64);
    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
    {
        use compiler_builtins::int::bswap::__bswapti2;
        fuzz(N, |x: u128| {
            assert_eq!(x.swap_bytes(), __bswapti2(x));
        });
        assert_eq!(
            __bswapti2(0x123456789ABCDEF013579BDF02468ACEu128),
            0xCE8A4602DF9B5713F0DEBC9A78563412u128
        );
        assert_eq!(
            __bswapti2(0x04000000030000000200000001000000u128),
            0x00000001000000020000000300000004u128
        );
    }
}

View file

@ -0,0 +1,150 @@
#![allow(unused_macros)]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::*;
mod int_mul {
    use super::*;

    /// Generate a test comparing a builtins multiply against `wrapping_mul`
    /// for fuzzed input pairs.
    macro_rules! mul {
        ($($i:ty, $fn:ident);*;) => {
            $(
                #[test]
                fn $fn() {
                    use compiler_builtins::int::mul::$fn;
                    fuzz_2(N, |x: $i, y: $i| {
                        let mul0 = x.wrapping_mul(y);
                        let mul1: $i = $fn(x, y);
                        if mul0 != mul1 {
                            panic!(
                                "{func}({x}, {y}): std: {mul0}, builtins: {mul1}",
                                func = stringify!($fn),
                            );
                        }
                    });
                }
            )*
        };
    }

    mul! {
        u64, __muldi3;
        i128, __multi3;
    }
}
mod int_overflowing_mul {
    use super::*;

    /// Generate a test comparing a builtins overflowing multiply (which
    /// reports overflow via an out-parameter) against `overflowing_mul`.
    macro_rules! overflowing_mul {
        ($($i:ty, $fn:ident);*;) => {
            $(
                #[test]
                fn $fn() {
                    use compiler_builtins::int::mul::$fn;
                    fuzz_2(N, |x: $i, y: $i| {
                        let (mul0, o0) = x.overflowing_mul(y);
                        let mut o1 = 0i32;
                        let mul1: $i = $fn(x, y, &mut o1);
                        // The intrinsic reports overflow as a nonzero int.
                        let o1 = o1 != 0;
                        if mul0 != mul1 || o0 != o1 {
                            panic!(
                                "{func}({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",
                                func = stringify!($fn),
                            );
                        }
                    });
                }
            )*
        };
    }

    overflowing_mul! {
        i32, __mulosi4;
        i64, __mulodi4;
        i128, __muloti4;
    }

    /// The 128-bit Rust-specific overflowing multiplies, checked for both the
    /// unsigned and (by reinterpreting the same inputs) signed variants.
    #[test]
    fn overflowing_mul_u128() {
        use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo};
        fuzz_2(N, |x: u128, y: u128| {
            let mut o1 = 0;
            let (mul0, o0) = x.overflowing_mul(y);
            let mul1 = __rust_u128_mulo(x, y, &mut o1);
            if mul0 != mul1 || i32::from(o0) != o1 {
                panic!("__rust_u128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
            }
            let x = x as i128;
            let y = y as i128;
            let (mul0, o0) = x.overflowing_mul(y);
            let mul1 = __rust_i128_mulo(x, y, &mut o1);
            if mul0 != mul1 || i32::from(o0) != o1 {
                panic!("__rust_i128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
            }
        });
    }
}
/// Generate a test comparing a builtins float multiply against either the
/// native `Mul` or an apfloat fallback when `$sys_available` is not met.
/// `eq_repr` compares bit representations, so NaN patterns are also checked.
macro_rules! float_mul {
    ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            fn $fn() {
                use compiler_builtins::float::{mul::$fn, Float};
                use core::ops::Mul;
                fuzz_float_2(N, |x: $f, y: $f| {
                    let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y);
                    let mul1: $f = $fn(x, y);
                    if !Float::eq_repr(mul0, mul1) {
                        panic!(
                            "{func}({x:?}, {y:?}): std: {mul0:?}, builtins: {mul1:?}",
                            func = stringify!($fn),
                        );
                    }
                });
            }
        )*
    };
}
// x87 (x86 without SSE) is excluded; see the FIXME below for arm.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_mul {
    use super::*;

    // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
    // nightly.
    float_mul! {
        f32, __mulsf3, Single, not(target_arch = "arm");
        f64, __muldf3, Double, not(target_arch = "arm");
    }
}
// f128 multiply test for non-PowerPC targets (PowerPC uses `__mulkf3` below).
#[cfg(f128_enabled)]
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod float_mul_f128 {
    use super::*;

    float_mul! {
        f128, __multf3, Quad,
        // FIXME(llvm): there is a bug in LLVM rt.
        // See <https://github.com/llvm/llvm-project/issues/91840>.
        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
    }
}
// PowerPC spells the f128 multiply `__mulkf3` rather than `__multf3`.
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod float_mul_f128_ppc {
    use super::*;

    float_mul! {
        f128, __mulkf3, Quad, not(feature = "no-sys-f128");
    }
}

View file

@ -0,0 +1,35 @@
use builtins_test::*;
/// Generate a test comparing a builtins shift intrinsic against the
/// equivalent std wrapping-shift method for fuzzed value/amount pairs.
macro_rules! shift {
    ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => {
        $(
            #[test]
            fn $fn_builtins() {
                use compiler_builtins::int::shift::$fn_builtins;
                fuzz_shift(|x: $i, s: u32| {
                    let tmp0: $i = x.$fn_std(s);
                    let tmp1: $i = $fn_builtins(x, s);
                    if tmp0 != tmp1 {
                        panic!(
                            "{}({}, {}): std: {}, builtins: {}",
                            stringify!($fn_builtins), x, s, tmp0, tmp1
                        );
                    }
                });
            }
        )*
    };
}

// Shift-left (__ashl*), arithmetic shift-right on signed (__ashr*), and
// logical shift-right on unsigned (__lshr*), at 32/64/128 bits.
shift! {
    u32, wrapping_shl, __ashlsi3;
    u64, wrapping_shl, __ashldi3;
    u128, wrapping_shl, __ashlti3;
    i32, wrapping_shr, __ashrsi3;
    i64, wrapping_shr, __ashrdi3;
    i128, wrapping_shr, __ashrti3;
    u32, wrapping_shr, __lshrsi3;
    u64, wrapping_shr, __lshrdi3;
    u128, wrapping_shr, __lshrti3;
}

View file

@ -0,0 +1,58 @@
#!/bin/bash
# Run the iai-callgrind icount benchmarks against the most recent master
# baseline, then package the fresh results as the next baseline artifact.
set -eux

iai_home="iai-home"

# Download the baseline from master
./ci/ci-util.py locate-baseline --download --extract

# Run benchmarks once. Arguments before `--` are passed to cargo, arguments
# after it to iai-callgrind.
function run_icount_benchmarks() {
    cargo_args=(
        "--bench" "icount"
        "--no-default-features"
        "--features" "unstable,unstable-float,icount"
    )
    iai_args=(
        "--home" "$(pwd)/$iai_home"
        "--regression=ir=5.0"
        "--save-summary"
    )

    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
    parsing_iai_args=0
    while [ "$#" -gt 0 ]; do
        if [ "$parsing_iai_args" == "1" ]; then
            iai_args+=("$1")
        elif [ "$1" == "--" ]; then
            parsing_iai_args=1
        else
            cargo_args+=("$1")
        fi
        shift
    done

    # Run iai-callgrind benchmarks
    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"

    # NB: iai-callgrind should exit on error but does not, so we inspect the summary
    # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337
    if [ -n "${PR_NUMBER:-}" ]; then
        # If this is for a pull request, ignore regressions if specified.
        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
    else
        # On non-PR runs, regressions are reported but never fail the job.
        ./ci/ci-util.py check-regressions --home "$iai_home" || true
    fi
}

# Run once with softfloats, once with arch instructions enabled
run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
run_icount_benchmarks -- --save-baseline=hardfloat

# Name and tar the new baseline
name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
tar cJf "$name.tar.xz" "$iai_home"

View file

@ -0,0 +1,438 @@
#!/usr/bin/env python3
"""Utilities for CI.
This dynamically prepares a list of routines that had a source file change based on
git history.
"""
import json
import os
import re
import subprocess as sp
import sys
from dataclasses import dataclass
from glob import glob, iglob
from inspect import cleandoc
from os import getenv
from pathlib import Path
from typing import TypedDict, Self
USAGE = cleandoc(
    """
    usage:

    ./ci/ci-util.py <COMMAND> [flags]

    COMMAND:
        generate-matrix
            Calculate a matrix of which functions had source change, print that as
            a JSON object.

        locate-baseline [--download] [--extract]
            Locate the most recent benchmark baseline available in CI and, if flags
            specify, download and extract it. Never exits with nonzero status if
            downloading fails.

            Note that `--extract` will overwrite files in `iai-home`.

        check-regressions [--home iai-home] [--allow-pr-override pr_number]
            Check the directory passed via `--home` (`iai-home` if unspecified) for
            `summary.json` files and see if there are any regressions. This is used
            as a workaround for `iai-callgrind` not exiting with error status; see
            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.

            If `--allow-pr-override` is specified, the regression check will not exit
            with failure if any line in the PR starts with `allow-regressions`.
    """
)

# Repository root, assuming this script lives in `<root>/ci/`.
REPO_ROOT = Path(__file__).parent.parent
GIT = ["git", "-C", REPO_ROOT]
DEFAULT_BRANCH = "master"
WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
ARTIFACT_GLOB = "baseline-icount*"

# Place this in a PR body to skip regression checks (must be at the start of a line).
REGRESSION_DIRECTIVE = "ci: allow-regressions"
# Place this in a PR body to skip extensive tests
SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
# Place this in a PR body to allow running a large number of extensive tests. If not
# set, this script will error out if a threshold is exceeded in order to avoid
# accidentally spending huge amounts of CI time.
ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
MANY_EXTENSIVE_THRESHOLD = 20

# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
IGNORE_FILES = [
    "libm/src/math/support/",
    "libm/src/libm_helper.rs",
    "libm/src/math/arch/intrinsics.rs",
]

# libm PR CI takes a long time and doesn't need to run unless relevant files have been
# changed. Anything matching this regex pattern will trigger a run.
TRIGGER_LIBM_PR_CI = ".*(libm|musl).*"

# Types a routine may be keyed under in `function-definitions.json`.
TYPES = ["f16", "f32", "f64", "f128"]
def eprint(*args, **kwargs):
    """Like `print`, but directed at stderr so stdout stays machine-readable."""
    print(*args, file=sys.stderr, **kwargs)
@dataclass
class PrInfo:
    """GitHub response for PR query"""

    # PR description body.
    body: str
    # Commit hashes in the PR (the jq filter below flattens objects to oids).
    commits: list[str]
    # PR creation timestamp (renamed from GitHub's `createdAt`).
    created_at: str
    # PR number.
    number: int

    @classmethod
    def load(cls, pr_number: int | str) -> Self:
        """For a given PR number, query the body and commit list"""
        # Requires the `gh` CLI to be installed and authenticated.
        pr_info = sp.check_output(
            [
                "gh",
                "pr",
                "view",
                str(pr_number),
                "--json=number,commits,body,createdAt",
                # Flatten the commit list to only hashes, change a key to snake naming
                "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)",
            ],
            text=True,
        )
        eprint("PR info:", json.dumps(pr_info, indent=4))
        return cls(**json.loads(pr_info))

    def contains_directive(self, directive: str) -> bool:
        """Return true if the provided directive is on a line in the PR body"""
        lines = self.body.splitlines()
        return any(line.startswith(directive) for line in lines)
class FunctionDef(TypedDict):
    """Type for an entry in `function-definitions.json`"""

    # Source files implementing the routine; used to match changed paths.
    sources: list[str]
    # Type the routine is grouped under (one of the entries in TYPES).
    type: str
class Context:
    """Collects the change list and routine definitions for this CI run."""

    # Value of GITHUB_REF when running in CI, `None` otherwise.
    gh_ref: str | None
    # Files changed by the PR; empty when no diff is available.
    changed: list[Path]
    # Routine metadata from `etc/function-definitions.json`, keyed by name.
    defs: dict[str, FunctionDef]

    def __init__(self) -> None:
        self.gh_ref = getenv("GITHUB_REF")
        self.changed = []
        self._init_change_list()
        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
            defs = json.load(f)
        # The JSON carries a `__comment` entry that is not a routine.
        defs.pop("__comment", None)
        self.defs = defs

    def _init_change_list(self):
        """Create a list of files that have been changed. This uses GITHUB_REF if
        available, otherwise a diff between `HEAD` and `master`.
        """
        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
        # the PR number), and sets this as `GITHUB_REF`.
        ref = self.gh_ref
        eprint(f"using ref `{ref}`")
        if not self.is_pr():
            # If the ref is not for `merge` then we are not in PR CI
            eprint("No diff available for ref")
            return
        # The ref is for a dummy merge commit. We can extract the merge base by
        # inspecting all parents (`^@`).
        merge_sha = sp.check_output(
            GIT + ["show-ref", "--hash", ref], text=True
        ).strip()
        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
        eprint(f"Merge:\n{merge_log}\n")
        parents = (
            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
            .strip()
            .splitlines()
        )
        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
        # First parent is the target branch, second is the incoming change.
        base = parents[0].strip()
        incoming = parents[1].strip()
        eprint(f"base: {base}, incoming: {incoming}")
        textlist = sp.check_output(
            GIT + ["diff", base, incoming, "--name-only"], text=True
        )
        self.changed = [Path(p) for p in textlist.splitlines()]

    def is_pr(self) -> bool:
        """Check if we are looking at a PR rather than a push."""
        return self.gh_ref is not None and "merge" in self.gh_ref

    @staticmethod
    def _ignore_file(fname: str) -> bool:
        # True when `fname` falls under a path listed in IGNORE_FILES.
        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)

    def changed_routines(self) -> dict[str, list[str]]:
        """Create a list of routines for which one or more files have been updated,
        separated by type.
        """
        routines = set()
        for name, meta in self.defs.items():
            # Don't update if changes to the file should be ignored
            sources = (f for f in meta["sources"] if not self._ignore_file(f))
            # Select changed files
            changed = [f for f in sources if Path(f) in self.changed]
            if len(changed) > 0:
                eprint(f"changed files for {name}: {changed}")
                routines.add(name)
        # Group the sorted routine names by their type.
        ret: dict[str, list[str]] = {}
        for r in sorted(routines):
            ret.setdefault(self.defs[r]["type"], []).append(r)
        return ret

    def may_skip_libm_ci(self) -> bool:
        """If this is a PR and no libm files were changed, allow skipping libm
        jobs."""
        if self.is_pr():
            return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed)
        return False

    def emit_workflow_output(self):
        """Create a JSON object a list items for each type's changed files, if any
        did change, and the routines that were affected by the change.
        """
        pr_number = os.environ.get("PR_NUMBER")
        skip_tests = False
        error_on_many_tests = False
        if pr_number is not None and len(pr_number) > 0:
            # PR directives may opt out of (or into many) extensive tests.
            pr = PrInfo.load(pr_number)
            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
            error_on_many_tests = not pr.contains_directive(
                ALLOW_MANY_EXTENSIVE_DIRECTIVE
            )
            if skip_tests:
                eprint("Skipping all extensive tests")
        changed = self.changed_routines()
        matrix = []
        total_to_test = 0
        # Figure out which extensive tests need to run
        for ty in TYPES:
            ty_changed = changed.get(ty, [])
            ty_to_test = [] if skip_tests else ty_changed
            total_to_test += len(ty_to_test)
            item = {
                "ty": ty,
                "changed": ",".join(ty_changed),
                "to_test": ",".join(ty_to_test),
            }
            matrix.append(item)
        ext_matrix = json.dumps({"extensive_matrix": matrix}, separators=(",", ":"))
        may_skip = str(self.may_skip_libm_ci()).lower()
        # Emit to stdout for the workflow and mirror to stderr for the log.
        print(f"extensive_matrix={ext_matrix}")
        print(f"may_skip_libm_ci={may_skip}")
        eprint(f"extensive_matrix={ext_matrix}")
        eprint(f"may_skip_libm_ci={may_skip}")
        eprint(f"total extensive tests: {total_to_test}")
        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
            eprint(
                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is"
                " intentional. If this is refactoring that happens to touch a lot of"
                f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead."
            )
            exit(1)
def locate_baseline(flags: list[str]) -> None:
    """Find the most recent baseline from CI, download it if specified.

    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
    run on the branch, etc).
    """
    download = False
    extract = False
    # Simple flag parsing; anything unknown prints usage and exits.
    while len(flags) > 0:
        match flags[0]:
            case "--download":
                download = True
            case "--extract":
                extract = True
            case _:
                eprint(USAGE)
                exit(1)
        flags = flags[1:]
    if extract and not download:
        eprint("cannot extract without downloading")
        exit(1)
    try:
        # Locate the most recent job to complete with success on our branch
        latest_job = sp.check_output(
            [
                "gh",
                "run",
                "list",
                "--status=success",
                f"--branch={DEFAULT_BRANCH}",
                "--json=databaseId,url,headSha,conclusion,createdAt,"
                "status,workflowDatabaseId,workflowName",
                # Return the first array element matching our workflow name. NB: cannot
                # just use `--limit=1`, jq filtering happens after limiting. We also
                # cannot just use `--workflow` because GH gets confused from
                # different file names in history.
                f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]',
            ],
            text=True,
        )
    except sp.CalledProcessError as e:
        # Deliberately best-effort: see the docstring.
        eprint(f"failed to run github command: {e}")
        return
    try:
        latest = json.loads(latest_job)
        eprint("latest job: ", json.dumps(latest, indent=4))
    except json.JSONDecodeError as e:
        eprint(f"failed to decode json '{latest_job}', {e}")
        return
    if not download:
        eprint("--download not specified, returning")
        return
    job_id = latest.get("databaseId")
    if job_id is None:
        eprint("skipping download step")
        return
    # check=False: a failed download must not fail CI (see docstring).
    sp.run(
        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
        check=False,
    )
    if not extract:
        eprint("skipping extraction step")
        return
    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
    if len(candidate_baselines) == 0:
        eprint("no possible baseline directories found")
        return
    # Names embed a sortable UTC timestamp, so lexicographic max is newest.
    candidate_baselines.sort(reverse=True)
    baseline_archive = candidate_baselines[0]
    eprint(f"extracting {baseline_archive}")
    sp.run(["tar", "xJvf", baseline_archive], check=True)
    eprint("baseline extracted successfully")
def check_iai_regressions(args: list[str]):
"""Find regressions in iai summary.json files, exit with failure if any are
found.
"""
iai_home_str = "iai-home"
pr_number = None
while len(args) > 0:
match args:
case ["--home", home, *rest]:
iai_home_str = home
args = rest
case ["--allow-pr-override", pr_num, *rest]:
pr_number = pr_num
args = rest
case _:
eprint(USAGE)
exit(1)
iai_home = Path(iai_home_str)
found_summaries = False
regressions: list[dict] = []
for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
found_summaries = True
with open(iai_home / summary_path, "r") as f:
summary = json.load(f)
summary_regs = []
run = summary["callgrind_summary"]["callgrind_run"]
fname = summary["function_name"]
id = summary["id"]
name_entry = {"name": f"{fname}.{id}"}
for segment in run["segments"]:
summary_regs.extend(segment["regressions"])
summary_regs.extend(run["total"]["regressions"])
regressions.extend(name_entry | reg for reg in summary_regs)
if not found_summaries:
eprint(f"did not find any summary.json files within {iai_home}")
exit(1)
if len(regressions) == 0:
eprint("No regressions found")
return
eprint("Found regressions:", json.dumps(regressions, indent=4))
if pr_number is not None:
pr = PrInfo.load(pr_number)
if pr.contains_directive(REGRESSION_DIRECTIVE):
eprint("PR allows regressions, returning")
return
exit(1)
def main():
    """Dispatch to the requested subcommand; unknown input prints usage."""
    match sys.argv[1:]:
        case ["generate-matrix"]:
            ctx = Context()
            ctx.emit_workflow_output()
        case ["locate-baseline", *flags]:
            locate_baseline(flags)
        case ["check-regressions", *args]:
            check_iai_regressions(args)
        case ["--help" | "-h"]:
            print(USAGE)
            exit()
        case _:
            eprint(USAGE)
            exit(1)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,16 @@
# Cross-test image for aarch64-unknown-linux-gnu: GCC cross toolchain plus
# qemu-user-static so test binaries run under emulation.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \
    qemu-user-static
ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu-
# Point cargo/cc at the cross tools; RUST_TEST_THREADS=1 serializes the tests.
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \
    AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,15 @@
# Cross-test image for arm-unknown-linux-gnueabi (soft-float ARM) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static
ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi-
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \
    AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \
    CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,15 @@
# Cross-test image for arm-unknown-linux-gnueabihf (hard-float ARM) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static
ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf-
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \
    AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \
    CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,15 @@
# Cross-test image for armv7-unknown-linux-gnueabihf; shares the gnueabihf
# GCC toolchain with the generic arm hard-float image.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static
ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf-
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \
    AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \
    CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,6 @@
# Native test image for a 32-bit x86 target; gcc-multilib supplies the 32-bit
# libraries. (Exact target comes from the Dockerfile's path, not visible here.)
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc-multilib m4 make libc6-dev ca-certificates

View file

@ -0,0 +1,6 @@
# Native test image for a 32-bit x86 target; identical toolchain to the other
# multilib image. (Exact target comes from the Dockerfile's path.)
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc-multilib m4 make libc6-dev ca-certificates

View file

@ -0,0 +1,14 @@
# Cross-test image for loongarch64-unknown-linux-gnu; uses the versioned
# gcc-14 cross compiler packaged by Ubuntu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev qemu-user-static ca-certificates \
    gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \
    AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \
    CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \
    QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,16 @@
# Cross-test image for mips-unknown-linux-gnu (big-endian MIPS) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-mips-linux-gnu libc6-dev-mips-cross \
    binfmt-support qemu-user-static qemu-system-mips
ENV TOOLCHAIN_PREFIX=mips-linux-gnu-
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \
    AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/mips-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,20 @@
# Cross-test image for mips64-unknown-linux-gnuabi64 under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    ca-certificates \
    gcc \
    gcc-mips64-linux-gnuabi64 \
    libc6-dev \
    libc6-dev-mips64-cross \
    qemu-user-static \
    qemu-system-mips
ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64-
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \
    AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \
    CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,19 @@
# Cross-test image for mips64el-unknown-linux-gnuabi64 (little-endian) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    ca-certificates \
    gcc \
    gcc-mips64el-linux-gnuabi64 \
    libc6-dev \
    libc6-dev-mips64el-cross \
    qemu-user-static
ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64-
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \
    AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \
    CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,16 @@
# Cross-test image for mipsel-unknown-linux-gnu (little-endian MIPS) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \
    binfmt-support qemu-user-static
ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu-
ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \
    AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,16 @@
# Cross-test image for powerpc-unknown-linux-gnu (32-bit PowerPC) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev qemu-user-static ca-certificates \
    gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
    qemu-system-ppc
ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu-
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \
    AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,16 @@
# Cross-test image for powerpc64-unknown-linux-gnu (big-endian) under qemu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
    binfmt-support qemu-user-static qemu-system-ppc
ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu-
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \
    AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,17 @@
# Cross-test image for powerpc64le-unknown-linux-gnu; QEMU_CPU pins the
# emulated CPU model to POWER8.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev qemu-user-static ca-certificates \
    gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
    qemu-system-ppc
ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu-
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \
    AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_CPU=POWER8 \
    QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \
    RUST_TEST_THREADS=1

View file

@ -0,0 +1,16 @@
# Test image for `riscv64gc-unknown-linux-gnu`: installs the riscv64 cross
# toolchain and QEMU user emulation so test binaries can be executed.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user-static ca-certificates \
gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
qemu-system-riscv64
ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu-
# Point cargo/cc at the cross toolchain and run tests through QEMU.
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \
AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \
RUST_TEST_THREADS=1

View file

@ -0,0 +1,9 @@
# Image for a bare-metal thumb (arm-none-eabi) target: installs the
# arm-none-eabi GCC toolchain and newlib. BUILD_ONLY=1 tells ci/run.sh to
# compile but skip running tests, since these targets cannot execute here.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev ca-certificates \
gcc-arm-none-eabi \
libnewlib-arm-none-eabi
ENV BUILD_ONLY=1

View file

@ -0,0 +1,9 @@
# Image for a bare-metal thumb (arm-none-eabi) target: installs the
# arm-none-eabi GCC toolchain and newlib. BUILD_ONLY=1 tells ci/run.sh to
# compile but skip running tests, since these targets cannot execute here.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev ca-certificates \
gcc-arm-none-eabi \
libnewlib-arm-none-eabi
ENV BUILD_ONLY=1

View file

@ -0,0 +1,9 @@
# Image for a bare-metal thumb (arm-none-eabi) target: installs the
# arm-none-eabi GCC toolchain and newlib. BUILD_ONLY=1 tells ci/run.sh to
# compile but skip running tests, since these targets cannot execute here.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev ca-certificates \
gcc-arm-none-eabi \
libnewlib-arm-none-eabi
ENV BUILD_ONLY=1

View file

@ -0,0 +1,9 @@
# Image for a bare-metal thumb (arm-none-eabi) target: installs the
# arm-none-eabi GCC toolchain and newlib. BUILD_ONLY=1 tells ci/run.sh to
# compile but skip running tests, since these targets cannot execute here.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc libc6-dev ca-certificates \
gcc-arm-none-eabi \
libnewlib-arm-none-eabi
ENV BUILD_ONLY=1

View file

@ -0,0 +1,8 @@
# Image for `wasm32-unknown-unknown`. The cargo "runner" is set to the
# `true` binary, so test executables are built but not actually executed
# (there is no wasm runtime installed here).
ARG IMAGE=ubuntu:20.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc clang libc6-dev ca-certificates
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=true

View file

@ -0,0 +1,6 @@
# Native x86_64 test image. `m4` and `make` are included — presumably for
# building C dependencies (e.g. musl/GMP via build scripts); confirm.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc m4 make libc6-dev ca-certificates

View file

@ -0,0 +1,10 @@
#!/bin/sh
# Download sources to build C versions of intrinsics. Once being run,
# `RUST_COMPILER_RT_ROOT` must be set.
set -eux

# Pinned rust-lang/llvm-project release tag to fetch.
version=20.1-2025-02-13
archive_url="https://github.com/rust-lang/llvm-project/archive/rustc/${version}.tar.gz"

# Fetch the tarball, then extract only the compiler-rt subtree into the
# current directory (the leading archive directory is stripped).
curl -L -o code.tar.gz "$archive_url"
tar xzf code.tar.gz --strip-components 1 "llvm-project-rustc-${version}/compiler-rt"

View file

@ -0,0 +1,18 @@
#!/bin/bash
# Run the `mem` subset of builtins-test under Miri on a few representative
# targets.
set -eux

# We need Tree Borrows as some of our raw pointer patterns are not
# compatible with Stacked Borrows.
export MIRIFLAGS="-Zmiri-tree-borrows"

# One target that sets `mem-unaligned` and one that does not,
# and a big-endian target.
for tgt in \
    x86_64-unknown-linux-gnu \
    armv7-unknown-linux-gnueabihf \
    s390x-unknown-linux-gnu
do
    # Only run the `mem` tests to avoid this taking too long.
    cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target "$tgt" -- mem
done

View file

@ -0,0 +1,111 @@
#!/bin/bash
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.
set -euxo pipefail
# Normalize Apple's `arm64` spelling to `aarch64` so it compares equal to
# the arch component of Rust target triples.
host_arch="$(uname -m | sed 's/arm64/aarch64/')"
# Directories and files that do not yet exist need to be created before
# calling docker, otherwise docker will create them but they will be owned
# by root.
mkdir -p target
cargo generate-lockfile
cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml
# Build the docker image for target `$1` and execute `ci/run.sh` for that
# target inside it. The checkout is mounted read-only at /checkout; on Linux
# without DOCKER_BASE_IMAGE the host rustc, cargo home, and target dir are
# shared with the container.
run() {
    local target="$1"

    echo "testing target: $target"

    # Flag targets whose arch differs from the host; exported to the
    # container as EMULATED so run.sh can adapt.
    emulated=""
    target_arch="$(echo "$target" | cut -d'-' -f1)"
    if [ "$target_arch" != "$host_arch" ]; then
        emulated=1
        echo "target is emulated"
    fi

    # Env-prefix + command string eventually executed via `sh -c` in the
    # container.
    run_cmd="HOME=/tmp"

    if [ "${GITHUB_ACTIONS:-}" = "true" ]; then
        # Enable Docker image caching on GHA
        build_cmd=("buildx" "build")
        build_args=(
            "--cache-from" "type=local,src=/tmp/.buildx-cache"
            "--cache-to" "type=local,dest=/tmp/.buildx-cache-new"
            # This is the beautiful bash syntax for expanding an array but neither
            # raising an error nor returning an empty string if the array is empty.
            "${build_args[@]:+"${build_args[@]}"}"
            "--load"
        )
    fi

    if [ "$(uname -s)" = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then
        # Share the host rustc and target. Do this only on Linux and if the image
        # isn't overridden
        run_args=(
            --user "$(id -u):$(id -g)"
            -e "CARGO_HOME=/cargo"
            -v "${HOME}/.cargo:/cargo"
            -v "$(pwd)/target:/builtins-target"
            -v "$(rustc --print sysroot):/rust:ro"
        )
        run_cmd="$run_cmd PATH=\$PATH:/rust/bin:/cargo/bin"
    else
        # Use rustc provided by a docker image
        docker volume create compiler-builtins-cache
        build_args=(
            "--build-arg"
            "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}"
        )
        run_args=(-v "compiler-builtins-cache:/builtins-target")
        # FIX: this previously read `run_cmd="$run_cmd HOME=/tmp" "USING_CONTAINER_RUSTC=1"`,
        # which is the `VAR=val command` form: it tried to *execute* a program
        # named `USING_CONTAINER_RUSTC=1` (failing under `set -e`) and the
        # run_cmd assignment was only temporary. Append the variable to the
        # in-container env prefix instead (HOME=/tmp is already present).
        run_cmd="$run_cmd USING_CONTAINER_RUSTC=1"
    fi

    if [ -d compiler-rt ]; then
        export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt"
    fi

    run_cmd="$run_cmd ci/run.sh $target"

    docker "${build_cmd[@]:-build}" \
        -t "builtins-$target" \
        "${build_args[@]:-}" \
        "ci/docker/$target"
    docker run \
        --rm \
        -e CI \
        -e CARGO_TARGET_DIR=/builtins-target \
        -e CARGO_TERM_COLOR \
        -e MAY_SKIP_LIBM_CI \
        -e RUSTFLAGS \
        -e RUST_BACKTRACE \
        -e RUST_COMPILER_RT_ROOT \
        -e "EMULATED=$emulated" \
        -v "$(pwd):/checkout:ro" \
        -w /checkout \
        "${run_args[@]:-}" \
        --init \
        "builtins-$target" \
        sh -c "$run_cmd"
}
# CLI handling: no argument runs every target found under ci/docker/;
# exactly one argument runs just that target; anything else prints usage.
if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then
    set +x
    echo "\
usage: ./ci/run-docker.sh [target]
you can also set DOCKER_BASE_IMAGE to use something other than the default
ubuntu:24.04 (or rustlang/rust:nightly).
"
    exit
fi

if [ -z "${1:-}" ]; then
    for d in ci/docker/*; do
        # Quote the command substitution so an unexpected space in a
        # directory name cannot word-split the argument (was unquoted).
        run "$(basename "$d")"
    done
else
    run "$1"
fi

View file

@ -0,0 +1,24 @@
#!/bin/bash
# Run the libm test suite for the test names listed in `$TO_TEST` (set by
# CI), then the extensive generators for the same set.
set -euo pipefail

# Use `${TO_TEST:-}` so an unset variable prints the friendly message below
# instead of aborting immediately under `set -u`.
echo "Tests to run: '${TO_TEST:-}'"

if [ -z "${TO_TEST:-}" ]; then
    echo "No tests to run, exiting."
    exit
fi

set -x

# Shared cargo invocation; release-checked keeps debug assertions enabled.
test_cmd=(
    cargo test
    --package libm-test
    --features "build-mpfr,libm/unstable,libm/force-soft-floats"
    --profile release-checked
)

# Run the non-extensive tests first to catch any easy failures
"${test_cmd[@]}" -- "$TO_TEST"

LIBM_EXTENSIVE_TESTS="$TO_TEST" "${test_cmd[@]}" -- extensive

View file

@ -0,0 +1,302 @@
#!/bin/bash
# Main CI test script: tests compiler_builtins and libm for one target,
# passed as `$1` (defaults to the host target).
set -eux
export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
export NEXTEST_STATUS_LEVEL=all
target="${1:-}"
if [ -z "$target" ]; then
# No target given: fall back to the toolchain's host triple.
host_target=$(rustc -vV | awk '/^host/ { print $2 }')
echo "Defaulted to host target $host_target"
target="$host_target"
fi
if [[ "$target" = *"wasm"* ]]; then
# Enable the random backend
export RUSTFLAGS="${RUSTFLAGS:-} --cfg getrandom_backend=\"wasm_js\""
fi
if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then
# Install nonstandard components if we have control of the environment
rustup target list --installed |
grep -E "^$target\$" ||
rustup target add "$target"
fi
# Test our implementation
if [ "${BUILD_ONLY:-}" = "1" ]; then
echo "no tests to run for build-only targets"
else
# Exercise builtins-test under each feature combination, in both debug and
# release, plus the benchmarks.
test_builtins=(cargo test --package builtins-test --no-fail-fast --target "$target")
"${test_builtins[@]}"
"${test_builtins[@]}" --release
"${test_builtins[@]}" --features c
"${test_builtins[@]}" --features c --release
"${test_builtins[@]}" --features no-asm
"${test_builtins[@]}" --features no-asm --release
"${test_builtins[@]}" --features no-f16-f128
"${test_builtins[@]}" --features no-f16-f128 --release
"${test_builtins[@]}" --benches
"${test_builtins[@]}" --benches --release
if [ "${TEST_VERBATIM:-}" = "1" ]; then
# Build a Windows verbatim (`\\?\`) target path and make sure the build
# still works with it.
verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2)
"${test_builtins[@]}" --target-dir "$verb_path" --features c
fi
fi
declare -a rlib_paths

# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
update_rlib_paths() {
    local base
    if [ -d /builtins-target ]; then
        base=/builtins-target
    else
        base=target
    fi
    rlib_paths=( "$base"/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
}
# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
update_rlib_paths
rm -f "${rlib_paths[@]}"
# Rebuild compiler_builtins under each feature combination so the rlibs
# inspected below are fresh.
cargo build -p compiler_builtins --target "$target"
cargo build -p compiler_builtins --target "$target" --release
cargo build -p compiler_builtins --target "$target" --features c
cargo build -p compiler_builtins --target "$target" --features c --release
cargo build -p compiler_builtins --target "$target" --features no-asm
cargo build -p compiler_builtins --target "$target" --features no-asm --release
cargo build -p compiler_builtins --target "$target" --features no-f16-f128
cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
# Derive the binutils prefix from the target triple (drop the `unknown-`
# vendor component), with overrides for a few targets.
PREFIX=${target//unknown-/}-
case "$target" in
armv7-*)
PREFIX=arm-linux-gnueabihf-
;;
thumb*)
PREFIX=arm-none-eabi-
;;
*86*-*)
PREFIX=
;;
esac
# Prefer the sysroot's llvm-nm; fall back to the prefixed binutils nm.
NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
if [ "$NM" = "" ]; then
NM="${PREFIX}nm"
fi
# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
# rustup run to ensure that those are in PATH.
TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
NM="rustup run $TOOLCHAIN $NM"
fi
# Look out for duplicated symbols when we include the compiler-rt (C) implementation
update_rlib_paths
for rlib in "${rlib_paths[@]}"; do
    set +x
    echo "================================================================"
    echo "checking $rlib for duplicate symbols"
    echo "================================================================"
    set -x

    duplicates_found=0

    # NOTE On i586, It's normal that the get_pc_thunk symbol appears several
    # times so ignore it
    #
    # FIX: the filter stage previously ran with `--quiet`, which suppresses
    # ALL grep output, so the final `grep 'T __'` never saw any input and
    # `duplicates_found` could never be set — the check was vacuous.
    $NM -g --defined-only "$rlib" 2>&1 |
        sort |
        uniq -d |
        grep -v __x86.get_pc_thunk |
        grep 'T __' && duplicates_found=1

    if [ "$duplicates_found" != 0 ]; then
        echo "error: found duplicate symbols"
        exit 1
    else
        echo "success; no duplicate symbols found"
    fi
done

rm -f "${rlib_paths[@]}"
# Build the out-of-workspace builtins-test-intrinsics crate for `$target`,
# forwarding any extra cargo flags.
build_intrinsics_test() {
cargo build \
--target "$target" --verbose \
--manifest-path builtins-test-intrinsics/Cargo.toml "$@"
}
# Verify that we haven't dropped any intrinsics/symbols
build_intrinsics_test
build_intrinsics_test --release
build_intrinsics_test --features c
build_intrinsics_test --features c --release
# Verify that there are no undefined symbols to `panic` within our
# implementations
CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
# Ensure no references to any symbols from core
update_rlib_paths
for rlib in "${rlib_paths[@]}"; do
    set +x
    echo "================================================================"
    echo "checking $rlib for references to core"
    echo "================================================================"
    set -x

    tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
    test -d "$tmpdir" || mkdir "$tmpdir"
    defined="$tmpdir/defined_symbols.txt"
    # FIX: this previously pointed at "defined_symbols.txt" as well, so the
    # undefined-symbol list overwrote the defined-symbol list and the grep
    # below compared the file against itself — the check could never fail.
    undefined="$tmpdir/undefined_symbols.txt"

    # Defined (T) vs. undefined (U) core symbols; any undefined core symbol
    # not also defined in the rlib is an unwanted external reference.
    $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
    $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
    grep_has_results=0
    grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1

    if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
        echo "FIXME: powerpc64 fails these tests"
    elif [ "$grep_has_results" != 0 ]; then
        echo "error: found unexpected references to core"
        exit 1
    else
        echo "success; no references to core found"
    fi
done
# Test libm
# Make sure a simple build works
cargo check -p libm --no-default-features --target "$target"
if [ "${MAY_SKIP_LIBM_CI:-}" = "true" ]; then
echo "skipping libm PR CI"
exit
fi
# Accumulate the cargo flags used by every libm test invocation below.
mflags=()
# We enumerate features manually.
mflags+=(--no-default-features)
# Enable arch-specific routines when available.
mflags+=(--features arch)
# Always enable `unstable-float` since it expands available API but does not
# change any implementations.
mflags+=(--features unstable-float)
# We need to specifically skip tests for musl-math-sys on systems that can't
# build musl since otherwise `--all` will activate it.
case "$target" in
# Can't build at all on MSVC, WASM, or thumb
*windows-msvc*) mflags+=(--exclude musl-math-sys) ;;
*wasm*) mflags+=(--exclude musl-math-sys) ;;
*thumb*) mflags+=(--exclude musl-math-sys) ;;
# We can build musl on MinGW but running tests gets a stack overflow
*windows-gnu*) ;;
# FIXME(#309): LE PPC crashes calling the musl version of some functions. It
# seems like a qemu bug but should be investigated further at some point.
# See <https://github.com/rust-lang/libm/issues/309>.
*powerpc64le*) ;;
# Everything else gets musl enabled
*) mflags+=(--features libm-test/build-musl) ;;
esac
# Configure which targets test against MPFR
case "$target" in
# MSVC cannot link MPFR
*windows-msvc*) ;;
# FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial.
*windows-gnu*) ;;
# Targets that aren't cross compiled in CI work fine
aarch64*apple*) mflags+=(--features libm-test/build-mpfr) ;;
aarch64*linux*) mflags+=(--features libm-test/build-mpfr) ;;
i586*) mflags+=(--features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross) ;;
i686*) mflags+=(--features libm-test/build-mpfr) ;;
x86_64*) mflags+=(--features libm-test/build-mpfr) ;;
esac
# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
# <https://github.com/rust-lang/rust/issues/128944>
case "$target" in
*windows-gnu) mflags+=(--exclude libm-macros) ;;
esac
if [ "${BUILD_ONLY:-}" = "1" ]; then
# If we are on targets that can't run tests, verify that we can build.
cmd=(cargo build --target "$target" --package libm)
"${cmd[@]}"
"${cmd[@]}" --features unstable-intrinsics
echo "can't run tests on $target; skipping"
else
mflags+=(--workspace --target "$target")
cmd=(cargo test "${mflags[@]}")
profile_flag="--profile"
# If nextest is available, use that
command -v cargo-nextest && nextest=1 || nextest=0
if [ "$nextest" = "1" ]; then
cmd=(cargo nextest run --max-fail=10)
# Workaround for https://github.com/nextest-rs/nextest/issues/2066
if [ -f /.dockerenv ]; then
cfg_file="/tmp/nextest-config.toml"
echo "[store]" >> "$cfg_file"
echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
cmd+=(--config-file "$cfg_file")
fi
# Not all configurations have tests to run on wasm
[[ "$target" = *"wasm"* ]] && cmd+=(--no-tests=warn)
cmd+=("${mflags[@]}")
profile_flag="--cargo-profile"
fi
# Test once without intrinsics
"${cmd[@]}"
# Run doctests if they were excluded by nextest
[ "$nextest" = "1" ] && cargo test --doc --exclude compiler_builtins "${mflags[@]}"
# Exclude the macros and utile crates from the rest of the tests to save CI
# runtime, they shouldn't have anything feature- or opt-level-dependent.
cmd+=(--exclude util --exclude libm-macros)
# Test once with intrinsics enabled
"${cmd[@]}" --features unstable-intrinsics
"${cmd[@]}" --features unstable-intrinsics --benches
# Test the same in release mode, which also increases coverage. Also ensure
# the soft float routines are checked.
"${cmd[@]}" "$profile_flag" release-checked
"${cmd[@]}" "$profile_flag" release-checked --features force-soft-floats
"${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics
"${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches
# Ensure that the routines do not panic.
#
# `--tests` must be passed because no-panic is only enabled as a dev
# dependency. The `release-opt` profile must be used to enable LTO and a
# single CGU.
ENSURE_NO_PANIC=1 cargo build \
-p libm \
--target "$target" \
--no-default-features \
--features unstable-float \
--tests \
--profile release-opt
fi

View file

@ -0,0 +1,168 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12
### Other
- Remove cfg(bootstrap)
## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06
### Other
- Require `target_has_atomic = "ptr"` for runtime feature detection
## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03
### Other
- Use runtime feature detection for fma routines on x86
## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21
### Other
- avr: Provide `abort()`
- Remove `unsafe` from `naked_asm!` blocks
- Enable icount benchmarks in CI
- Move builtins-test-intrinsics out of the workspace
- Run `cargo fmt` on all projects
- Flatten the `libm/libm` directory
- Update path to libm after the merge
## [0.1.155](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.154...compiler_builtins-v0.1.155) - 2025-04-17
### Other
- use `#[cfg(bootstrap)]` for rustc sync
- Replace the `bl!` macro with `asm_sym`
- __udivmod(h|q)i4
## [0.1.154](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.153...compiler_builtins-v0.1.154) - 2025-04-16
### Other
- turn #[naked] into an unsafe attribute
## [0.1.153](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.152...compiler_builtins-v0.1.153) - 2025-04-09
### Other
- Remove a mention of `force-soft-float` in `build.rs`
- Revert "Disable `f16` on AArch64 without the `neon` feature"
- Skip No More!
- avoid out-of-bounds accesses ([#799](https://github.com/rust-lang/compiler-builtins/pull/799))
## [0.1.152](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.151...compiler_builtins-v0.1.152) - 2025-03-20
### Other
- Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs`
- Switch repository layout to use a virtual manifest
## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05
### Other
- Add cygwin support
- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770))
- Add __extendhfdf2 and add __truncdfhf2 test
- Remove outdated information from the readme
## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01
### Other
- Disable `f16` on AArch64 without the `neon` feature
- Update LLVM downloads to 20.1-2025-02-13
## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25
### Other
- Make a subset of `libm` symbols weakly available on all platforms
## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24
### Other
- Update the `libm` submodule
- Enable `f16` for MIPS
- Eliminate the use of `public_test_dep!` for a third time
## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19
### Other
- remove win64_128bit_abi_hack
## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06
### Other
- Expose erf{,c}{,f} from libm
## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04
### Other
- Revert "Eliminate the use of `public_test_dep!`"
- Indentation fix to please clippy
- Don't build out of line atomics support code for uefi
- Add a version to some FIXMEs that will be resolved in LLVM 20
- Remove use of the `start` feature
## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15
### Other
- Eliminate the use of `public_test_dep!`
## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15
### Other
- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics
## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07
### Other
- Account for optimization levels other than numbers
## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07
### Other
- Update the `libm` submodule
- Fix new `clippy::precedence` errors
- Rename `EXP_MAX` to `EXP_SAT`
- Shorten prefixes for float constants
## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26
### Other
- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737))
- Fix a bug in `abs_diff`
- Disable `f16` on platforms that have recursion problems
## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03
### Other
- Remove incorrect `sparcv9` match pattern from `configure_f16_f128`
## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01
### Other
- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724))
- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722))

View file

@ -0,0 +1,64 @@
# Manifest for the `compiler_builtins` crate (intrinsics the compiler links
# into every Rust program).
[package]
authors = ["Jorge Aparicio <japaricious@gmail.com>"]
name = "compiler_builtins"
version = "0.1.159"
license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
readme = "README.md"
repository = "https://github.com/rust-lang/compiler-builtins"
homepage = "https://github.com/rust-lang/compiler-builtins"
documentation = "https://docs.rs/compiler_builtins"
edition = "2021"
description = "Compiler intrinsics used by the Rust compiler."
links = "compiler-rt"
[lib]
# Support library only: no benches, doctests, or unit tests on the lib target.
bench = false
doctest = false
test = false
[dependencies]
# For more information on this dependency see
# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
[build-dependencies]
# Only needed when the `c` feature builds compiler-rt C sources.
cc = { optional = true, version = "1.0" }
[dev-dependencies]
panic-handler = { path = "../crates/panic-handler" }
[features]
default = ["compiler-builtins"]
# Enable compilation of C code in compiler-rt, filling in some more optimized
# implementations and also filling in unimplemented intrinsics
c = ["dep:cc"]
# Workaround for the Cranelift codegen backend. Disables any implementations
# which use inline assembly and fall back to pure Rust versions (if available).
no-asm = []
# Workaround for codegen backends which haven't yet implemented `f16` and
# `f128` support. Disables any intrinsics which use those types.
no-f16-f128 = []
# Flag this library as the unstable compiler-builtins lib
compiler-builtins = []
# Generate memory-related intrinsics like memcpy
mem = []
# Mangle all names so this can be linked in with other versions or other
# compiler-rt implementations. Also used for testing
mangled-names = []
# Only used in the compiler's build system
rustc-dep-of-std = ["compiler-builtins", "dep:core"]
# This makes certain traits and function specializations public that
# are not normally public but are required by the `builtins-test`
unstable-public-internals = []
[lints.rust]
# The cygwin config can be dropped after our benchmark toolchain is bumped
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] }

View file

@ -0,0 +1 @@
../LICENSE.txt

View file

@ -0,0 +1,436 @@
# `compiler-builtins`
This crate provides external symbols that the compiler expects to be available
when building Rust projects, typically software routines for basic operations
that do not have hardware support. It is largely a port of LLVM's
[`compiler-rt`].
It is distributed as part of Rust's sysroot. `compiler-builtins` does not need
to be added as an explicit dependency in `Cargo.toml`.
[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/1b1dc505057322f4fa1110ef4f53c44347f52986/compiler-rt
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md).
## Progress
- [x] aarch64/chkstk.S
- [x] adddf3.c
- [x] addsf3.c
- [x] arm/addsf3.S
- [x] arm/aeabi_dcmp.S
- [x] arm/aeabi_fcmp.S
- [x] arm/aeabi_idivmod.S
- [x] arm/aeabi_ldivmod.S
- [x] arm/aeabi_memcpy.S
- [x] arm/aeabi_memmove.S
- [x] arm/aeabi_memset.S
- [x] arm/aeabi_uidivmod.S
- [x] arm/aeabi_uldivmod.S
- [ ] arm/chkstk.S
- [ ] arm/divmodsi4.S (generic version is done)
- [ ] arm/divsi3.S (generic version is done)
- [ ] arm/modsi3.S (generic version is done)
- [x] arm/softfloat-alias.list
- [ ] arm/udivmodsi4.S (generic version is done)
- [ ] arm/udivsi3.S (generic version is done)
- [ ] arm/umodsi3.S (generic version is done)
- [x] ashldi3.c
- [x] ashrdi3.c
- [ ] avr/divmodhi4.S
- [ ] avr/divmodqi4.S
- [ ] avr/mulhi3.S
- [ ] avr/mulqi3.S
- [ ] avr/udivmodhi4.S
- [ ] avr/udivmodqi4.S
- [x] bswapdi2.c
- [x] bswapsi2.c
- [x] bswapti2.c
- [x] clzdi2.c
- [x] clzsi2.c
- [x] clzti2.c
- [x] comparedf2.c
- [x] comparesf2.c
- [x] ctzdi2.c
- [x] ctzsi2.c
- [x] ctzti2.c
- [x] divdf3.c
- [x] divdi3.c
- [x] divmoddi4.c
- [x] divmodsi4.c
- [x] divmodti4.c
- [x] divsf3.c
- [x] divsi3.c
- [x] extendsfdf2.c
- [x] fixdfdi.c
- [x] fixdfsi.c
- [x] fixsfdi.c
- [x] fixsfsi.c
- [x] fixunsdfdi.c
- [x] fixunsdfsi.c
- [x] fixunssfdi.c
- [x] fixunssfsi.c
- [x] floatdidf.c
- [x] floatdisf.c
- [x] floatsidf.c
- [x] floatsisf.c
- [x] floatundidf.c
- [x] floatundisf.c
- [x] floatunsidf.c
- [x] floatunsisf.c
- [ ] i386/ashldi3.S
- [ ] i386/ashrdi3.S
- [x] i386/chkstk.S
- [ ] i386/divdi3.S
- [ ] i386/lshrdi3.S
- [ ] i386/moddi3.S
- [ ] i386/muldi3.S
- [ ] i386/udivdi3.S
- [ ] i386/umoddi3.S
- [x] lshrdi3.c
- [x] moddi3.c
- [x] modsi3.c
- [x] muldf3.c
- [x] muldi3.c
- [x] mulodi4.c
- [x] mulosi4.c
- [x] mulsf3.c
- [x] powidf2.c
- [x] powisf2.c
- [ ] riscv/muldi3.S
- [ ] riscv/mulsi3.S
- [x] subdf3.c
- [x] subsf3.c
- [x] truncdfsf2.c
- [x] udivdi3.c
- [x] udivmoddi4.c
- [x] udivmodsi4.c
- [x] udivsi3.c
- [x] umoddi3.c
- [x] umodsi3.c
- [x] x86_64/chkstk.S
These builtins are needed to support 128-bit integers.
- [x] ashlti3.c
- [x] ashrti3.c
- [x] divti3.c
- [x] fixdfti.c
- [x] fixsfti.c
- [x] fixunsdfti.c
- [x] fixunssfti.c
- [x] floattidf.c
- [x] floattisf.c
- [x] floatuntidf.c
- [x] floatuntisf.c
- [x] lshrti3.c
- [x] modti3.c
- [x] muloti4.c
- [x] multi3.c
- [x] udivmodti4.c
- [x] udivti3.c
- [x] umodti3.c
These builtins are needed to support `f16` and `f128`, which are in the process
of being added to Rust.
- [x] addtf3.c
- [x] comparetf2.c
- [x] divtf3.c
- [x] extenddftf2.c
- [x] extendhfsf2.c
- [x] extendhftf2.c
- [x] extendsftf2.c
- [x] fixtfdi.c
- [x] fixtfsi.c
- [x] fixtfti.c
- [x] fixunstfdi.c
- [x] fixunstfsi.c
- [x] fixunstfti.c
- [x] floatditf.c
- [x] floatsitf.c
- [x] floattitf.c
- [x] floatunditf.c
- [x] floatunsitf.c
- [x] floatuntitf.c
- [x] multf3.c
- [x] powitf2.c
- [x] subtf3.c
- [x] truncdfhf2.c
- [x] truncsfhf2.c
- [x] trunctfdf2.c
- [x] trunctfhf2.c
- [x] trunctfsf2.c
These builtins are used by the Hexagon DSP
- [ ] hexagon/common_entry_exit_abi1.S
- [ ] hexagon/common_entry_exit_abi2.S
- [ ] hexagon/common_entry_exit_legacy.S
- [x] hexagon/dfaddsub.S
- [x] hexagon/dfdiv.S
- [x] hexagon/dffma.S
- [x] hexagon/dfminmax.S
- [x] hexagon/dfmul.S
- [x] hexagon/dfsqrt.S
- [x] hexagon/divdi3.S
- [x] hexagon/divsi3.S
- [x] hexagon/fastmath2_dlib_asm.S
- [x] hexagon/fastmath2_ldlib_asm.S
- [x] hexagon/fastmath_dlib_asm.S
- [x] hexagon/memcpy_forward_vp4cp4n2.S
- [x] hexagon/memcpy_likely_aligned.S
- [x] hexagon/moddi3.S
- [x] hexagon/modsi3.S
- [x] hexagon/sfdiv_opt.S
- [x] hexagon/sfsqrt_opt.S
- [x] hexagon/udivdi3.S
- [x] hexagon/udivmoddi4.S
- [x] hexagon/udivmodsi4.S
- [x] hexagon/udivsi3.S
- [x] hexagon/umoddi3.S
- [x] hexagon/umodsi3.S
## Unimplemented functions
These builtins are for x87 `f80` floating-point numbers that are not supported
by Rust.
- ~~extendxftf2.c~~
- ~~fixunsxfdi.c~~
- ~~fixunsxfsi.c~~
- ~~fixunsxfti.c~~
- ~~fixxfdi.c~~
- ~~fixxfti.c~~
- ~~floatdixf.c~~
- ~~floattixf.c~~
- ~~floatundixf.c~~
- ~~floatuntixf.c~~
- ~~i386/floatdixf.S~~
- ~~i386/floatundixf.S~~
- ~~x86_64/floatdixf.c~~
- ~~x86_64/floatundixf.S~~
These builtins are for IBM "extended double" non-IEEE 128-bit floating-point
numbers.
- ~~ppc/divtc3.c~~
- ~~ppc/fixtfdi.c~~
- ~~ppc/fixtfti.c~~
- ~~ppc/fixunstfdi.c~~
- ~~ppc/fixunstfti.c~~
- ~~ppc/floatditf.c~~
- ~~ppc/floattitf.c~~
- ~~ppc/floatunditf.c~~
- ~~ppc/gcc_qadd.c~~
- ~~ppc/gcc_qdiv.c~~
- ~~ppc/gcc_qmul.c~~
- ~~ppc/gcc_qsub.c~~
- ~~ppc/multc3.c~~
These builtins are for 16-bit brain floating-point numbers that are not
supported by Rust.
- ~~truncdfbf2.c~~
- ~~truncsfbf2.c~~
- ~~trunctfxf2.c~~
These builtins involve complex floating-point types that are not supported by
Rust.
- ~~divdc3.c~~
- ~~divsc3.c~~
- ~~divtc3.c~~
- ~~divxc3.c~~
- ~~muldc3.c~~
- ~~mulsc3.c~~
- ~~multc3.c~~
- ~~mulxc3.c~~
- ~~powixf2.c~~
These builtins are never called by LLVM.
- ~~absvdi2.c~~
- ~~absvsi2.c~~
- ~~absvti2.c~~
- ~~addvdi3.c~~
- ~~addvsi3.c~~
- ~~addvti3.c~~
- ~~arm/aeabi_cdcmp.S~~
- ~~arm/aeabi_cdcmpeq_check_nan.c~~
- ~~arm/aeabi_cfcmp.S~~
- ~~arm/aeabi_cfcmpeq_check_nan.c~~
- ~~arm/aeabi_div0.c~~
- ~~arm/aeabi_drsub.c~~
- ~~arm/aeabi_frsub.c~~
- ~~arm/aeabi_memcmp.S~~
- ~~arm/bswapdi2.S~~
- ~~arm/bswapsi2.S~~
- ~~arm/clzdi2.S~~
- ~~arm/clzsi2.S~~
- ~~arm/comparesf2.S~~
- ~~arm/restore_vfp_d8_d15_regs.S~~
- ~~arm/save_vfp_d8_d15_regs.S~~
- ~~arm/switch16.S~~
- ~~arm/switch32.S~~
- ~~arm/switch8.S~~
- ~~arm/switchu8.S~~
- ~~cmpdi2.c~~
- ~~cmpti2.c~~
- ~~ffssi2.c~~
- ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though!
- ~~ffsti2.c~~
- ~~mulvdi3.c~~
- ~~mulvsi3.c~~
- ~~mulvti3.c~~
- ~~negdf2.c~~
- ~~negdi2.c~~
- ~~negsf2.c~~
- ~~negti2.c~~
- ~~negvdi2.c~~
- ~~negvsi2.c~~
- ~~negvti2.c~~
- ~~paritydi2.c~~
- ~~paritysi2.c~~
- ~~parityti2.c~~
- ~~popcountdi2.c~~
- ~~popcountsi2.c~~
- ~~popcountti2.c~~
- ~~ppc/restFP.S~~
- ~~ppc/saveFP.S~~
- ~~subvdi3.c~~
- ~~subvsi3.c~~
- ~~subvti3.c~~
- ~~ucmpdi2.c~~
- ~~ucmpti2.c~~
- ~~udivmodti4.c~~
[jemalloc-fail]: https://travis-ci.org/rust-lang/rust/jobs/249772758
Rust only exposes atomic types on platforms that support them, and therefore does not need to fall back to software implementations.
- ~~arm/sync_fetch_and_add_4.S~~
- ~~arm/sync_fetch_and_add_8.S~~
- ~~arm/sync_fetch_and_and_4.S~~
- ~~arm/sync_fetch_and_and_8.S~~
- ~~arm/sync_fetch_and_max_4.S~~
- ~~arm/sync_fetch_and_max_8.S~~
- ~~arm/sync_fetch_and_min_4.S~~
- ~~arm/sync_fetch_and_min_8.S~~
- ~~arm/sync_fetch_and_nand_4.S~~
- ~~arm/sync_fetch_and_nand_8.S~~
- ~~arm/sync_fetch_and_or_4.S~~
- ~~arm/sync_fetch_and_or_8.S~~
- ~~arm/sync_fetch_and_sub_4.S~~
- ~~arm/sync_fetch_and_sub_8.S~~
- ~~arm/sync_fetch_and_umax_4.S~~
- ~~arm/sync_fetch_and_umax_8.S~~
- ~~arm/sync_fetch_and_umin_4.S~~
- ~~arm/sync_fetch_and_umin_8.S~~
- ~~arm/sync_fetch_and_xor_4.S~~
- ~~arm/sync_fetch_and_xor_8.S~~
- ~~arm/sync_synchronize.S~~
- ~~atomic.c~~
- ~~atomic_flag_clear.c~~
- ~~atomic_flag_clear_explicit.c~~
- ~~atomic_flag_test_and_set.c~~
- ~~atomic_flag_test_and_set_explicit.c~~
- ~~atomic_signal_fence.c~~
- ~~atomic_thread_fence.c~~
Miscellaneous functionality that is not used by Rust.
- ~~aarch64/fp_mode.c~~
- ~~aarch64/lse.S~~ (LSE atomics)
- ~~aarch64/sme-abi-init.c~~ (matrix extension)
- ~~aarch64/sme-abi.S~~ (matrix extension)
- ~~aarch64/sme-libc-routines.c~~ (matrix extension)
- ~~apple_versioning.c~~
- ~~arm/fp_mode.c~~
- ~~avr/exit.S~~
- ~~clear_cache.c~~
- ~~cpu_model/aarch64.c~~
- ~~cpu_model/x86.c~~
- ~~crtbegin.c~~
- ~~crtend.c~~
- ~~emutls.c~~
- ~~enable_execute_stack.c~~
- ~~eprintf.c~~
- ~~fp_mode.c~~ (float exception handling)
- ~~gcc_personality_v0.c~~
- ~~i386/fp_mode.c~~
- ~~int_util.c~~
- ~~loongarch/fp_mode.c~~
- ~~os_version_check.c~~
- ~~riscv/fp_mode.c~~
- ~~riscv/restore.S~~ (callee-saved registers)
- ~~riscv/save.S~~ (callee-saved registers)
- ~~trampoline_setup.c~~
- ~~ve/grow_stack.S~~
- ~~ve/grow_stack_align.S~~
Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case.
- ~~i386/floatdidf.S~~
- ~~i386/floatdisf.S~~
- ~~i386/floatundidf.S~~
- ~~i386/floatundisf.S~~
- ~~x86_64/floatundidf.S~~
- ~~x86_64/floatundisf.S~~
- ~~x86_64/floatdidf.c~~
- ~~x86_64/floatdisf.c~~
Unsupported in any current target: used on old versions of 32-bit iOS with ARMv5.
- ~~arm/adddf3vfp.S~~
- ~~arm/addsf3vfp.S~~
- ~~arm/divdf3vfp.S~~
- ~~arm/divsf3vfp.S~~
- ~~arm/eqdf2vfp.S~~
- ~~arm/eqsf2vfp.S~~
- ~~arm/extendsfdf2vfp.S~~
- ~~arm/fixdfsivfp.S~~
- ~~arm/fixsfsivfp.S~~
- ~~arm/fixunsdfsivfp.S~~
- ~~arm/fixunssfsivfp.S~~
- ~~arm/floatsidfvfp.S~~
- ~~arm/floatsisfvfp.S~~
- ~~arm/floatunssidfvfp.S~~
- ~~arm/floatunssisfvfp.S~~
- ~~arm/gedf2vfp.S~~
- ~~arm/gesf2vfp.S~~
- ~~arm/gtdf2vfp.S~~
- ~~arm/gtsf2vfp.S~~
- ~~arm/ledf2vfp.S~~
- ~~arm/lesf2vfp.S~~
- ~~arm/ltdf2vfp.S~~
- ~~arm/ltsf2vfp.S~~
- ~~arm/muldf3vfp.S~~
- ~~arm/mulsf3vfp.S~~
- ~~arm/nedf2vfp.S~~
- ~~arm/negdf2vfp.S~~
- ~~arm/negsf2vfp.S~~
- ~~arm/nesf2vfp.S~~
- ~~arm/subdf3vfp.S~~
- ~~arm/subsf3vfp.S~~
- ~~arm/truncdfsf2vfp.S~~
- ~~arm/unorddf2vfp.S~~
- ~~arm/unordsf2vfp.S~~
## License
Usage is allowed under the [MIT License] and the [Apache License, Version 2.0]
with the LLVM exception.
[MIT License]: https://opensource.org/license/mit
[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
### Contribution
Contributions are licensed under the MIT License, the Apache License,
Version 2.0, and the Apache-2.0 license with the LLVM exception.
See [LICENSE.txt](../LICENSE.txt) for full details.

View file

@ -0,0 +1,712 @@
mod configure;
use std::collections::BTreeMap;
use std::env;
use std::path::PathBuf;
use std::sync::atomic::Ordering;
use configure::{Target, configure_aliases, configure_f16_f128};
/// Build-script entry point: emits cargo configuration directives for the
/// current target and, when enabled, compiles the C fallback intrinsics.
fn main() {
    println!("cargo::rerun-if-changed=build.rs");
    println!("cargo::rerun-if-changed=configure.rs");

    let target = Target::from_env();
    let current_dir = env::current_dir().unwrap();

    configure_check_cfg();
    configure_f16_f128(&target);
    configure_aliases(&target);
    configure_libm(&target);

    println!(
        "cargo:compiler-rt={}",
        current_dir.join("compiler-rt").display()
    );

    // Emscripten's runtime includes all the builtins
    if target.os == "emscripten" {
        return;
    }

    // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source
    if target.os == "openbsd" {
        println!("cargo:rustc-link-search=native=/usr/lib");
        println!("cargo:rustc-link-lib=compiler_rt");
        return;
    }

    // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to
    // provide them.
    let triple = &target.triple;
    let needs_mem = (triple.contains("wasm") && !triple.contains("wasi"))
        || (triple.contains("sgx") && triple.contains("fortanix"))
        || triple.contains("-none")
        || triple.contains("nvptx")
        || triple.contains("uefi")
        || triple.contains("xous");
    if needs_mem {
        println!("cargo:rustc-cfg=feature=\"mem\"");
    }

    // These targets have hardware unaligned access support.
    println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
    if ["x86_64", "x86", "aarch64", "bpf"]
        .iter()
        .any(|arch| target.arch.contains(arch))
    {
        println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
    }

    // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
    // target triple. This is usually correct for our built-in targets but can break in presence of
    // custom targets, which can have arbitrary names.
    let llvm_target: Vec<_> = target.triple.split('-').collect();

    // Build missing intrinsics from compiler-rt C source code. If we're
    // mangling names though we assume that we're also in test mode so we don't
    // build anything and we rely on the upstream implementation of compiler-rt
    // functions
    if cfg!(feature = "c") && !cfg!(feature = "mangled-names") {
        // Don't use a C compiler for these targets:
        //
        // * nvptx - everything is bitcode, not compatible with mixed C/Rust
        if !target.arch.contains("nvptx") {
            #[cfg(feature = "c")]
            c::compile(&llvm_target, &target);
        }
    }

    // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
    // includes the old androideabi. It is deprecated but it is available as a
    // rustc target (arm-linux-androideabi).
    println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
    if matches!(llvm_target[0], "armv4t" | "armv5te") || target.triple == "arm-linux-androideabi" {
        println!("cargo:rustc-cfg=kernel_user_helpers")
    }

    if llvm_target[0].starts_with("aarch64") {
        generate_aarch64_outlined_atomics();
    }
}
/// Run configuration for `libm` since it is included directly.
///
/// Much of this is copied from `libm/configure.rs`.
fn configure_libm(target: &Target) {
    // Declare all cfg names we may emit so `-Zcheck-cfg` doesn't warn.
    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
    println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable-public-internals\"))");

    // Always use intrinsics
    println!("cargo:rustc-cfg=intrinsics_enabled");

    // The arch module may contain assembly.
    if !cfg!(feature = "no-asm") {
        println!("cargo:rustc-cfg=arch_enabled");
    }

    // Skip manual optimizations at low opt levels where they don't pay off.
    // (The `optimizations_enabled` check-cfg directive is emitted above.)
    if !matches!(target.opt_level.as_str(), "0" | "1") {
        println!("cargo:rustc-cfg=optimizations_enabled");
    }

    // Config shorthands
    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
        // Shorthand to detect i586 targets
        println!("cargo:rustc-cfg=x86_no_sse");
    }

    // Expose the build configuration to the crate for diagnostics.
    println!(
        "cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
        target.cargo_features
    );
    println!("cargo:rustc-env=CFG_OPT_LEVEL={}", target.opt_level);
    println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", target.features);

    // Activate libm's unstable features to make full use of Nightly.
    println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\"");
}
/// Map an atomic `Ordering` to the suffix used in aarch64 outlined-atomic
/// symbol names (e.g. the `acq` in `__aarch64_cas4_acq`).
///
/// Panics for orderings that have no outlined-atomic variant (e.g. `SeqCst`).
fn aarch64_symbol(ordering: Ordering) -> &'static str {
    use Ordering::*;
    match ordering {
        Relaxed => "relax",
        Acquire => "acq",
        Release => "rel",
        AcqRel => "acq_rel",
        other => panic!("unknown symbol for {other:?}"),
    }
}
/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
/// Define them from the build script instead.
/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
fn generate_aarch64_outlined_atomics() {
    use std::fmt::Write;

    // Each generated macro takes another macro's path and invokes it once per
    // outlined-atomic symbol. `#[macro_export]` so that we can use this in tests.
    let macro_header =
        |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");

    // Generate different macros for add/clr/eor/set so that we can test them separately.
    let mut macros = BTreeMap::new();
    for sym in ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"] {
        macros.insert(sym, macro_header(sym));
    }

    // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
    let mut cas16 = macro_header("cas16");

    let orderings = [
        Ordering::Relaxed,
        Ordering::Acquire,
        Ordering::Release,
        Ordering::AcqRel,
    ];
    for ordering in orderings {
        let sym_ordering = aarch64_symbol(ordering);
        for size in [1, 2, 4, 8] {
            for (sym, body) in &mut macros {
                let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
                writeln!(body, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
            }
        }
        let name = format!("__aarch64_cas16_{sym_ordering}");
        writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
    }

    // Close every macro definition and emit them into one include file.
    let mut contents = String::new();
    for definition in macros.values().chain(std::iter::once(&cas16)) {
        contents += definition;
        contents += "}; }\n";
    }
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
    std::fs::write(out_dir.join("outlined_atomics.rs"), contents).unwrap();
}
/// Emit directives for features we expect to support that aren't in `Cargo.toml`.
///
/// These are mostly cfg elements emitted by this `build.rs`.
fn configure_check_cfg() {
    // Functions where we can set the "optimized-c" flag
    const HAS_OPTIMIZED_C: &[&str] = &[
        "__ashldi3",
        "__ashlsi3",
        "__ashrdi3",
        "__ashrsi3",
        "__bswapsi2",
        "__bswapdi2",
        "__bswapti2",
        "__divdi3",
        "__divsi3",
        "__divmoddi4",
        "__divmodsi4",
        "__divmodti4",
        "__lshrdi3",
        "__lshrsi3",
        "__moddi3",
        "__modsi3",
        "__muldi3",
        "__udivdi3",
        "__udivmoddi4",
        "__udivmodsi4",
        "__udivsi3",
        "__umoddi3",
        "__umodsi3",
    ];

    // Build a list of all aarch64 atomic operation functions
    let mut aarch_atomic = Vec::new();
    for aarch_op in ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"] {
        // Only compare-and-swap has a 16-byte variant.
        let op_sizes = if aarch_op == "cas" {
            [1, 2, 4, 8, 16].as_slice()
        } else {
            [1, 2, 4, 8].as_slice()
        };

        for op_size in op_sizes {
            for ordering in ["relax", "acq", "rel", "acq_rel"] {
                aarch_atomic.push(format!("__aarch64_{aarch_op}{op_size}_{ordering}"));
            }
        }
    }

    for fn_name in HAS_OPTIMIZED_C
        .iter()
        .copied()
        .chain(aarch_atomic.iter().map(|s| s.as_str()))
    {
        println!("cargo::rustc-check-cfg=cfg({fn_name}, values(\"optimized-c\"))",);
    }

    // Rustc is unaware of sparc target features, but this does show up from
    // `rustc --print target-features --target sparc64-unknown-linux-gnu`.
    println!("cargo::rustc-check-cfg=cfg(target_feature, values(\"vis3\"))");

    // FIXME: these come from libm and should be changed there
    println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))");
    println!("cargo::rustc-check-cfg=cfg(assert_no_panic)");
}
#[cfg(feature = "c")]
mod c {
use std::collections::{BTreeMap, HashSet};
use std::env;
use std::fs::{self, File};
use std::io::Write;
use std::path::{Path, PathBuf};
use super::Target;
/// Registry mapping each intrinsic symbol to the single C/assembly source
/// file that should provide it.
struct Sources {
    // SYMBOL -> PATH TO SOURCE
    map: BTreeMap<&'static str, &'static str>,
}

impl Sources {
    fn new() -> Sources {
        Sources {
            map: BTreeMap::new(),
        }
    }

    /// Register sources, preferring arch-optimized implementations.
    ///
    /// NOTE Some intrinsics have both a generic implementation (e.g.
    /// `floatdidf.c`) and an arch optimized implementation
    /// (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized
    /// implementation and discard the generic implementation. If we don't
    /// and keep both implementations, the linker will yell at us about
    /// duplicate symbols!
    fn extend(&mut self, sources: &[(&'static str, &'static str)]) {
        for &(symbol, path) in sources {
            // A path with a directory separator is an arch-optimized
            // implementation and always wins; a generic implementation is
            // only recorded when nothing has claimed the symbol yet.
            let arch_specific = path.contains("/");
            if arch_specific || !self.map.contains_key(symbol) {
                self.map.insert(symbol, path);
            }
        }
    }

    /// Forget symbols entirely; panics if a symbol was never registered.
    fn remove(&mut self, symbols: &[&str]) {
        for symbol in symbols {
            self.map.remove(*symbol).unwrap();
        }
    }
}
/// Compile intrinsics from the compiler-rt C source code.
///
/// `llvm_target` is the target triple split on `-`; `target` is the parsed
/// cargo target description. Emits `cargo:rustc-cfg=<sym>="optimized-c"` for
/// every symbol whose C implementation is built.
pub fn compile(llvm_target: &[&str], target: &Target) {
    let mut consider_float_intrinsics = true;
    let cfg = &mut cc::Build::new();

    // AArch64 GCCs exit with an error condition when they encounter any kind of floating point
    // code if the `nofp` and/or `nosimd` compiler flags have been set.
    //
    // Therefore, evaluate if those flags are present and set a boolean that causes any
    // compiler-rt intrinsics that contain floating point source to be excluded for this target.
    if target.arch == "aarch64" {
        let cflags_key = String::from("CFLAGS_") + &(target.triple.replace("-", "_"));
        if let Ok(cflags_value) = env::var(cflags_key) {
            if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") {
                consider_float_intrinsics = false;
            }
        }
    }

    // `compiler-rt` requires `COMPILER_RT_HAS_FLOAT16` to be defined to make it use the
    // `_Float16` type for `f16` intrinsics. This shouldn't matter as all existing `f16`
    // intrinsics have been ported to Rust in `compiler-builtins` as C compilers don't
    // support `_Float16` on all targets (whereas Rust does). However, define the macro
    // anyway to prevent issues like rust#118813 and rust#123885 silently recurring if more
    // `f16` intrinsics get accidentally added here in the future.
    cfg.define("COMPILER_RT_HAS_FLOAT16", None);

    cfg.warnings(false);

    if target.env == "msvc" {
        // Don't pull in extra libraries on MSVC
        cfg.flag("/Zl");

        // Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP
        cfg.define("__func__", Some("__FUNCTION__"));
    } else {
        // Turn off various features of gcc and such, mostly copying
        // compiler-rt's build system already
        cfg.flag("-fno-builtin");
        cfg.flag("-fvisibility=hidden");
        cfg.flag("-ffreestanding");
        // Avoid the following warning appearing once **per file**:
        // clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument]
        //
        // Note that compiler-rt's build system also checks
        //
        // `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)`
        //
        // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19.
        cfg.flag_if_supported("-fomit-frame-pointer");
        cfg.define("VISIBILITY_HIDDEN", None);

        if let "aarch64" | "arm64ec" = target.arch.as_str() {
            // FIXME(llvm20): Older GCCs on A64 fail to build with
            // -Werror=implicit-function-declaration due to a compiler-rt bug.
            // With a newer LLVM we should be able to enable the flag everywhere.
            // https://github.com/llvm/llvm-project/commit/8aa9d6206ce55bdaaf422839c351fbd63f033b89
        } else {
            // Avoid implicitly creating references to undefined functions
            cfg.flag("-Werror=implicit-function-declaration");
        }
    }

    // int_util.c tries to include stdlib.h if `_WIN32` is defined,
    // which it is when compiling UEFI targets with clang. This is
    // at odds with compiling with `-ffreestanding`, as the header
    // may be incompatible or not present. Create a minimal stub
    // header to use instead.
    if target.os == "uefi" {
        let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
        let include_dir = out_dir.join("include");
        if !include_dir.exists() {
            fs::create_dir(&include_dir).unwrap();
        }
        fs::write(include_dir.join("stdlib.h"), "#include <stddef.h>").unwrap();
        cfg.flag(&format!("-I{}", include_dir.to_str().unwrap()));
    }

    let mut sources = Sources::new();
    sources.extend(&[
        ("__absvdi2", "absvdi2.c"),
        ("__absvsi2", "absvsi2.c"),
        ("__addvdi3", "addvdi3.c"),
        ("__addvsi3", "addvsi3.c"),
        ("__cmpdi2", "cmpdi2.c"),
        ("__int_util", "int_util.c"),
        ("__mulvdi3", "mulvdi3.c"),
        ("__mulvsi3", "mulvsi3.c"),
        ("__negdi2", "negdi2.c"),
        ("__negvdi2", "negvdi2.c"),
        ("__negvsi2", "negvsi2.c"),
        ("__paritydi2", "paritydi2.c"),
        ("__paritysi2", "paritysi2.c"),
        ("__popcountdi2", "popcountdi2.c"),
        ("__popcountsi2", "popcountsi2.c"),
        ("__subvdi3", "subvdi3.c"),
        ("__subvsi3", "subvsi3.c"),
        ("__ucmpdi2", "ucmpdi2.c"),
    ]);

    if consider_float_intrinsics {
        sources.extend(&[
            ("__divdc3", "divdc3.c"),
            ("__divsc3", "divsc3.c"),
            ("__muldc3", "muldc3.c"),
            ("__mulsc3", "mulsc3.c"),
            ("__negdf2", "negdf2.c"),
            ("__negsf2", "negsf2.c"),
        ]);
    }

    // On iOS and 32-bit OSX these are all just empty intrinsics, no need to
    // include them.
    if target.vendor != "apple" || target.arch != "x86" {
        sources.extend(&[
            ("__absvti2", "absvti2.c"),
            ("__addvti3", "addvti3.c"),
            ("__cmpti2", "cmpti2.c"),
            ("__ffsti2", "ffsti2.c"),
            ("__mulvti3", "mulvti3.c"),
            ("__negti2", "negti2.c"),
            ("__parityti2", "parityti2.c"),
            ("__popcountti2", "popcountti2.c"),
            ("__subvti3", "subvti3.c"),
            ("__ucmpti2", "ucmpti2.c"),
        ]);

        if consider_float_intrinsics {
            sources.extend(&[("__negvti2", "negvti2.c")]);
        }
    }

    if target.vendor == "apple" {
        sources.extend(&[
            ("atomic_flag_clear", "atomic_flag_clear.c"),
            ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"),
            ("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"),
            (
                "atomic_flag_test_and_set_explicit",
                "atomic_flag_test_and_set_explicit.c",
            ),
            ("atomic_signal_fence", "atomic_signal_fence.c"),
            ("atomic_thread_fence", "atomic_thread_fence.c"),
        ]);
    }

    if target.env != "msvc" {
        if target.arch == "x86" {
            sources.extend(&[
                ("__ashldi3", "i386/ashldi3.S"),
                ("__ashrdi3", "i386/ashrdi3.S"),
                ("__divdi3", "i386/divdi3.S"),
                ("__lshrdi3", "i386/lshrdi3.S"),
                ("__moddi3", "i386/moddi3.S"),
                ("__muldi3", "i386/muldi3.S"),
                ("__udivdi3", "i386/udivdi3.S"),
                ("__umoddi3", "i386/umoddi3.S"),
            ]);
        }
    }

    if target.arch == "arm" && target.vendor != "apple" && target.env != "msvc" {
        sources.extend(&[
            ("__aeabi_div0", "arm/aeabi_div0.c"),
            ("__aeabi_drsub", "arm/aeabi_drsub.c"),
            ("__aeabi_frsub", "arm/aeabi_frsub.c"),
            ("__bswapdi2", "arm/bswapdi2.S"),
            ("__bswapsi2", "arm/bswapsi2.S"),
            ("__divmodsi4", "arm/divmodsi4.S"),
            ("__divsi3", "arm/divsi3.S"),
            ("__modsi3", "arm/modsi3.S"),
            ("__switch16", "arm/switch16.S"),
            ("__switch32", "arm/switch32.S"),
            ("__switch8", "arm/switch8.S"),
            ("__switchu8", "arm/switchu8.S"),
            ("__sync_synchronize", "arm/sync_synchronize.S"),
            ("__udivmodsi4", "arm/udivmodsi4.S"),
            ("__udivsi3", "arm/udivsi3.S"),
            ("__umodsi3", "arm/umodsi3.S"),
        ]);

        if target.os == "freebsd" {
            sources.extend(&[("__clear_cache", "clear_cache.c")]);
        }

        // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM.
        // Second, they are little-endian only, so builds fail on big-endian targets.
        // Temporary workaround: exclude these files for big-endian targets.
        if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") {
            sources.extend(&[
                ("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"),
                ("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"),
                ("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"),
                ("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"),
            ]);
        }
    }

    if llvm_target[0] == "armv7" {
        sources.extend(&[
            ("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"),
            ("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"),
            ("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"),
            ("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"),
            ("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"),
            ("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"),
            ("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"),
            ("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"),
            ("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"),
            ("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"),
            ("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"),
            ("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"),
            ("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"),
            ("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"),
            ("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"),
            ("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"),
            ("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"),
            ("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"),
            ("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"),
            ("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"),
        ]);
    }

    if llvm_target.last().unwrap().ends_with("eabihf") {
        if !llvm_target[0].starts_with("thumbv7em")
            && !llvm_target[0].starts_with("thumbv8m.main")
        {
            // The FPU option chosen for these architectures in cc-rs, ie:
            // -mfpu=fpv4-sp-d16 for thumbv7em
            // -mfpu=fpv5-sp-d16 for thumbv8m.main
            // do not support double precision floating points conversions so the files
            // that include such instructions are not included for these targets.
            sources.extend(&[
                ("__fixdfsivfp", "arm/fixdfsivfp.S"),
                ("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"),
                ("__floatsidfvfp", "arm/floatsidfvfp.S"),
                ("__floatunssidfvfp", "arm/floatunssidfvfp.S"),
            ]);
        }

        sources.extend(&[
            ("__fixsfsivfp", "arm/fixsfsivfp.S"),
            ("__fixunssfsivfp", "arm/fixunssfsivfp.S"),
            ("__floatsisfvfp", "arm/floatsisfvfp.S"),
            ("__floatunssisfvfp", "arm/floatunssisfvfp.S"),
            ("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"),
            ("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"),
            ("__negdf2vfp", "arm/negdf2vfp.S"),
            ("__negsf2vfp", "arm/negsf2vfp.S"),
        ]);
    }

    if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
        sources.extend(&[
            ("__comparetf2", "comparetf2.c"),
            ("__fe_getround", "fp_mode.c"),
            ("__fe_raise_inexact", "fp_mode.c"),
        ]);

        if target.os != "windows" && target.os != "cygwin" {
            sources.extend(&[("__multc3", "multc3.c")]);
        }
    }

    if target.arch == "mips" || target.arch == "riscv32" || target.arch == "riscv64" {
        sources.extend(&[("__bswapsi2", "bswapsi2.c")]);
    }

    if target.arch == "mips64" {
        sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
    }

    if target.arch == "loongarch64" {
        sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
    }

    // Remove the assembly implementations that won't compile for the target
    if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target.os == "uefi"
    {
        let mut to_remove = Vec::new();
        for (k, v) in sources.map.iter() {
            if v.ends_with(".S") {
                to_remove.push(*k);
            }
        }
        sources.remove(&to_remove);
    }

    if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" {
        sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]);
    }

    // Android and Cygwin use emulated TLS so we need a runtime support function.
    if target.os == "android" || target.os == "cygwin" {
        sources.extend(&[("__emutls_get_address", "emutls.c")]);
    }

    // Work around a bug in the NDK headers (fixed in
    // https://r.android.com/2038949 which will be released in a future
    // NDK version) by providing a definition of LONG_BIT.
    if target.os == "android" {
        cfg.define("LONG_BIT", "(8 * sizeof(long))");
    }

    // OpenHarmony also uses emulated TLS.
    if target.env == "ohos" {
        sources.extend(&[("__emutls_get_address", "emutls.c")]);
    }

    // When compiling the C code we require the user to tell us where the
    // source code is, and this is largely done so when we're compiling as
    // part of rust-lang/rust we can use the same llvm-project repository as
    // rust-lang/rust.
    let root = match env::var_os("RUST_COMPILER_RT_ROOT") {
        Some(s) => PathBuf::from(s),
        None => {
            panic!(
                "RUST_COMPILER_RT_ROOT is not set. You may need to run \
                `ci/download-compiler-rt.sh`."
            );
        }
    };
    if !root.exists() {
        panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display());
    }

    // Support deterministic builds by remapping the __FILE__ prefix if the
    // compiler supports it. This fixes the nondeterminism caused by the
    // use of that macro in lib/builtins/int_util.h in compiler-rt.
    cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display()));

    // Include out-of-line atomics for aarch64, which are all generated by supplying different
    // sets of flags to the same source file.
    // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430) and
    // on uefi.
    let src_dir = root.join("lib/builtins");
    if target.arch == "aarch64" && target.env != "msvc" && target.os != "uefi" {
        // See below for why we're building these as separate libraries.
        build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg);

        // Some run-time CPU feature detection is necessary, as well.
        let cpu_model_src = if src_dir.join("cpu_model.c").exists() {
            "cpu_model.c"
        } else {
            "cpu_model/aarch64.c"
        };
        sources.extend(&[("__aarch64_have_lse_atomics", cpu_model_src)]);
    }

    // Two symbols may map to the same source file (e.g. `fp_mode.c`); only
    // compile each file once, but emit an "optimized-c" cfg for every symbol.
    let mut added_sources = HashSet::new();
    for (sym, src) in sources.map.iter() {
        let src = src_dir.join(src);
        if added_sources.insert(src.clone()) {
            cfg.file(&src);
            println!("cargo:rerun-if-changed={}", src.display());
        }
        println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
    }

    cfg.compile("libcompiler-rt.a");
}
/// Generate one tiny `.S` wrapper per (operation, size, memory-model)
/// combination and register each with `cfg`.
///
/// The original compiler-rt build system compiles the same source file
/// multiple times with different compiler options. Here we do something
/// slightly different: we create multiple .S files with the proper #defines
/// and then include the original file.
///
/// This is needed because the cc crate doesn't allow us to override the name
/// of object files and libtool requires all objects in an archive to have
/// unique names.
fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) {
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let outlined_atomics_file = builtins_dir.join("aarch64").join("lse.S");
    println!("cargo:rerun-if-changed={}", outlined_atomics_file.display());
    cfg.include(&builtins_dir);

    for op in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] {
        for width in &[1, 2, 4, 8, 16] {
            // Only compare-and-swap has a 16-byte variant.
            if *width == 16 && *op != "cas" {
                continue;
            }

            for (model_number, model_name) in
                &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")]
            {
                let stub_path = out_dir.join(format!("lse_{op}{width}_{model_name}.S"));
                let mut stub = File::create(&stub_path).unwrap();
                write!(
                    stub,
                    "#define L_{op}\n#define SIZE {width}\n#define MODEL {model_number}\n#include \"{}\"\n",
                    outlined_atomics_file.canonicalize().unwrap().display()
                )
                .unwrap();
                drop(stub);
                cfg.file(stub_path);

                let sym = format!("__aarch64_{op}{width}_{model_name}");
                println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
            }
        }
    }
}
}

View file

@ -0,0 +1,136 @@
// Configuration that is shared between `compiler_builtins` and `builtins_test`.
use std::env;
#[derive(Debug)]
#[allow(dead_code)]
pub struct Target {
    /// Full target triple, from the `TARGET` env var (e.g. `x86_64-unknown-linux-gnu`).
    pub triple: String,
    /// `triple` split on `-` into its components.
    pub triple_split: Vec<String>,
    /// Optimization level, from the `OPT_LEVEL` env var (e.g. "0", "3").
    pub opt_level: String,
    /// Enabled cargo features, derived from `CARGO_FEATURE_*` env vars,
    /// lowercased with `_` replaced by `-`.
    pub cargo_features: Vec<String>,
    /// From `CARGO_CFG_TARGET_OS`.
    pub os: String,
    /// From `CARGO_CFG_TARGET_ARCH`.
    pub arch: String,
    /// From `CARGO_CFG_TARGET_VENDOR`.
    pub vendor: String,
    /// From `CARGO_CFG_TARGET_ENV` (e.g. "gnu", "msvc", "ohos").
    pub env: String,
    /// Pointer width in bits, from `CARGO_CFG_TARGET_POINTER_WIDTH`.
    pub pointer_width: u8,
    /// True for little-endian targets, from `CARGO_CFG_TARGET_ENDIAN`.
    pub little_endian: bool,
    /// Enabled target features, from `CARGO_CFG_TARGET_FEATURE` (comma-separated).
    pub features: Vec<String>,
}
impl Target {
    /// Build a `Target` from the environment variables cargo passes to build
    /// scripts. Panics if a required variable is missing or malformed.
    pub fn from_env() -> Self {
        let triple = env::var("TARGET").unwrap();
        let triple_split = triple.split('-').map(ToOwned::to_owned).collect();

        let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() {
            "little" => true,
            "big" => false,
            x => panic!("unknown endian {x}"),
        };

        // Cargo exposes enabled features as `CARGO_FEATURE_<NAME>`; convert
        // back to the `kebab-case` feature names.
        let cargo_features = env::vars()
            .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
            .map(|name| name.to_lowercase().replace("_", "-"))
            .collect();

        let features = env::var("CARGO_CFG_TARGET_FEATURE")
            .unwrap_or_default()
            .split(",")
            .map(ToOwned::to_owned)
            .collect();

        let pointer_width = env::var("CARGO_CFG_TARGET_POINTER_WIDTH")
            .unwrap()
            .parse()
            .unwrap();

        Self {
            triple,
            triple_split,
            os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
            opt_level: env::var("OPT_LEVEL").unwrap(),
            cargo_features,
            arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
            vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
            env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
            pointer_width,
            little_endian,
            features,
        }
    }

    /// Whether `feature` appears in the target's enabled feature list.
    #[allow(dead_code)]
    pub fn has_feature(&self, feature: &str) -> bool {
        self.features.iter().any(|f| f == feature)
    }
}
/// Emit shorthand cfgs derived from the target architecture component.
pub fn configure_aliases(target: &Target) {
    let arch = target.triple_split[0].as_str();

    // To compile builtins-test-intrinsics for thumb targets, where there is no libc
    println!("cargo::rustc-check-cfg=cfg(thumb)");
    if arch.starts_with("thumb") {
        println!("cargo:rustc-cfg=thumb")
    }

    // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because
    // these targets do not have full Thumb-2 support but only original Thumb-1.
    // We have to cfg our code accordingly.
    println!("cargo::rustc-check-cfg=cfg(thumb_1)");
    if matches!(arch, "thumbv6m" | "thumbv8m.base") {
        println!("cargo:rustc-cfg=thumb_1")
    }
}
/// Configure whether or not `f16` and `f128` support should be enabled.
pub fn configure_f16_f128(target: &Target) {
    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
    // that the backend will not crash when using these types and generates code that can be called
    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
    // ABI or other bugs.
    //
    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
    // not straightforward.
    //
    // Original source of this list:
    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
    let arch = target.arch.as_str();

    let f16_ok = match arch {
        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
        "arm64ec" => false,
        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
        "s390x" => false,
        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
        "csky" | "hexagon" | "powerpc" | "powerpc64" | "sparc" | "sparc64" | "wasm32"
        | "wasm64" => false,
        // Most everything else works as of LLVM 19
        _ => true,
    };

    let f128_ok = match arch {
        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
        "amdgpu" => false,
        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
        "arm64ec" => false,
        // FIXME(llvm20): fixed by <https://github.com/llvm/llvm-project/pull/117525>
        "mips64" | "mips64r6" => false,
        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
        "nvptx64" => false,
        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
        "powerpc64" if target.os == "aix" => false,
        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
        "sparc" => false,
        // Most everything else works as of LLVM 19
        _ => true,
    };

    // If the feature is set, disable these types.
    let force_disable = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();

    println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");

    if !force_disable {
        if f16_ok {
            println!("cargo::rustc-cfg=f16_enabled");
        }
        if f128_ok {
            println!("cargo::rustc-cfg=f128_enabled");
        }
    }
}

View file

@ -0,0 +1,21 @@
#![allow(unused_imports)]
use core::intrinsics;
intrinsics! {
    // Stack-probe routine (`__chkstk`) for aarch64 UEFI targets.
    //
    // NOTE(review): per the Windows AArch64 convention, x15 appears to carry
    // the requested allocation size in units of 16 bytes — the `lsl #4` below
    // converts it to a byte count. Confirm against upstream `chkstk.S`.
    #[unsafe(naked)]
    #[cfg(all(target_os = "uefi", not(feature = "no-asm")))]
    pub unsafe extern "C" fn __chkstk() {
        core::arch::naked_asm!(
            ".p2align 2",
            // x16 = requested bytes (x15 * 16)
            "lsl x16, x15, #4",
            // x17 walks down from the current stack pointer
            "mov x17, sp",
            "1:",
            "sub x17, x17, 4096",
            "subs x16, x16, 4096",
            // Touch one page (4096 bytes) per iteration so each page of the
            // new stack region is probed before it is used.
            "ldr xzr, [x17]",
            // Loop while more than a page of the request remains
            "b.gt 1b",
            "ret",
        );
    }
}

View file

@ -0,0 +1,273 @@
//! Aarch64 targets have two possible implementations for atomics:
//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower.
//! 2. Large System Extensions (LSE), newer and faster.
//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
//! where atomic operations call into the compiler runtime to dispatch between two depending on
//! which is supported on the current CPU.
//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion.
//!
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
//! Use the `compiler-rt` intrinsics if you want LSE support.
//!
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
//!
//! Generate functions for each of the following symbols:
//! __aarch64_casM_ORDER
//! __aarch64_swpN_ORDER
//! __aarch64_ldaddN_ORDER
//! __aarch64_ldclrN_ORDER
//! __aarch64_ldeorN_ORDER
//! __aarch64_ldsetN_ORDER
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
//!
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
//! We do something similar, but with macro arguments.
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
/// Translate a byte size to a Rust type.
#[rustfmt::skip]
macro_rules! int_ty {
    (1) => { i8 };
    (2) => { i16 };
    (4) => { i32 };
    (8) => { i64 };
    (16) => { i128 };
}

/// Given a byte size and a register number, return a register of the appropriate size.
///
/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
#[rustfmt::skip]
macro_rules! reg {
    (1, $num:literal) => { concat!("w", $num) };
    (2, $num:literal) => { concat!("w", $num) };
    (4, $num:literal) => { concat!("w", $num) };
    (8, $num:literal) => { concat!("x", $num) };
}

/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! acquire {
    (Relaxed) => { "" };
    (Acquire) => { "a" };
    (Release) => { "" };
    (AcqRel) => { "a" };
}

/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! release {
    (Relaxed) => { "" };
    (Acquire) => { "" };
    (Release) => { "l" };
    (AcqRel) => { "l" };
}

/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction.
#[rustfmt::skip]
macro_rules! size {
    (1) => { "b" };
    (2) => { "h" };
    // 4- and 8-byte accesses use the bare instruction (the register width
    // selects the size); 16-byte operations use the pair instructions, which
    // also take no size suffix.
    (4) => { "" };
    (8) => { "" };
    (16) => { "" };
}

/// Given a byte size, translate it to an Unsigned eXTend instruction
/// with the correct semantics.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
#[rustfmt::skip]
macro_rules! uxt {
    (1) => { "uxtb" };
    (2) => { "uxth" };
    // Wider values already fill the register, so a plain move suffices.
    ($_:tt) => { "mov" };
}
/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction
/// with the correct semantics.
///
/// E.g. `ldxr!(Acquire, 1)` expands to `"ldaxrb"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
macro_rules! ldxr {
    ($ordering:ident, $bytes:tt) => {
        concat!("ld", acquire!($ordering), "xr", size!($bytes))
    };
}

/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction
/// with the correct semantics.
///
/// E.g. `stxr!(Release, 2)` expands to `"stlxrh"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
macro_rules! stxr {
    ($ordering:ident, $bytes:tt) => {
        concat!("st", release!($ordering), "xr", size!($bytes))
    };
}

/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// E.g. `ldxp!(Acquire)` expands to `"ldaxp"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
macro_rules! ldxp {
    ($ordering:ident) => {
        concat!("ld", acquire!($ordering), "xp")
    };
}

/// Given an atomic ordering and byte size, translate it to a STore eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// E.g. `stxp!(Release)` expands to `"stlxp"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
macro_rules! stxp {
    ($ordering:ident) => {
        concat!("st", release!($ordering), "xp")
    };
}
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
///
/// Expands (via `intrinsics!`) to one outlined compare-and-swap helper built on a
/// load-exclusive/store-exclusive retry loop. The value observed before the
/// operation is returned in register 0.
macro_rules! compare_and_swap {
($ordering:ident, $bytes:tt, $name:ident) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
// x16/x17 are used as scratch registers throughout.
core::arch::naked_asm! {
// Zero-extend `expected` so the full-width `cmp` below is meaningful.
// UXT s(tmp0), s(0)
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
// LDXR s(0), [x2]
concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"),
// cmp s(0), s(tmp0)
concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
"bne 1f",
// STXR w(tmp1), s(1), [x2]
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"),
// Non-zero status means the exclusive store failed; retry.
"cbnz w17, 0b",
"1:",
"ret",
}
}
}
};
}
// i128 uses a completely different impl, so it has its own macro.
//
// The 16-byte CAS works on register pairs: `expected` arrives in x0:x1, `desired`
// in x2:x3 and the pointer in x4. `ccmp` folds the comparison of both halves into
// a single branch; the previously observed value is returned in x0:x1.
macro_rules! compare_and_swap_i128 {
($ordering:ident, $name:ident) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
expected: i128, desired: i128, ptr: *mut i128
) -> i128 {
core::arch::naked_asm! {
// Stash `expected` so x0:x1 can hold the loaded value.
"mov x16, x0",
"mov x17, x1",
"0:",
// LDXP x0, x1, [x4]
concat!(ldxp!($ordering), " x0, x1, [x4]"),
"cmp x0, x16",
"ccmp x1, x17, #0, eq",
"bne 1f",
// STXP w(tmp2), x2, x3, [x4]
concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
// Non-zero status means the exclusive store failed; retry.
"cbnz w15, 0b",
"1:",
"ret",
}
}
}
};
}
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
///
/// Atomically replaces `*right_ptr` with `left`, returning the previous value.
macro_rules! swap {
($ordering:ident, $bytes:tt, $name:ident) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
// Stash `left` so register 0 can hold the loaded (returned) value.
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
// LDXR s(0), [x1]
concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
// STXR w(tmp1), s(tmp0), [x1]
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
// Non-zero status means the exclusive store failed; retry.
"cbnz w17, 0b",
"ret",
}
}
}
};
}
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
///
/// Shared LL/SC retry loop for the arithmetic/bitwise helpers: load-exclusive the
/// current value, combine it with the operand using `$op`, store-exclusive, and
/// retry until the store succeeds. The previous value is returned in register 0.
macro_rules! fetch_op {
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
// Stash the operand so register 0 can hold the loaded (returned) value.
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
// LDXR s(0), [x1]
concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
// OP s(tmp1), s(0), s(tmp0)
concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
// STXR w(tmp2), s(tmp1), [x1]
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
// Non-zero status means the exclusive store failed; retry.
"cbnz w15, 0b",
"ret",
}
}
}
}
}
// We need a single macro to pass to `foreach_ldadd`.
macro_rules! add {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "add" }
};
}
// Passed to `foreach_ldclr` below: uses `bic` (bit clear) rather than `and`,
// since these helpers implement an atomic "clear the given bits" operation.
macro_rules! and {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "bic" }
};
}
// Passed to `foreach_ldeor` (atomic exclusive-or).
macro_rules! xor {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "eor" }
};
}
// Passed to `foreach_ldset` (atomic bit set, i.e. or).
macro_rules! or {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "orr" }
};
}
// See `generate_aarch64_outlined_atomics` in build.rs.
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
// Each generated `foreach_*` macro invokes its argument macro once per
// (ordering, size) combination to define every outlined atomic symbol.
foreach_cas!(compare_and_swap);
foreach_cas16!(compare_and_swap_i128);
foreach_swp!(swap);
foreach_ldadd!(add);
foreach_ldclr!(and);
foreach_ldeor!(xor);
foreach_ldset!(or);

View file

@ -0,0 +1,280 @@
#![cfg(not(feature = "no-asm"))]
// Interfaces used by naked trampolines.
// These symbols are declared here so the `sym` operands in the naked functions
// below can reference them; the trampolines rearrange arguments/results to match
// the EABI's custom calling conventions.
extern "C" {
fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32;
fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64;
fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64;
}
extern "aapcs" {
// AAPCS is not always the correct ABI for these intrinsics, but we only use this to
// forward another `__aeabi_` call so it doesn't matter.
fn __aeabi_idiv(a: i32, b: i32) -> i32;
}
intrinsics! {
// NOTE This function and the ones below are implemented using assembly because they are using a
// custom calling convention which can't be implemented using a normal Rust function.
//
// 32-bit unsigned divmod: returns the quotient in r0 and the remainder in r1.
// A 4-byte stack slot is passed to `__udivmodsi4` as its out-pointer (r2), then
// loaded back into r1.
#[unsafe(naked)]
#[cfg(not(target_env = "msvc"))]
pub unsafe extern "C" fn __aeabi_uidivmod() {
core::arch::naked_asm!(
"push {{lr}}",
"sub sp, sp, #4",
"mov r2, sp",
"bl {trampoline}",
"ldr r1, [sp]",
"add sp, sp, #4",
"pop {{pc}}",
trampoline = sym crate::arm::__udivmodsi4
);
}
// 64-bit unsigned divmod: the quotient comes back in r0:r1 and the remainder is
// loaded into r2:r3 from the stack slot whose address was stacked for
// `__udivmoddi4`'s out-pointer.
#[unsafe(naked)]
pub unsafe extern "C" fn __aeabi_uldivmod() {
core::arch::naked_asm!(
"push {{r4, lr}}",
"sub sp, sp, #16",
"add r4, sp, #8",
"str r4, [sp]",
"bl {trampoline}",
"ldr r2, [sp, #8]",
"ldr r3, [sp, #12]",
"add sp, sp, #16",
"pop {{r4, pc}}",
trampoline = sym crate::arm::__udivmoddi4
);
}
// 32-bit signed divmod: saves the operands, lets `__aeabi_idiv` produce the
// quotient in r0, then reconstructs the remainder as `a - b * quotient` in r1.
#[unsafe(naked)]
pub unsafe extern "C" fn __aeabi_idivmod() {
core::arch::naked_asm!(
"push {{r0, r1, r4, lr}}",
"bl {trampoline}",
"pop {{r1, r2}}",
"muls r2, r2, r0",
"subs r1, r1, r2",
"pop {{r4, pc}}",
trampoline = sym crate::arm::__aeabi_idiv,
);
}
// 64-bit signed divmod: same stack layout as `__aeabi_uldivmod`, forwarding to
// `__divmoddi4`.
#[unsafe(naked)]
pub unsafe extern "C" fn __aeabi_ldivmod() {
core::arch::naked_asm!(
"push {{r4, lr}}",
"sub sp, sp, #16",
"add r4, sp, #8",
"str r4, [sp]",
"bl {trampoline}",
"ldr r2, [sp, #8]",
"ldr r3, [sp, #12]",
"add sp, sp, #16",
"pop {{r4, pc}}",
trampoline = sym crate::arm::__divmoddi4,
);
}
// FIXME(arm): The `*4` and `*8` variants should be defined as aliases.
/// `memcpy` provided with the `aapcs` ABI.
///
/// # Safety
///
/// Usual `memcpy` requirements apply.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memcpy(dst: *mut u8, src: *const u8, n: usize) {
// SAFETY: memcpy preconditions apply.
unsafe { crate::mem::memcpy(dst, src, n) };
}
/// `memcpy` for 4-byte alignment.
///
/// # Safety
///
/// Usual `memcpy` requirements apply. Additionally, `dst` and `src` must be aligned to
/// four bytes.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dst: *mut u8, src: *const u8, n: usize) {
// We are guaranteed 4-alignment, so accessing at u32 is okay.
let mut dst = dst.cast::<u32>();
let mut src = src.cast::<u32>();
debug_assert!(dst.is_aligned());
debug_assert!(src.is_aligned());
let mut n = n;
// Copy one 32-bit word per iteration while at least a full word remains.
while n >= 4 {
// SAFETY: `dst` and `src` are both valid for at least 4 bytes, from
// `memcpy` preconditions and the loop guard.
unsafe { *dst = *src };
// FIXME(addr): if we can make this end-of-address-space safe without losing
// performance, we may want to consider that.
// SAFETY: memcpy is not expected to work at the end of the address space
unsafe {
dst = dst.offset(1);
src = src.offset(1);
}
n -= 4;
}
// Delegate the remaining 0..=3 tail bytes to the byte-wise version.
// SAFETY: `dst` and `src` will still be valid for `n` bytes
unsafe { __aeabi_memcpy(dst.cast::<u8>(), src.cast::<u8>(), n) };
}
/// `memcpy` for 8-byte alignment.
///
/// # Safety
///
/// Usual `memcpy` requirements apply. Additionally, `dst` and `src` must be aligned to
/// eight bytes.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) {
debug_assert!(dst.addr() & 7 == 0);
debug_assert!(src.addr() & 7 == 0);
// SAFETY: memcpy preconditions apply, less strict alignment.
unsafe { __aeabi_memcpy4(dst, src, n) };
}
/// `memmove` provided with the `aapcs` ABI.
///
/// # Safety
///
/// Usual `memmove` requirements apply.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memmove(dst: *mut u8, src: *const u8, n: usize) {
// SAFETY: memmove preconditions apply.
unsafe { crate::mem::memmove(dst, src, n) };
}
/// `memmove` for 4-byte alignment.
///
/// # Safety
///
/// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to
/// four bytes.
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) {
debug_assert!(dst.addr() & 3 == 0);
debug_assert!(src.addr() & 3 == 0);
// SAFETY: same preconditions, less strict alignment.
unsafe { __aeabi_memmove(dst, src, n) };
}
/// `memmove` for 8-byte alignment.
///
/// # Safety
///
/// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to
/// eight bytes.
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) {
debug_assert!(dst.addr() & 7 == 0);
debug_assert!(src.addr() & 7 == 0);
// SAFETY: memmove preconditions apply, less strict alignment.
unsafe { __aeabi_memmove(dst, src, n) };
}
/// `memset` provided with the `aapcs` ABI.
///
/// # Safety
///
/// Usual `memset` requirements apply.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memset(dst: *mut u8, n: usize, c: i32) {
// Note the different argument order
// SAFETY: memset preconditions apply.
unsafe { crate::mem::memset(dst, c, n) };
}
/// `memset` for 4-byte alignment.
///
/// # Safety
///
/// Usual `memset` requirements apply. Additionally, `dst` must be aligned to
/// four bytes.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memset4(dst: *mut u8, n: usize, c: i32) {
let mut dst = dst.cast::<u32>();
debug_assert!(dst.is_aligned());
let mut n = n;
// Truncate the fill value to its low byte and splat it across a 32-bit word.
let byte = (c as u32) & 0xff;
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;
// Store one whole word per iteration while at least a full word remains.
while n >= 4 {
// SAFETY: `dst` is valid for at least 4 bytes, from `memset` preconditions and
// the loop guard.
unsafe { *dst = c };
// FIXME(addr): if we can make this end-of-address-space safe without losing
// performance, we may want to consider that.
// SAFETY: memcpy is not expected to work at the end of the address space
unsafe {
dst = dst.offset(1);
}
n -= 4;
}
// Delegate the remaining 0..=3 tail bytes to the byte-wise version.
// SAFETY: `dst` will still be valid for `n` bytes
unsafe { __aeabi_memset(dst.cast::<u8>(), n, byte as i32) };
}
/// `memset` for 8-byte alignment.
///
/// # Safety
///
/// Usual `memset` requirements apply. Additionally, `dst` must be aligned to
/// eight bytes.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) {
debug_assert!(dst.addr() & 7 == 0);
// SAFETY: memset preconditions apply, less strict alignment.
unsafe { __aeabi_memset4(dst, n, c) };
}
/// `memclr` provided with the `aapcs` ABI: zero-fills `n` bytes at `dst`.
///
/// # Safety
///
/// Usual `memclr` requirements apply.
#[cfg(not(target_vendor = "apple"))]
pub unsafe extern "aapcs" fn __aeabi_memclr(dst: *mut u8, n: usize) {
// SAFETY: memclr preconditions apply; this is `memset` with a zero fill byte.
unsafe { __aeabi_memset(dst, n, 0) };
}
/// `memclr` for 4-byte alignment.
///
/// # Safety
///
/// Usual `memclr` requirements apply. Additionally, `dst` must be aligned to
/// four bytes.
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) {
debug_assert!(dst.addr() & 3 == 0);
// SAFETY: memclr preconditions apply; same 4-byte alignment as `__aeabi_memset4`.
unsafe { __aeabi_memset4(dst, n, 0) };
}
/// `memclr` for 8-byte alignment.
///
/// # Safety
///
/// Usual `memclr` requirements apply. Additionally, `dst` must be aligned to
/// eight bytes.
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) {
debug_assert!(dst.addr() & 7 == 0);
// SAFETY: memclr preconditions apply, less strict alignment.
unsafe { __aeabi_memset4(dst, n, 0) };
}
}

View file

@ -0,0 +1,290 @@
use core::sync::atomic::{AtomicU32, Ordering};
use core::{arch, mem};
// Kernel-provided user-mode helper functions:
// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
//
// `__kuser_cmpxchg` lives at the fixed address 0xffff0fc0. Per the document
// above it atomically replaces `*ptr` with `newval` if it currently holds
// `oldval`, returning zero on success — hence the `== 0` translation to `bool`.
unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool {
let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ());
f(oldval, newval, ptr) == 0
}
// `__kuser_memory_barrier` (fixed address 0xffff0fa0) issues a memory barrier.
unsafe fn __kuser_memory_barrier() {
let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ());
f();
}
// Round a pointer down to the 4-byte word that contains it.
fn align_ptr<T>(ptr: *mut T) -> *mut u32 {
    // For `T == u32` the subtraction yields 0 low bits to clear, so a `u32`
    // pointer (already expected to be word-aligned) passes through unchanged.
    let low_bits = 3 & (4 - mem::size_of::<T>());
    let word_addr = (ptr as usize) & !low_bits;
    word_addr as *mut u32
}
// Compute the (shift, mask) pair that addresses a `T`-sized element inside the
// aligned 32-bit word containing it.
fn get_shift_mask<T>(ptr: *mut T) -> (u32, u32) {
    let size = mem::size_of::<T>();
    // Mask covering the low byte/halfword/word.
    let mask: u32 = match size {
        1 => 0xff,
        2 => 0xffff,
        4 => 0xffffffff,
        _ => unreachable!(),
    };
    // On big-endian targets the element order within the word is reversed, so
    // the shift has to be mirrored.
    let endian_adjust = if cfg!(target_endian = "little") {
        0
    } else {
        4 - size as u32
    };
    // Bit offset of the element within its containing word.
    let ptr_mask = 3 & (4 - size);
    let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8;
    (shift, mask)
}
// Pull the `mask`-wide field at bit offset `shift` out of an aligned word.
fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 {
    aligned >> shift & mask
}
// Write `val` into the `mask`-wide field at bit offset `shift` of `aligned`,
// leaving all other bits of the word untouched.
fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
    let cleared = aligned & !(mask << shift);
    cleared | ((val & mask) << shift)
}
/// Performs a relaxed atomic load of 4 bytes at `ptr`. Some of the bytes are allowed to be out of
/// bounds as long as `size_of::<T>()` bytes are in bounds.
///
/// # Safety
///
/// - `ptr` must be 4-aligned.
/// - `size_of::<T>()` must be at most 4.
/// - if `size_of::<T>() == 1`, `ptr` or `ptr` offset by 1, 2 or 3 bytes must be valid for a relaxed
/// atomic read of 1 byte.
/// - if `size_of::<T>() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic
/// read of 2 bytes.
/// - if `size_of::<T>() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes.
unsafe fn atomic_load_aligned<T>(ptr: *mut u32) -> u32 {
if mem::size_of::<T>() == 4 {
// SAFETY: As `T` has a size of 4, the caller guarantees this is sound.
unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) }
} else {
// SAFETY:
// As all 4 bytes pointed to by `ptr` might not be dereferenceable due to being out of
// bounds when doing atomic operations on a `u8`/`i8`/`u16`/`i16`, inline ASM is used to
// avoid causing undefined behaviour. However, as `ptr` is 4-aligned and at least 1 byte of
// `ptr` is dereferenceable, the load won't cause a segfault as the page size is always
// larger than 4 bytes.
// The `ldr` instruction does not touch the stack or flags, or write to memory, so
// `nostack`, `preserves_flags` and `readonly` are sound. The caller guarantees that `ptr` is
// 4-aligned, as required by `ldr`.
unsafe {
let res: u32;
arch::asm!(
"ldr {res}, [{ptr}]",
ptr = in(reg) ptr,
res = lateout(reg) res,
options(nostack, preserves_flags, readonly)
);
res
}
}
}
// Generic atomic read-modify-write operation
//
// Loads the aligned word containing the `T`-sized element, applies `f` to the
// embedded value, and retries via `__kuser_cmpxchg` until the word is swapped
// without interference. `g` selects the return value from `(old, new)`.
unsafe fn atomic_rmw<T, F: Fn(u32) -> u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 {
let aligned_ptr = align_ptr(ptr);
let (shift, mask) = get_shift_mask(ptr);
loop {
let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
let curval = extract_aligned(curval_aligned, shift, mask);
let newval = f(curval);
let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
return g(curval, newval);
}
}
}
// Generic atomic compare-exchange operation
//
// Returns the value observed before the operation: `oldval` on success, or the
// differing current value on failure.
unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 {
let aligned_ptr = align_ptr(ptr);
let (shift, mask) = get_shift_mask(ptr);
loop {
let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
let curval = extract_aligned(curval_aligned, shift, mask);
if curval != oldval {
return curval;
}
let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
return oldval;
}
}
}
// Generates an `extern "C"` atomic read-modify-write intrinsic.
//
// `$op` computes the new value from `(current, operand)`; `$fetch` picks the
// return value from `(old, new)`. The `@old` form returns the previous value,
// the `@new` form returns the updated one.
macro_rules! atomic_rmw {
($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
intrinsics! {
pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty
}
}
};
(@old $name:ident, $ty:ty, $op:expr) => {
atomic_rmw!($name, $ty, $op, |old, _| old);
};
(@new $name:ident, $ty:ty, $op:expr) => {
atomic_rmw!($name, $ty, $op, |_, new| new);
};
}
// Generates an `extern "C"` atomic compare-and-swap intrinsic that returns the
// previously observed value (equal to `oldval` on success).
macro_rules! atomic_cmpxchg {
($name:ident, $ty:ty) => {
intrinsics! {
pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty
}
}
};
}
// `@old` variants return the value before the operation (`__sync_fetch_and_*`);
// `@new` variants return the value after it (`__sync_*_and_fetch`).
atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b));
atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a.wrapping_add(b));
atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a.wrapping_add(b));
atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b));
atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a.wrapping_add(b));
atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a.wrapping_add(b));
atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a.wrapping_sub(b));
atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a.wrapping_sub(b));
atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a.wrapping_sub(b));
atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a.wrapping_sub(b));
atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b);
atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b);
atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b);
atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b);
atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b);
atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b);
atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b);
atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b);
atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b);
atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b);
atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b);
atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b);
atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b);
atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b);
atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b);
atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b);
atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b);
atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b);
atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b));
atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b));
atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b));
atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b));
atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b));
atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b));
// For primitive integers `Ord::max`/`Ord::min` are exactly
// `if a > b { a } else { b }` and `if a < b { a } else { b }`.
atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| a.max(b));
atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| a.max(b));
atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| a.max(b));
atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| a.max(b));
atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| a.max(b));
atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| a.max(b));
atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| a.min(b));
atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| a.min(b));
atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| a.min(b));
atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| a.min(b));
atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| a.min(b));
atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| a.min(b));
// test-and-set stores the operand unconditionally, returning the previous value.
atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b);
atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b);
atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b);
atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8);
atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16);
atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
intrinsics! {
// Full memory barrier via the kernel-provided `__kuser_memory_barrier` helper.
pub unsafe extern "C" fn __sync_synchronize() {
__kuser_memory_barrier();
}
}

View file

@ -0,0 +1,23 @@
intrinsics! {
pub unsafe extern "C" fn abort() -> ! {
// On AVRs, an architecture that doesn't support traps, unreachable code
// paths get lowered into calls to `abort`:
//
// https://github.com/llvm/llvm-project/blob/cbe8f3ad7621e402b050e768f400ff0d19c3aedd/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp#L4462
//
// When control gets here, it means that either core::intrinsics::abort()
// was called or an undefined behavior has occurred, so there's not that
// much we can do to recover - we can't `panic!()`, because for all we
// know the environment is gone now, so panicking might end up with us
// getting back to this very function.
//
// So let's do the next best thing, loop.
//
// Alternatively we could (try to) restart the program, but since
// undefined behavior is undefined, there's really no obligation for us
// to do anything here - for all we care, we could just set the chip on
// fire; but that'd be bad for the environment.
loop {}
}
}

View file

@ -0,0 +1,209 @@
use crate::float::Float;
use crate::int::{CastInto, Int, MinInt};
/// Returns `a + b`
///
/// Generic IEEE 754 addition on the raw bit representation, shared by every float
/// format via the `Float` trait. Subtraction of magnitudes is selected by the sign
/// bits; the final rounding is round-to-nearest with ties-to-even.
fn add<F: Float>(a: F, b: F) -> F
where
u32: CastInto<F::Int>,
F::Int: CastInto<u32>,
i32: CastInto<F::Int>,
F::Int: CastInto<i32>,
{
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let bits = F::BITS.cast();
let significand_bits = F::SIG_BITS;
let max_exponent = F::EXP_SAT;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIG_MASK;
let sign_bit = F::SIGN_MASK as F::Int;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let mut a_rep = a.to_bits();
let mut b_rep = b.to_bits();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// Detect if a or b is zero, infinity, or NaN.
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_bits(a_abs | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_bits(b_abs | quiet_bit);
}
if a_abs == inf_rep {
// +/-infinity + -/+infinity = qNaN
if (a.to_bits() ^ b.to_bits()) == sign_bit {
return F::from_bits(qnan_rep);
} else {
// +/-infinity + anything remaining = +/- infinity
return a;
}
}
// anything remaining + +/-infinity = +/-infinity
if b_abs == inf_rep {
return b;
}
// zero + anything = anything
if a_abs == MinInt::ZERO {
// but we need to get the sign right for zero + zero
if b_abs == MinInt::ZERO {
return F::from_bits(a.to_bits() & b.to_bits());
} else {
return b;
}
}
// anything + zero = anything
if b_abs == MinInt::ZERO {
return a;
}
}
// Swap a and b if necessary so that a has the larger absolute value.
if b_abs > a_abs {
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
let tmp = a_rep;
a_rep = b_rep;
b_rep = tmp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast();
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast();
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// normalize any denormals, and adjust the exponent accordingly.
if a_exponent == 0 {
let (exponent, significand) = F::normalize(a_significand);
a_exponent = exponent;
a_significand = significand;
}
if b_exponent == 0 {
let (exponent, significand) = F::normalize(b_significand);
b_exponent = exponent;
b_significand = significand;
}
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
let result_sign = a_rep & sign_bit;
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize(), but setting it twice won't hurt
// anything.)
a_significand = (a_significand | implicit_bit) << 3;
b_significand = (b_significand | implicit_bit) << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent).cast();
if align != MinInt::ZERO {
if align < bits {
let sticky =
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
b_significand = (b_significand >> align.cast()) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
}
}
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == MinInt::ZERO {
return F::from_bits(MinInt::ZERO);
}
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if a_significand < implicit_bit << 3 {
let shift =
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
a_significand <<= shift;
a_exponent -= shift;
}
} else {
// addition
a_significand += b_significand;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & (implicit_bit << 4) != MinInt::ZERO {
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
a_significand = (a_significand >> 1) | sticky;
a_exponent += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if a_exponent >= max_exponent as i32 {
return F::from_bits(inf_rep | result_sign);
}
if a_exponent <= 0 {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
let shift = (1 - a_exponent).cast();
let sticky =
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
a_significand = (a_significand >> shift.cast()) | sticky;
a_exponent = 0;
}
// Low three bits are round, guard, and sticky.
let a_significand_i32: i32 = a_significand.cast();
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
// Shift the significand into place, and mask off the implicit bit.
let mut result = (a_significand >> 3) & significand_mask;
// Insert the exponent and sign.
result |= a_exponent.cast() << significand_bits;
result |= result_sign;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
// Round up when strictly above the halfway point (round bit set plus any
// guard/sticky bit) ...
if round_guard_sticky > 0x4 {
result += one;
}
// ... and break exact ties to even: bump only when the low result bit is 1.
if round_guard_sticky == 0x4 {
result += result & one;
}
F::from_bits(result)
}
intrinsics! {
// `f32` addition; on ARM also exported under the AEABI name `__aeabi_fadd`.
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_fadd]
pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {
add(a, b)
}
// `f64` addition; on ARM also exported under the AEABI name `__aeabi_dadd`.
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_dadd]
pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 {
add(a, b)
}
// `f128` addition; on PowerPC exported as `__addkf3` instead.
#[ppc_alias = __addkf3]
#[cfg(f128_enabled)]
pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 {
add(a, b)
}
}

View file

@ -0,0 +1,248 @@
#![allow(unreachable_code)]
use crate::float::Float;
use crate::int::MinInt;
// Width of a comparison result in the C ABI: AVR uses an 8-bit value, all
// other targets a 32-bit value.
// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22
#[cfg(target_arch = "avr")]
pub type CmpResult = i8;
// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25
#[cfg(not(target_arch = "avr"))]
pub type CmpResult = i32;
/// Three-way comparison outcome, with `Unordered` for NaN operands.
#[derive(Clone, Copy)]
enum Result {
    Less,
    Equal,
    Greater,
    Unordered,
}
impl Result {
    /// Encoding used by the `__le*`-style intrinsics: unordered operands are
    /// reported as "greater" (positive).
    fn to_le_abi(self) -> CmpResult {
        match self {
            Result::Less => -1,
            Result::Equal => 0,
            Result::Greater | Result::Unordered => 1,
        }
    }
    /// Encoding used by the `__ge*`-style intrinsics: unordered operands are
    /// reported as "less" (negative).
    fn to_ge_abi(self) -> CmpResult {
        match self {
            Result::Less | Result::Unordered => -1,
            Result::Equal => 0,
            Result::Greater => 1,
        }
    }
}
// Three-way compare of `a` and `b` on their bit representations, mapping any
// NaN operand to `Result::Unordered`.
fn cmp<F: Float>(a: F, b: F) -> Result {
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let szero = F::SignedInt::ZERO;
let sign_bit = F::SIGN_MASK as F::Int;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;
let a_rep = a.to_bits();
let b_rep = b.to_bits();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// If either a or b is NaN, they are unordered.
if a_abs > inf_rep || b_abs > inf_rep {
return Result::Unordered;
}
// If a and b are both zeros, they are equal.
if a_abs | b_abs == zero {
return Result::Equal;
}
let a_srep = a.to_bits_signed();
let b_srep = b.to_bits_signed();
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if a_srep & b_srep >= szero {
if a_srep < b_srep {
Result::Less
} else if a_srep == b_srep {
Result::Equal
} else {
Result::Greater
}
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
} else if a_srep > b_srep {
Result::Less
} else if a_srep == b_srep {
Result::Equal
} else {
Result::Greater
}
}
/// Returns `true` if `a` and `b` are unordered, i.e. at least one of them is NaN.
fn unord<F: Float>(a: F, b: F) -> bool {
    // A NaN's magnitude bits (everything below the sign bit) compare strictly
    // greater than the all-exponent-ones infinity pattern.
    let abs_mask = (F::SIGN_MASK as F::Int) - F::Int::ONE;
    let inf_rep = F::EXP_MASK;
    a.to_bits() & abs_mask > inf_rep || b.to_bits() & abs_mask > inf_rep
}
intrinsics! {
    // Three-way comparison intrinsics for f32/f64. Each returns a negative,
    // zero, or positive `CmpResult` for less/equal/greater. The `to_le_abi`
    // routines map unordered (NaN) to 1 and back `__le*`, `__lt*`, `__eq*`,
    // and `__ne*`; the `to_ge_abi` routines map unordered to -1 and back
    // `__ge*` and `__gt*`.
    pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __gesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
    #[arm_aeabi_alias = __aeabi_fcmpun]
    pub extern "C" fn __unordsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        // Nonzero iff either operand is NaN.
        unord(a, b) as crate::float::cmp::CmpResult
    }
    pub extern "C" fn __eqsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __ltsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __nesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __gtsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
    pub extern "C" fn __ledf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __gedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
    #[arm_aeabi_alias = __aeabi_dcmpun]
    pub extern "C" fn __unorddf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        // Nonzero iff either operand is NaN.
        unord(a, b) as crate::float::cmp::CmpResult
    }
    pub extern "C" fn __eqdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __ltdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __nedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    pub extern "C" fn __gtdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
}
#[cfg(f128_enabled)]
intrinsics! {
    // `f128` three-way comparison intrinsics. Semantics match the f32/f64
    // versions above; on PowerPC these are emitted under the `*kf2` names
    // (see the `ppc_alias` attributes).
    #[ppc_alias = __lekf2]
    pub extern "C" fn __letf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    #[ppc_alias = __gekf2]
    pub extern "C" fn __getf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
    #[ppc_alias = __unordkf2]
    pub extern "C" fn __unordtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        // Nonzero iff either operand is NaN.
        unord(a, b) as crate::float::cmp::CmpResult
    }
    #[ppc_alias = __eqkf2]
    pub extern "C" fn __eqtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    #[ppc_alias = __ltkf2]
    pub extern "C" fn __lttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    #[ppc_alias = __nekf2]
    pub extern "C" fn __netf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }
    #[ppc_alias = __gtkf2]
    pub extern "C" fn __gttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
}
#[cfg(target_arch = "arm")]
intrinsics! {
    // ARM EABI comparison helpers. Unlike the three-way `__*f2` routines
    // above, these return a boolean-style `i32` (1 if the relation holds,
    // 0 otherwise), built by testing the three-way result.
    pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 {
        (__lesf2(a, b) <= 0) as i32
    }
    pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 {
        (__gesf2(a, b) >= 0) as i32
    }
    pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 {
        (__eqsf2(a, b) == 0) as i32
    }
    pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 {
        (__ltsf2(a, b) < 0) as i32
    }
    pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 {
        (__gtsf2(a, b) > 0) as i32
    }
    pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 {
        (__ledf2(a, b) <= 0) as i32
    }
    pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 {
        (__gedf2(a, b) >= 0) as i32
    }
    pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 {
        (__eqdf2(a, b) == 0) as i32
    }
    pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 {
        (__ltdf2(a, b) < 0) as i32
    }
    pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 {
        (__gtdf2(a, b) > 0) as i32
    }
}

View file

@ -0,0 +1,489 @@
use core::ops::Neg;
use super::Float;
use crate::int::{CastFrom, CastInto, Int, MinInt};
/// Conversions from integers to floats.
///
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
///   mantissa _with the implicit bit set_!
/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
///   patterns are used to simplify this. Adjust the mantissa with the result if needed.
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
///   from the exponent (above) accounts for the explicit bit being set in the mantissa.
///
/// # Terminology
///
/// - `i`: the original integer
/// - `i_m`: the integer, shifted fully left (no leading zeros)
/// - `n`: number of leading zeroes
/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
/// - `adj`: the bits that will be truncated, possibly compressed in some way.
/// - `m`: the resulting mantissa. Implicit bit is usually set.
mod int_to_float {
    use super::*;
    /// Calculate the exponent from the number of leading zeros.
    ///
    /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
    /// bit set can be added back later.
    fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
        F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n)
    }
    /// Adjust a mantissa with dropped bits to perform correct rounding.
    ///
    /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
    /// can be combined or compressed in some way that simplifies operations.
    fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
        // Branchlessly extract a `1` if rounding up should happen, 0 otherwise
        // This accounts for rounding to even.
        let adj = (dropped_bits - ((dropped_bits >> (F::BITS - 1)) & !m_base)) >> (F::BITS - 1);
        // Add one when we need to round up. Break ties to even.
        m_base + adj
    }
    /// Shift the exponent to its position and add the mantissa.
    ///
    /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
    /// value to cancel it out.
    fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
        // + rather than | so the mantissa can overflow into the exponent
        (e << F::SIG_BITS) + m
    }
    /// Shift distance from a left-aligned integer to a smaller float.
    fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
        (I::BITS - F::BITS) + F::EXP_BITS
    }
    /// Shift distance from an integer with `n` leading zeros to a smaller float.
    fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
        F::SIG_BITS - I::BITS + 1 + n
    }
    /// Perform a signed operation as unsigned, then add the sign back.
    pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
    where
        F: Float,
        I: Int,
        F::Int: CastFrom<I>,
        Conv: Fn(I::UnsignedInt) -> F::Int,
    {
        // Broadcast the sign bit of `i` into the float's sign position.
        let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
        F::from_bits(conv(i.unsigned_abs()) | sign_bit)
    }
    pub fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set (significant bits)
        let m_base = (i << n) >> f32::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f32::SIG_BITS + 1);
        let m = m_adj::<f32>(m_base, adj);
        let e = exp::<u32, f32>(n) - 1;
        repr::<f32>(e, m)
    }
    pub fn u32_to_f64_bits(i: u32) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set. No rounding needed: every u32 is
        // exactly representable in f64.
        let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
        let e = exp::<u32, f64>(n) - 1;
        repr::<f64>(e, m)
    }
    #[cfg(f128_enabled)]
    pub fn u32_to_f128_bits(i: u32) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Shift into mantissa position that is correct for the type, but shifted into the lower
        // 64 bits over so we can avoid 128-bit math.
        let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
        let e = exp::<u32, f128>(n) as u64 - 1;
        // High 64 bits of f128 representation.
        let h = (e << (f128::SIG_BITS - 64)) + m;
        // Shift back to the high bits, the rest of the mantissa will always be 0.
        (h as u128) << 64
    }
    pub fn u64_to_f32_bits(i: u64) -> u32 {
        // `i == 0` has no early return here: `wrapping_shl` keeps the shift by
        // `n == 64` well-defined (`i_m` stays 0) and `e` is forced to 0 below.
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n);
        // Mantissa with implicit bit set
        let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
        // The entire lower half of `i` will be truncated (masked portion), plus the
        // next `EXP_BITS` bits.
        let adj = ((i_m >> f32::EXP_BITS) | i_m & 0xFFFF) as u32;
        let m = m_adj::<f32>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
        repr::<f32>(e, m)
    }
    pub fn u64_to_f64_bits(i: u64) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set
        let m_base = (i << n) >> f64::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f64::SIG_BITS + 1);
        let m = m_adj::<f64>(m_base, adj);
        let e = exp::<u64, f64>(n) - 1;
        repr::<f64>(e, m)
    }
    #[cfg(f128_enabled)]
    pub fn u64_to_f128_bits(i: u64) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set. No rounding needed: every u64 is
        // exactly representable in f128.
        let m = (i as u128) << shift_f_gt_i::<u64, f128>(n);
        let e = exp::<u64, f128>(n) - 1;
        repr::<f128>(e, m)
    }
    pub fn u128_to_f32_bits(i: u128) -> u32 {
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
        let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
        // Within the upper `F::BITS`, everything except for the significand
        // gets truncated
        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast();
        // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
        // check if it is nonzero.
        let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
        let adj = d1 | d2;
        // Mantissa with implicit bit set
        let m = m_adj::<f32>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
        repr::<f32>(e, m)
    }
    pub fn u128_to_f64_bits(i: u128) -> u64 {
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n);
        // Mantissa with implicit bit set
        let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
        // The entire lower half of `i` will be truncated (masked portion), plus the
        // next `EXP_BITS` bits.
        let adj = ((i_m >> f64::EXP_BITS) | i_m & 0xFFFF_FFFF) as u64;
        let m = m_adj::<f64>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
        repr::<f64>(e, m)
    }
    #[cfg(f128_enabled)]
    pub fn u128_to_f128_bits(i: u128) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set
        let m_base = (i << n) >> f128::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f128::SIG_BITS + 1);
        let m = m_adj::<f128>(m_base, adj);
        let e = exp::<u128, f128>(n) - 1;
        repr::<f128>(e, m)
    }
}
// Conversions from unsigned integers to floats.
intrinsics! {
    // Name scheme: `__floatun <int> <float>` with `si`/`di`/`ti` for
    // u32/u64/u128 and `sf`/`df`/`tf` for f32/f64/f128. Each defers to the
    // bit-level helpers in `int_to_float` above.
    #[arm_aeabi_alias = __aeabi_ui2f]
    pub extern "C" fn __floatunsisf(i: u32) -> f32 {
        f32::from_bits(int_to_float::u32_to_f32_bits(i))
    }
    #[arm_aeabi_alias = __aeabi_ui2d]
    pub extern "C" fn __floatunsidf(i: u32) -> f64 {
        f64::from_bits(int_to_float::u32_to_f64_bits(i))
    }
    #[arm_aeabi_alias = __aeabi_ul2f]
    pub extern "C" fn __floatundisf(i: u64) -> f32 {
        f32::from_bits(int_to_float::u64_to_f32_bits(i))
    }
    #[arm_aeabi_alias = __aeabi_ul2d]
    pub extern "C" fn __floatundidf(i: u64) -> f64 {
        f64::from_bits(int_to_float::u64_to_f64_bits(i))
    }
    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floatuntisf(i: u128) -> f32 {
        f32::from_bits(int_to_float::u128_to_f32_bits(i))
    }
    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floatuntidf(i: u128) -> f64 {
        f64::from_bits(int_to_float::u128_to_f64_bits(i))
    }
    #[ppc_alias = __floatunsikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatunsitf(i: u32) -> f128 {
        f128::from_bits(int_to_float::u32_to_f128_bits(i))
    }
    #[ppc_alias = __floatundikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatunditf(i: u64) -> f128 {
        f128::from_bits(int_to_float::u64_to_f128_bits(i))
    }
    #[ppc_alias = __floatuntikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatuntitf(i: u128) -> f128 {
        f128::from_bits(int_to_float::u128_to_f128_bits(i))
    }
}
// Conversions from signed integers to floats.
intrinsics! {
    // Same name scheme as the unsigned versions (minus the `un`). Each wraps
    // the corresponding unsigned-to-bits helper via `int_to_float::signed`,
    // which converts the absolute value and then ORs the sign bit back in.
    #[arm_aeabi_alias = __aeabi_i2f]
    pub extern "C" fn __floatsisf(i: i32) -> f32 {
        int_to_float::signed(i, int_to_float::u32_to_f32_bits)
    }
    #[arm_aeabi_alias = __aeabi_i2d]
    pub extern "C" fn __floatsidf(i: i32) -> f64 {
        int_to_float::signed(i, int_to_float::u32_to_f64_bits)
    }
    #[arm_aeabi_alias = __aeabi_l2f]
    pub extern "C" fn __floatdisf(i: i64) -> f32 {
        int_to_float::signed(i, int_to_float::u64_to_f32_bits)
    }
    #[arm_aeabi_alias = __aeabi_l2d]
    pub extern "C" fn __floatdidf(i: i64) -> f64 {
        int_to_float::signed(i, int_to_float::u64_to_f64_bits)
    }
    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattisf(i: i128) -> f32 {
        int_to_float::signed(i, int_to_float::u128_to_f32_bits)
    }
    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattidf(i: i128) -> f64 {
        int_to_float::signed(i, int_to_float::u128_to_f64_bits)
    }
    #[ppc_alias = __floatsikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatsitf(i: i32) -> f128 {
        int_to_float::signed(i, int_to_float::u32_to_f128_bits)
    }
    #[ppc_alias = __floatdikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatditf(i: i64) -> f128 {
        int_to_float::signed(i, int_to_float::u64_to_f128_bits)
    }
    #[ppc_alias = __floattikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floattitf(i: i128) -> f128 {
        int_to_float::signed(i, int_to_float::u128_to_f128_bits)
    }
}
/// Generic float to unsigned int conversions.
///
/// Truncates toward zero. Via `float_to_int_inner`: values with the sign bit
/// set (negative inputs) and NaN produce `U::ZERO`; finite values too large
/// for `U`, and infinity, saturate to `U::MAX`.
fn float_to_unsigned_int<F, U>(f: F) -> U
where
    F: Float,
    U: Int<UnsignedInt = U>,
    F::Int: CastInto<U>,
    F::Int: CastFrom<u32>,
    F::Int: CastInto<U::UnsignedInt>,
    u32: CastFrom<F::Int>,
{
    // Unsigned: no sign handling needed, the identity map and `U::MAX` suffice.
    float_to_int_inner::<F, U, _, _>(f.to_bits(), |i: U| i, || U::MAX)
}
/// Generic float to signed int conversions.
///
/// Truncates toward zero. The sign bit is stripped before the shared inner
/// conversion and reapplied afterwards; out-of-range magnitudes (including
/// infinities) saturate to `I::MIN` or `I::MAX` depending on sign, and NaN
/// produces `I::ZERO`.
fn float_to_signed_int<F, I>(f: F) -> I
where
    F: Float,
    I: Int + Neg<Output = I>,
    I::UnsignedInt: Int,
    F::Int: CastInto<I::UnsignedInt>,
    F::Int: CastFrom<u32>,
    u32: CastFrom<F::Int>,
{
    float_to_int_inner::<F, I, _, _>(
        // Pass the magnitude only; the closures below restore the sign.
        f.to_bits() & !F::SIGN_MASK,
        |i: I| if f.is_sign_negative() { -i } else { i },
        || if f.is_sign_negative() { I::MIN } else { I::MAX },
    )
}
/// Float to int conversions, generic for both signed and unsigned.
///
/// Parameters:
/// - `fbits`: `abs(f)` bitcasted to an integer.
/// - `map_inbounds`: transformation applied to in-range results (e.g. restoring the sign).
/// - `out_of_bounds`: value returned when the magnitude is too large for `I`.
fn float_to_int_inner<F, I, MapFn, OobFn>(
    fbits: F::Int,
    map_inbounds: MapFn,
    out_of_bounds: OobFn,
) -> I
where
    F: Float,
    I: Int,
    MapFn: FnOnce(I) -> I,
    OobFn: FnOnce() -> I,
    I::UnsignedInt: Int,
    F::Int: CastInto<I::UnsignedInt>,
    F::Int: CastFrom<u32>,
    u32: CastFrom<F::Int>,
{
    // Smallest biased exponent whose value no longer fits in `I`.
    let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1;
    // Biased exponent at which the left-aligned significand needs no right
    // shift; the shift below is this minus the input's biased exponent.
    let no_shift_exp = F::EXP_BIAS + I::UnsignedInt::BITS - 1;

    if fbits < F::ONE.to_bits() {
        // Magnitude below 1.0 truncates to zero.
        return I::ZERO;
    }
    if fbits >= F::Int::cast_from(int_max_exp) << F::SIG_BITS {
        if fbits <= F::EXP_MASK {
            // Finite but too large for `I`, or infinity.
            return out_of_bounds();
        }
        // NaN.
        return I::ZERO;
    }

    // In range: `>= 1` and representable in `I`. Left-align the stored
    // significand within `I::UnsignedInt`, widening or narrowing as needed.
    let m_base = if I::UnsignedInt::BITS >= F::Int::BITS {
        I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
    } else {
        I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
    };
    // Set the implicit 1-bit.
    let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base;
    // Right-shift according to the exponent to place the integer value.
    let s: u32 = no_shift_exp - u32::cast_from(fbits >> F::SIG_BITS);
    map_inbounds(I::from_unsigned(m >> s))
}
// Conversions from floats to unsigned integers.
intrinsics! {
    // Truncating (round-toward-zero) conversions; see `float_to_unsigned_int`
    // for the behavior on negative, out-of-range, and NaN inputs.
    #[arm_aeabi_alias = __aeabi_f2uiz]
    pub extern "C" fn __fixunssfsi(f: f32) -> u32 {
        float_to_unsigned_int(f)
    }
    #[arm_aeabi_alias = __aeabi_f2ulz]
    pub extern "C" fn __fixunssfdi(f: f32) -> u64 {
        float_to_unsigned_int(f)
    }
    pub extern "C" fn __fixunssfti(f: f32) -> u128 {
        float_to_unsigned_int(f)
    }
    #[arm_aeabi_alias = __aeabi_d2uiz]
    pub extern "C" fn __fixunsdfsi(f: f64) -> u32 {
        float_to_unsigned_int(f)
    }
    #[arm_aeabi_alias = __aeabi_d2ulz]
    pub extern "C" fn __fixunsdfdi(f: f64) -> u64 {
        float_to_unsigned_int(f)
    }
    pub extern "C" fn __fixunsdfti(f: f64) -> u128 {
        float_to_unsigned_int(f)
    }
    #[ppc_alias = __fixunskfsi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfsi(f: f128) -> u32 {
        float_to_unsigned_int(f)
    }
    #[ppc_alias = __fixunskfdi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfdi(f: f128) -> u64 {
        float_to_unsigned_int(f)
    }
    #[ppc_alias = __fixunskfti]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfti(f: f128) -> u128 {
        float_to_unsigned_int(f)
    }
}
// Conversions from floats to signed integers.
intrinsics! {
    // Truncating (round-toward-zero) conversions; see `float_to_signed_int`
    // for the saturating behavior on out-of-range and NaN inputs.
    #[arm_aeabi_alias = __aeabi_f2iz]
    pub extern "C" fn __fixsfsi(f: f32) -> i32 {
        float_to_signed_int(f)
    }
    #[arm_aeabi_alias = __aeabi_f2lz]
    pub extern "C" fn __fixsfdi(f: f32) -> i64 {
        float_to_signed_int(f)
    }
    pub extern "C" fn __fixsfti(f: f32) -> i128 {
        float_to_signed_int(f)
    }
    #[arm_aeabi_alias = __aeabi_d2iz]
    pub extern "C" fn __fixdfsi(f: f64) -> i32 {
        float_to_signed_int(f)
    }
    #[arm_aeabi_alias = __aeabi_d2lz]
    pub extern "C" fn __fixdfdi(f: f64) -> i64 {
        float_to_signed_int(f)
    }
    pub extern "C" fn __fixdfti(f: f64) -> i128 {
        float_to_signed_int(f)
    }
    #[ppc_alias = __fixkfsi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfsi(f: f128) -> i32 {
        float_to_signed_int(f)
    }
    #[ppc_alias = __fixkfdi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfdi(f: f128) -> i64 {
        float_to_signed_int(f)
    }
    #[ppc_alias = __fixkfti]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfti(f: f128) -> i128 {
        float_to_signed_int(f)
    }
}

View file

@ -0,0 +1,635 @@
//! Floating point division routines.
//!
//! This module documentation gives an overview of the method used. More documentation is inline.
//!
//! # Relevant notation
//!
//! - `m_a`: the mantissa of `a`, in base 2
//! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a`
//! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned
//! fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM`
//! will have N bits of integer and M-N bits of fraction.
//! - `hw`: half width, i.e. for `f64` this will be a `u32`.
//! - `x` is the best estimate of `1/m_b`
//!
//! # Method Overview
//!
//! Division routines must solve for `a / b`, which is `res = m_a*2^p_a / m_b*2^p_b`. The basic
//! process is as follows:
//!
//! - Rearrange the exponent and significand to simplify the operations:
//! `res = (m_a / m_b) * 2^{p_a - p_b}`.
//! - Check for early exits (infinity, zero, etc).
//! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
//! - Set the implicit bit so math is correct.
//! - Shift mantissa significant digits (with implicit bit) fully left such that fixed-point UQ1
//! or UQ0 numbers can be used for mantissa math. These will have greater precision than the
//! actual mantissa, which is important for correct rounding.
//! - Calculate the reciprocal of `m_b`, `x`.
//! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`.
//! - Reapply rounding.
//!
//! # Reciprocal calculation
//!
//! Calculating the reciprocal is the most complicated part of this process. It uses the
//! [Newton-Raphson method], which picks an initial estimation (of the reciprocal) and performs
//! a number of iterations to increase its precision.
//!
//! In general, Newton's method takes the following form:
//!
//! ```text
//! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the
//! desired result.
//!
//! The result approaches a zero of `f(x)` by applying a correction to the previous guess.
//!
//! x_{n+1} = x_n - f(x_n) / f'(x_n)
//! ```
//!
//! Applying this to find the reciprocal:
//!
//! ```text
//! 1 / x = b
//!
//! Rearrange so we can solve by finding a zero
//! 0 = (1 / x) - b = f(x)
//!
//! f'(x) = -x^{-2}
//!
//! x_{n+1} = 2*x_n - b*x_n^2
//! ```
//!
//! This is a process that can be repeated to calculate the reciprocal with enough precision to
//! achieve a correctly rounded result for the overall division operation. The maximum required
//! number of iterations is known since precision doubles with each iteration.
//!
//! # Half-width operations
//!
//! Calculating the reciprocal requires widening multiplication and performing arithmetic on the
//! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`)
//! gets used instead of native math.
//!
//! To make this more efficient, all but the final operation can be computed using half-width
//! integers. For example, rather than computing four iterations using 128-bit integers for `f64`,
//! we can instead perform three iterations using native 64-bit integers and only one final
//! iteration using the full 128 bits.
//!
//! This works because of precision doubling. Some leeway is allowed here because the fixed-point
//! number has more bits than the final mantissa will.
//!
//! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method
use core::mem::size_of;
use core::ops;
use super::HalfRep;
use crate::float::Float;
use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
fn div<F: Float>(a: F, b: F) -> F
where
F::Int: CastInto<i32>,
F::Int: From<HalfRep<F>>,
F::Int: From<u8>,
F::Int: HInt + DInt,
<F::Int as HInt>::D: ops::Shr<u32, Output = <F::Int as HInt>::D>,
F::Int: From<u32>,
u16: CastInto<F::Int>,
i32: CastInto<F::Int>,
u32: CastInto<F::Int>,
u128: CastInto<HalfRep<F>>,
{
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let one_hw = HalfRep::<F>::ONE;
let zero_hw = HalfRep::<F>::ZERO;
let hw = F::BITS / 2;
let lo_mask = F::Int::MAX >> hw;
let significand_bits = F::SIG_BITS;
// Saturated exponent, representing infinity
let exponent_sat: F::Int = F::EXP_SAT.cast();
let exponent_bias = F::EXP_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIG_MASK;
let sign_bit = F::SIGN_MASK;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let (mut half_iterations, full_iterations) = get_iterations::<F>();
let recip_precision = reciprocal_precision::<F>();
if F::BITS == 128 {
// FIXME(tgross35): f128 seems to require one more half iteration than expected
half_iterations += 1;
}
let a_rep = a.to_bits();
let b_rep = b.to_bits();
// Exponent numeric representationm not accounting for bias
let a_exponent = (a_rep >> significand_bits) & exponent_sat;
let b_exponent = (b_rep >> significand_bits) & exponent_sat;
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// The exponent of our final result in its encoded form
let mut res_exponent: i32 =
i32::cast_from(a_exponent) - i32::cast_from(b_exponent) + (exponent_bias as i32);
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (exponent_sat - one)
|| b_exponent.wrapping_sub(one) >= (exponent_sat - one)
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN / anything = qNaN
if a_abs > inf_rep {
return F::from_bits(a_rep | quiet_bit);
}
// anything / NaN = qNaN
if b_abs > inf_rep {
return F::from_bits(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs == inf_rep {
// infinity / infinity = NaN
return F::from_bits(qnan_rep);
} else {
// infinity / anything else = +/- infinity
return F::from_bits(a_abs | quotient_sign);
}
}
// anything else / infinity = +/- 0
if b_abs == inf_rep {
return F::from_bits(quotient_sign);
}
if a_abs == zero {
if b_abs == zero {
// zero / zero = NaN
return F::from_bits(qnan_rep);
} else {
// zero / anything else = +/- zero
return F::from_bits(quotient_sign);
}
}
// anything else / zero = +/- infinity
if b_abs == zero {
return F::from_bits(inf_rep | quotient_sign);
}
// a is denormal. Renormalize it and set the scale to include the necessary exponent
// adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
res_exponent += exponent;
a_significand = significand;
}
// b is denormal. Renormalize it and set the scale to include the necessary exponent
// adjustment.
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
res_exponent -= exponent;
b_significand = significand;
}
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.
a_significand |= implicit_bit;
b_significand |= implicit_bit;
// Transform to a fixed-point representation by shifting the significand to the high bits. We
// know this is in the range [1.0, 2.0] since the implicit bit is set to 1 above.
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
// The max error for this approximation is achieved at endpoints, so
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
// which is about 4.5 bits.
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
//
// Then, refine the reciprocal estimate using a quadratically converging
// Newton-Raphson iteration:
// x_{n+1} = x_n * (2 - x_n * b)
//
// Let b be the original divisor considered "in infinite precision" and
// obtained from IEEE754 representation of function argument (with the
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
// UQ1.(W-1).
//
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
// bits of divisor significand (with the implicit bit set). Corresponds to
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
// version of b_UQ1.
//
// Let e_n := x_n - 1/b_hw
// E_n := x_n - 1/b
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
// = abs(e_n) + (b - b_hw) / (b*b_hw)
// <= abs(e_n) + 2 * 2^-HW
//
// rep_t-sized iterations may be slower than the corresponding half-width
// variant depending on the handware and whether single/double/quad precision
// is selected.
//
// NB: Using half-width iterations increases computation errors due to
// rounding, so error estimations have to be computed taking the selected
// mode into account!
let mut x_uq0 = if half_iterations > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: HalfRep<F> = b_uq1.hi();
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
let c_hw = c_hw::<F>();
// Check that the top bit is set, i.e. value is within `[1, 2)`.
debug_assert!(b_uq1_hw & (one_hw << (HalfRep::<F>::BITS - 1)) > zero_hw);
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let mut x_uq0_hw: HalfRep<F> =
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
// * C_hw being some (irrational) number **truncated** to W0 bits
// Please note that e_0 is calculated against the infinitely precise
// reciprocal of b_hw (that is, **truncated** version of b).
//
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
//
// By construction, 1 <= b < 2
// f(x) = x * (2 - b*x) = 2*x - b*x^2
// f'(x) = 2 * (1 - b*x)
//
// On the [0, 1] interval, f(0) = 0,
// then it increses until f(1/b) = 1 / b, maximum on (0, 1),
// then it decreses to f(1) = 2 - b
//
// Let g(x) = x - f(x) = b*x^2 - x.
// On (0, 1/b), g(x) < 0 <=> f(x) > x
// On (1/b, 1], g(x) > 0 <=> f(x) < x
//
// For half-width iterations, b_hw is used instead of b.
for _ in 0..half_iterations {
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
// of corr_UQ1_hw.
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
//
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
// considered to be 0 to be able to represent it in UQ0.HW.
// From the above analysis of f(x), if corr_UQ1_hw would be represented
// without any intermediate loss of precision (that is, in twice_rep_t)
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
// less otherwise. On the other hand, to obtain [1.]000..., one have to pass
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
// to 1.0 being not representable as UQ0.HW).
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
//
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
// any number of iterations, so just subtract 2 from the reciprocal
// approximation after last iteration.
//
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
// = 1 - e_n * b_hw + 2*eps1
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
// \------ >0 -------/ \-- >0 ---/
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
x_uq0_hw = next_guess(x_uq0_hw, b_uq1_hw);
}
// For initial half-width iterations, U = 2^-HW
// Let abs(e_n) <= u_n * U,
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
//
// Account for possible overflow (see above). For an overflow to occur for the
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
// value representable in UQ0.HW or less by 1. This means that 1/b_hw have to
// be not below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw = x_uq0_hw.wrapping_sub(one_hw);
// Error estimations for full-precision iterations are calculated just
// as above, but with U := 2^-W and taking extra decrementing into account.
// We need at least one such iteration.
//
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: F::Int = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
// <--- higher half ---><--- lower half --->
// [x_UQ0_hw * b_UQ1_hw]
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
.wrapping_sub(one)
.wrapping_neg(); // account for *possible* carry
let lo_corr: F::Int = corr_uq1 & lo_mask;
let hi_corr: F::Int = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
.wrapping_sub(F::Int::from(2u8));
x_uq0 -= one;
// ... and then traditional fixup by 2 should work
// On error estimation:
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
// + (2^-HW + 2^-W))
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
//
// Then like for the half-width iterations:
// With 0 <= eps1, eps2 < 2^-W
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: F::Int = F::Int::from(0x7504F333u32) << (F::BITS - 32);
let mut x_uq0: F::Int = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
// x_uq0
for _ in 0..full_iterations {
x_uq0 = next_guess(x_uq0, b_uq1);
}
x_uq0
};
// Finally, account for possible overflow, as explained above.
x_uq0 = x_uq0.wrapping_sub(2.cast());
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
x_uq0 -= recip_precision.cast();
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?
let mut quotient_uq1: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
// adjust it to be in [1.0, 2.0) as UQ1.SB.
let mut residual_lo = if quotient_uq1 < (implicit_bit << 1) {
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
// effectively doubling its value as well as its error estimation.
let residual_lo = (a_significand << (significand_bits + 1))
.wrapping_sub(quotient_uq1.wrapping_mul(b_significand));
res_exponent -= 1;
a_significand <<= 1;
residual_lo
} else {
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
quotient_uq1 >>= 1;
(a_significand << significand_bits).wrapping_sub(quotient_uq1.wrapping_mul(b_significand))
};
// drop mutability
let quotient = quotient_uq1;
// NB: residualLo is calculated above for the normal result case.
// It is re-computed on denormal path that is expected to be not so
// performance-sensitive.
//
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
// Each NextAfter() increments the floating point value by at least 2^-SB
// (more, if exponent was incremented).
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
// q
// | | * | | | | |
// <---> 2^t
// | | | | | * | |
// q
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
// (8*P) * 2^-W < 0.5 * 2^-SB
// P < 2^(W-4-SB)
// Generally, for at most R NextAfter() to be enough,
// P < (2*R - 1) * 2^(W-4-SB)
// For f32 (0+3): 10 < 32 (OK)
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
// For f64: 220 < 256 (OK)
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
//
// If we have overflowed the exponent, return infinity
if res_exponent >= i32::cast_from(exponent_sat) {
return F::from_bits(inf_rep | quotient_sign);
}
// Now, quotient <= the correctly-rounded result
// and may need taking NextAfter() up to 3 times (see error estimates above)
// r = a - b * q
let mut abs_result = if res_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= F::Int::from(res_exponent as u32) << significand_bits;
residual_lo <<= 1;
ret
} else {
if ((significand_bits as i32) + res_exponent) < 0 {
return F::from_bits(quotient_sign);
}
let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1);
residual_lo = a_significand
.wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast(res_exponent)))
.wrapping_sub(ret.wrapping_mul(b_significand) << 1);
ret
};
residual_lo += abs_result & one; // tie to even
// conditionally turns the below LT comparison into LTE
abs_result += u8::from(residual_lo > b_significand).into();
if F::BITS == 128 || (F::BITS == 32 && half_iterations > 0) {
// Do not round Infinity to NaN
abs_result +=
u8::from(abs_result < inf_rep && residual_lo > (2 + 1).cast() * b_significand).into();
}
if F::BITS == 128 {
abs_result +=
u8::from(abs_result < inf_rep && residual_lo > (4 + 1).cast() * b_significand).into();
}
F::from_bits(abs_result | quotient_sign)
}
/// Calculate the number of iterations required for a float type's precision.
///
/// Returns `(h, f)`: `h` iterations performed with integers at half the float's
/// bit width, followed by `f` iterations at the full width. This is further
/// explained in the module documentation.
///
/// # Requirements
///
/// The initial estimate should have at least 8 bits of precision. If this is not true, results
/// will be inaccurate.
const fn get_iterations<F: Float>() -> (usize, usize) {
    // Each Newton-Raphson step doubles the precision; starting from 8 bits,
    // `log2(BITS) - 2` steps are enough to cover the full width of the type.
    let steps_needed = F::BITS.ilog2() as usize - 2;

    // Widening multiplication is cheap when the double-width product still
    // fits in a machine word.
    let widening_is_cheap = 2 * size_of::<F>() <= size_of::<*const ()>();

    if widening_is_cheap {
        // No reason to use half-sized iterations; run everything at full width.
        (0, steps_needed)
    } else {
        // Otherwise keep all but the final step at half width.
        (steps_needed - 1, 1)
    }
}
/// `u_n` for different precisions (with N-1 half-width iterations).
///
/// W0 is the precision of C
///   u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
///
/// Estimated with bc:
///
/// ```text
/// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
/// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
/// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
/// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
///
///             | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1)  | f128 (4 + 1)
/// u_0         | < 184224974 | < 2812.1    | < 184224974  | < 791240234244348797
/// u_1         | < 15804007  | < 242.7     | < 15804007   | < 67877681371350440
/// u_2         | < 116308    | < 2.81      | < 116308     | < 499533100252317
/// u_3         | < 7.31      |             | < 7.31       | < 27054456580
/// u_4         |             |             |              | < 80.4
/// Final (U_N) | same as u_3 | < 72        | < 218        | < 13920
/// ```
///
/// Add 2 to `U_N` due to final decrement.
const fn reciprocal_precision<F: Float>() -> u16 {
    let (half_iterations, full_iterations) = get_iterations::<F>();

    if full_iterations < 1 {
        panic!("Must have at least one full iteration");
    }

    // FIXME(tgross35): calculate this programmatically
    match (F::BITS, half_iterations, full_iterations) {
        (32, 2, 1) => 74,
        (32, 0, 3) => 10,
        (64, 3, 1) => 220,
        (128, 4, 1) => 13922,
        _ => panic!("Invalid number of iterations"),
    }
}
/// The value of `C` adjusted to half width.
///
/// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW with W0 being either
/// 16 or 32 and W0 <= HW. That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from
/// which b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
fn c_hw<F: Float>() -> HalfRep<F>
where
    F::Int: DInt,
    u128: CastInto<HalfRep<F>>,
{
    // The constant stored with the full 128 fractional bits available;
    // narrower float types take the top bits.
    const C_U128: u128 = 0x7504f333f9de6108b2fb1366eaa6a542;
    // Shift down so only the top `HW` fractional bits remain; evaluated at
    // compile time for each concrete `F`.
    const { C_U128 >> (u128::BITS - <HalfRep<F>>::BITS) }.cast()
}
/// Perform one iteration at any width to approach `1/b`, given previous guess `x`. Returns
/// the next `x` as a UQ0 number.
///
/// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`. It
/// uses widening multiplication to calculate the result with necessary precision.
fn next_guess<I>(x_uq0: I, b_uq1: I) -> I
where
    I: Int + HInt,
    <I as HInt>::D: ops::Shr<u32, Output = <I as HInt>::D>,
{
    // `corr = 2 - b*x_n`, computed as `0 - b*x_n`: in UQ1 arithmetic,
    // `0.0 - x` and `2.0 - x` wrap to the same bit pattern.
    let product_hi = x_uq0.widen_mul(b_uq1).hi();
    let correction: I = I::ZERO.wrapping_sub(product_hi);

    // `x_n * corr = x_n * (2 - b*x_n)`; keep the high half of the widened
    // product, shifted by one to account for the UQ1 format.
    let wide_next = x_uq0.widen_mul(correction);
    (wide_next >> (I::BITS - 1)).lo()
}
intrinsics! {
    // `f32` division via the generic soft-float implementation above.
    #[arm_aeabi_alias = __aeabi_fdiv]
    pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 {
        div(a, b)
    }

    // `f64` division.
    #[arm_aeabi_alias = __aeabi_ddiv]
    pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 {
        div(a, b)
    }

    // `f128` division; exposed under the `__divkf3` name on PowerPC.
    #[ppc_alias = __divkf3]
    #[cfg(f128_enabled)]
    pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
        div(a, b)
    }

    // NOTE(review): the `*vfp` variants simply use native division —
    // presumably only reached on Arm targets where hardware FP is available.
    #[cfg(target_arch = "arm")]
    pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 {
        a / b
    }

    #[cfg(target_arch = "arm")]
    pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 {
        a / b
    }
}

View file

@ -0,0 +1,123 @@
use crate::float::Float;
use crate::int::{CastInto, Int, MinInt};
/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
fn extend<F: Float, R: Float>(a: F) -> R
where
    F::Int: CastInto<u64>,
    u64: CastInto<F::Int>,
    u32: CastInto<R::Int>,
    R::Int: CastInto<u32>,
    R::Int: CastInto<u64>,
    u64: CastInto<R::Int>,
    F::Int: CastInto<R::Int>,
{
    // Source ("F") format parameters.
    let src_zero = F::Int::ZERO;
    let src_one = F::Int::ONE;
    let src_bits = F::BITS;
    let src_sig_bits = F::SIG_BITS;
    let src_exp_bias = F::EXP_BIAS;
    let src_min_normal = F::IMPLICIT_BIT;
    let src_infinity = F::EXP_MASK;
    let src_sign_mask = F::SIGN_MASK;
    let src_abs_mask = src_sign_mask - src_one;
    let src_qnan = F::SIG_MASK;
    let src_nan_code = src_qnan - src_one;

    // Destination ("R") format parameters.
    let dst_bits = R::BITS;
    let dst_sig_bits = R::SIG_BITS;
    let dst_inf_exp = R::EXP_SAT;
    let dst_exp_bias = R::EXP_BIAS;
    let dst_min_normal = R::IMPLICIT_BIT;

    // Widening a float never loses precision, so no rounding is needed
    // anywhere below: the significand is shifted up and the exponent rebiased.
    let sig_bits_delta = dst_sig_bits - src_sig_bits;
    let exp_bias_delta = dst_exp_bias - src_exp_bias;
    let a_abs = a.to_bits() & src_abs_mask;
    let mut abs_result = R::Int::ZERO;

    // This wrapping-sub range check is true exactly when
    // `src_min_normal <= a_abs < src_infinity`.
    if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) {
        // a is a normal number.
        // Extend to the destination type by shifting the significand and
        // exponent into the proper position and rebiasing the exponent.
        let abs_dst: R::Int = a_abs.cast();
        let bias_dst: R::Int = exp_bias_delta.cast();
        abs_result = abs_dst.wrapping_shl(sig_bits_delta);
        abs_result += bias_dst.wrapping_shl(dst_sig_bits);
    } else if a_abs >= src_infinity {
        // a is NaN or infinity.
        // Conjure the result by beginning with infinity, then setting the qNaN
        // bit (if needed) and right-aligning the rest of the trailing NaN
        // payload field.
        let qnan_dst: R::Int = (a_abs & src_qnan).cast();
        let nan_code_dst: R::Int = (a_abs & src_nan_code).cast();
        let inf_exp_dst: R::Int = dst_inf_exp.cast();
        abs_result = inf_exp_dst.wrapping_shl(dst_sig_bits);
        abs_result |= qnan_dst.wrapping_shl(sig_bits_delta);
        abs_result |= nan_code_dst.wrapping_shl(sig_bits_delta);
    } else if a_abs != src_zero {
        // a is denormal.
        // Renormalize the significand and clear the leading bit, then insert
        // the correct adjusted exponent in the destination type.
        let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros();
        let abs_dst: R::Int = a_abs.cast();
        let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast();
        abs_result = abs_dst.wrapping_shl(sig_bits_delta + scale);
        // XOR clears the shifted-up leading significand bit; OR inserts the
        // computed exponent field.
        abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sig_bits));
    }
    // (a_abs == 0 falls through: the result is a signed zero.)

    // Apply the sign bit, moved into the destination's sign position.
    let sign_result: R::Int = (a.to_bits() & src_sign_mask).cast();
    R::from_bits(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits)))
}
intrinsics! {
    // `f32` -> `f64` widening conversion.
    #[aapcs_on_arm]
    #[arm_aeabi_alias = __aeabi_f2d]
    pub extern "C" fn __extendsfdf2(a: f32) -> f64 {
        extend(a)
    }
}
intrinsics! {
    // `f16` -> `f32` widening conversion.
    #[aapcs_on_arm]
    #[apple_f16_arg_abi]
    #[arm_aeabi_alias = __aeabi_h2f]
    #[cfg(f16_enabled)]
    pub extern "C" fn __extendhfsf2(a: f16) -> f32 {
        extend(a)
    }

    // GNU spelling of the `f16` -> `f32` conversion (same operation as
    // `__extendhfsf2`).
    #[aapcs_on_arm]
    #[apple_f16_arg_abi]
    #[cfg(f16_enabled)]
    pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 {
        extend(a)
    }

    // `f16` -> `f64` widening conversion.
    #[aapcs_on_arm]
    #[apple_f16_arg_abi]
    #[cfg(f16_enabled)]
    pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
        extend(a)
    }

    // `f16` -> `f128` widening conversion; `__extendhfkf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __extendhfkf2]
    #[cfg(all(f16_enabled, f128_enabled))]
    pub extern "C" fn __extendhftf2(a: f16) -> f128 {
        extend(a)
    }

    // `f32` -> `f128` widening conversion; `__extendsfkf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __extendsfkf2]
    #[cfg(f128_enabled)]
    pub extern "C" fn __extendsftf2(a: f32) -> f128 {
        extend(a)
    }

    // `f64` -> `f128` widening conversion; `__extenddfkf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __extenddfkf2]
    #[cfg(f128_enabled)]
    pub extern "C" fn __extenddftf2(a: f64) -> f128 {
        extend(a)
    }
}

View file

@ -0,0 +1,15 @@
pub mod add;
pub mod cmp;
pub mod conv;
pub mod div;
pub mod extend;
pub mod mul;
pub mod pow;
pub mod sub;
pub(crate) mod traits;
pub mod trunc;
#[cfg(not(feature = "unstable-public-internals"))]
pub(crate) use traits::{Float, HalfRep};
#[cfg(feature = "unstable-public-internals")]
pub use traits::{Float, HalfRep};

View file

@ -0,0 +1,200 @@
use crate::float::Float;
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
/// Generic IEEE-754 soft-float multiplication with round-to-nearest,
/// ties-to-even rounding.
fn mul<F: Float>(a: F, b: F) -> F
where
    u32: CastInto<F::Int>,
    F::Int: CastInto<u32>,
    i32: CastInto<F::Int>,
    F::Int: CastInto<i32>,
    F::Int: HInt,
{
    // Format parameters for the float type.
    let one = F::Int::ONE;
    let zero = F::Int::ZERO;
    let bits = F::BITS;
    let significand_bits = F::SIG_BITS;
    let max_exponent = F::EXP_SAT;
    let exponent_bias = F::EXP_BIAS;

    let implicit_bit = F::IMPLICIT_BIT;
    let significand_mask = F::SIG_MASK;
    let sign_bit = F::SIGN_MASK;
    let abs_mask = sign_bit - one;
    let exponent_mask = F::EXP_MASK;
    let inf_rep = exponent_mask;
    let quiet_bit = implicit_bit >> 1;
    let qnan_rep = exponent_mask | quiet_bit;
    let exponent_bits = F::EXP_BITS;

    let a_rep = a.to_bits();
    let b_rep = b.to_bits();

    let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
    let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
    // The result's sign is the XOR of the operand signs.
    let product_sign = (a_rep ^ b_rep) & sign_bit;

    let mut a_significand = a_rep & significand_mask;
    let mut b_significand = b_rep & significand_mask;
    // Exponent adjustment accumulated while renormalizing denormals.
    let mut scale = 0;

    // Detect if a or b is zero, denormal, infinity, or NaN.
    if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
        || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
    {
        let a_abs = a_rep & abs_mask;
        let b_abs = b_rep & abs_mask;

        // NaN * anything = qNaN
        if a_abs > inf_rep {
            return F::from_bits(a_rep | quiet_bit);
        }
        // anything * NaN = qNaN
        if b_abs > inf_rep {
            return F::from_bits(b_rep | quiet_bit);
        }

        if a_abs == inf_rep {
            if b_abs != zero {
                // infinity * non-zero = +/- infinity
                return F::from_bits(a_abs | product_sign);
            } else {
                // infinity * zero = NaN
                return F::from_bits(qnan_rep);
            }
        }

        if b_abs == inf_rep {
            if a_abs != zero {
                // infinity * non-zero = +/- infinity
                return F::from_bits(b_abs | product_sign);
            } else {
                // infinity * zero = NaN
                return F::from_bits(qnan_rep);
            }
        }

        // zero * anything = +/- zero
        if a_abs == zero {
            return F::from_bits(product_sign);
        }

        // anything * zero = +/- zero
        if b_abs == zero {
            return F::from_bits(product_sign);
        }

        // one or both of a or b is denormal, the other (if applicable) is a
        // normal number. Renormalize one or both of a and b, and set scale to
        // include the necessary exponent adjustment.
        if a_abs < implicit_bit {
            let (exponent, significand) = F::normalize(a_significand);
            scale += exponent;
            a_significand = significand;
        }

        if b_abs < implicit_bit {
            let (exponent, significand) = F::normalize(b_significand);
            scale += exponent;
            b_significand = significand;
        }
    }

    // Or in the implicit significand bit. (If we fell through from the
    // denormal path it was already set by normalize( ), but setting it twice
    // won't hurt anything.)
    a_significand |= implicit_bit;
    b_significand |= implicit_bit;

    // Get the significand of a*b. Before multiplying the significands, shift
    // one of them left to left-align it in the field. Thus, the product will
    // have (exponentBits + 2) integral digits, all but two of which must be
    // zero. Normalizing this result is just a conditional left-shift by one
    // and bumping the exponent accordingly.
    let (mut product_low, mut product_high) = a_significand
        .widen_mul(b_significand << exponent_bits)
        .lo_hi();

    let a_exponent_i32: i32 = a_exponent.cast();
    let b_exponent_i32: i32 = b_exponent.cast();
    let mut product_exponent: i32 = a_exponent_i32
        .wrapping_add(b_exponent_i32)
        .wrapping_add(scale)
        .wrapping_sub(exponent_bias as i32);

    // Normalize the significand, adjust exponent if needed.
    if (product_high & implicit_bit) != zero {
        product_exponent = product_exponent.wrapping_add(1);
    } else {
        product_high = (product_high << 1) | (product_low >> (bits - 1));
        product_low <<= 1;
    }

    // If we have overflowed the type, return +/- infinity.
    if product_exponent >= max_exponent as i32 {
        return F::from_bits(inf_rep | product_sign);
    }

    if product_exponent <= 0 {
        // Result is denormal before rounding
        //
        // If the result is so small that it just underflows to zero, return
        // a zero of the appropriate sign. Mathematically there is no need to
        // handle this case separately, but we make it a special case to
        // simplify the shift logic.
        let shift = one.wrapping_sub(product_exponent.cast()).cast();
        if shift >= bits {
            return F::from_bits(product_sign);
        }

        // Otherwise, shift the significand of the result so that the round
        // bit is the high bit of `product_low`.
        // Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are
        // not all zero so that the result is correctly rounded below.
        let sticky = product_low << (bits - shift) != zero;
        product_low =
            (product_high << (bits - shift)) | (product_low >> shift) | (sticky as u32).cast();
        product_high >>= shift;
    } else {
        // Result is normal before rounding; insert the exponent.
        product_high &= significand_mask;
        product_high |= product_exponent.cast() << significand_bits;
    }

    // Insert the sign of the result:
    product_high |= product_sign;

    // Final rounding. The final result may overflow to infinity, or underflow
    // to zero, but those are the correct results in those cases. We use the
    // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
    if product_low > sign_bit {
        product_high += one;
    }

    if product_low == sign_bit {
        product_high += product_high & one;
    }

    F::from_bits(product_high)
}
intrinsics! {
    // `f32` multiplication via the generic soft-float implementation.
    #[aapcs_on_arm]
    #[arm_aeabi_alias = __aeabi_fmul]
    pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
        mul(a, b)
    }

    // `f64` multiplication.
    #[aapcs_on_arm]
    #[arm_aeabi_alias = __aeabi_dmul]
    pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
        mul(a, b)
    }

    // `f128` multiplication; `__mulkf3` on PowerPC.
    #[ppc_alias = __mulkf3]
    #[cfg(f128_enabled)]
    pub extern "C" fn __multf3(a: f128, b: f128) -> f128 {
        mul(a, b)
    }
}

View file

@ -0,0 +1,40 @@
use crate::float::Float;
use crate::int::Int;
/// Returns `a` raised to the power `b`.
///
/// Uses binary exponentiation (square-and-multiply) on the magnitude of the
/// exponent; a negative exponent is handled by taking the reciprocal of the
/// accumulated product at the end.
fn pow<F: Float>(a: F, b: i32) -> F {
    let invert = b < 0;
    // Magnitude of the exponent as an unsigned value (handles `i32::MIN`).
    let mut exp = Int::abs_diff(b, 0);
    let mut base = a;
    let mut acc = F::ONE;

    while exp != 0 {
        if exp & 1 != 0 {
            acc *= base;
        }
        exp >>= 1;
        // Skip the final squaring once all exponent bits are consumed, just
        // like the original `loop`/`break` structure did.
        if exp != 0 {
            base *= base;
        }
    }

    if invert { F::ONE / acc } else { acc }
}
intrinsics! {
    // `f32` raised to an `i32` power.
    pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 {
        pow(a, b)
    }

    // `f64` raised to an `i32` power.
    pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 {
        pow(a, b)
    }

    // `f128` raised to an `i32` power; `__powikf2` on PowerPC.
    #[ppc_alias = __powikf2]
    #[cfg(f128_enabled)]
    // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
    #[cfg(not(target_env = "msvc"))]
    pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 {
        pow(a, b)
    }
}

View file

@ -0,0 +1,24 @@
use crate::float::Float;
intrinsics! {
    // `f32` subtraction, implemented as addition of the negated operand
    // (flipping the sign bit negates any float, including NaN/inf payload-
    // preserving).
    #[arm_aeabi_alias = __aeabi_fsub]
    pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 {
        crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))
    }

    // `f64` subtraction via negated addition.
    #[arm_aeabi_alias = __aeabi_dsub]
    pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 {
        crate::float::add::__adddf3(a, f64::from_bits(b.to_bits() ^ f64::SIGN_MASK))
    }

    // `f128` subtraction; on PowerPC the addition intrinsic is named
    // `__addkf3`, so alias it locally before use.
    #[ppc_alias = __subkf3]
    #[cfg(f128_enabled)]
    pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 {
        #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
        use crate::float::add::__addkf3 as __addtf3;
        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
        use crate::float::add::__addtf3;
        __addtf3(a, f128::from_bits(b.to_bits() ^ f128::SIGN_MASK))
    }
}

View file

@ -0,0 +1,189 @@
use core::ops;
use crate::int::{DInt, Int, MinInt};
/// Wrapper to extract the integer type half of the float's size
pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;

/// Trait for some basic operations on floats
#[allow(dead_code)]
pub trait Float:
    Copy
    + core::fmt::Debug
    + PartialEq
    + PartialOrd
    + ops::AddAssign
    + ops::MulAssign
    + ops::Add<Output = Self>
    + ops::Sub<Output = Self>
    + ops::Div<Output = Self>
    + ops::Rem<Output = Self>
{
    /// A uint of the same width as the float
    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;

    /// A int of the same width as the float
    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;

    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
    type ExpInt: Int;

    // Positive zero of the float type.
    const ZERO: Self;
    // One of the float type.
    const ONE: Self;

    /// The bitwidth of the float type.
    const BITS: u32;

    /// The bitwidth of the significand.
    const SIG_BITS: u32;

    /// The bitwidth of the exponent.
    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;

    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
    /// representation.
    ///
    /// This is in the rightmost position, use `EXP_MASK` for the shifted value.
    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;

    /// The exponent bias value.
    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;

    /// A mask for the sign bit.
    const SIGN_MASK: Self::Int;

    /// A mask for the significand.
    const SIG_MASK: Self::Int;

    /// The implicit bit of the float format.
    const IMPLICIT_BIT: Self::Int;

    /// A mask for the exponent.
    const EXP_MASK: Self::Int;

    /// Returns `self` transmuted to `Self::Int`
    fn to_bits(self) -> Self::Int;

    /// Returns `self` transmuted to `Self::SignedInt`
    fn to_bits_signed(self) -> Self::SignedInt;

    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
    /// represented in multiple different ways. This method returns `true` if two NaNs are
    /// compared.
    fn eq_repr(self, rhs: Self) -> bool;

    /// Returns true if the sign is negative
    fn is_sign_negative(self) -> bool;

    /// Returns the exponent, not adjusting for bias.
    fn exp(self) -> Self::ExpInt;

    /// Returns the significand with no implicit bit (or the "fractional" part)
    fn frac(self) -> Self::Int;

    /// Returns the significand with implicit bit
    fn imp_frac(self) -> Self::Int;

    /// Returns a `Self::Int` transmuted back to `Self`
    fn from_bits(a: Self::Int) -> Self;

    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;

    /// Returns the absolute value of `self`, computed by clearing the sign bit.
    fn abs(self) -> Self {
        let abs_mask = !Self::SIGN_MASK;
        Self::from_bits(self.to_bits() & abs_mask)
    }

    /// Returns (normalized exponent, normalized significand)
    fn normalize(significand: Self::Int) -> (i32, Self::Int);

    /// Returns if `self` is subnormal
    fn is_subnormal(self) -> bool;
}
// Implements `Float` for a concrete type. Arguments: float type, same-width
// unsigned int, same-width signed int, exponent int, total bits, significand
// bits.
macro_rules! float_impl {
    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
        impl Float for $ty {
            type Int = $ity;
            type SignedInt = $sity;
            type ExpInt = $expty;

            const ZERO: Self = 0.0;
            const ONE: Self = 1.0;

            const BITS: u32 = $bits;
            const SIG_BITS: u32 = $significand_bits;

            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);

            fn to_bits(self) -> Self::Int {
                self.to_bits()
            }
            fn to_bits_signed(self) -> Self::SignedInt {
                self.to_bits() as Self::SignedInt
            }
            fn eq_repr(self, rhs: Self) -> bool {
                #[cfg(feature = "mangled-names")]
                fn is_nan(x: $ty) -> bool {
                    // When using mangled-names, the "real" compiler-builtins might not have the
                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
                    // x is NaN if all the bits of the exponent are set and the significand is non-0
                    x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
                }
                #[cfg(not(feature = "mangled-names"))]
                fn is_nan(x: $ty) -> bool {
                    x.is_nan()
                }
                // Bitwise equality, except any two NaNs compare equal.
                if is_nan(self) && is_nan(rhs) {
                    true
                } else {
                    self.to_bits() == rhs.to_bits()
                }
            }
            fn is_sign_negative(self) -> bool {
                self.is_sign_negative()
            }
            fn exp(self) -> Self::ExpInt {
                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
            }
            fn frac(self) -> Self::Int {
                self.to_bits() & Self::SIG_MASK
            }
            fn imp_frac(self) -> Self::Int {
                self.frac() | Self::IMPLICIT_BIT
            }
            fn from_bits(a: Self::Int) -> Self {
                Self::from_bits(a)
            }
            fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
                Self::from_bits(
                    ((negative as Self::Int) << (Self::BITS - 1))
                        | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
                        | (significand & Self::SIG_MASK),
                )
            }
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                // Shift until the highest significand bit reaches the implicit
                // bit position; the returned exponent compensates.
                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                (
                    1i32.wrapping_sub(shift as i32),
                    significand << shift as Self::Int,
                )
            }
            fn is_subnormal(self) -> bool {
                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
            }
        }
    };
}
// Concrete `Float` implementations:
// (type, unsigned int, signed int, exponent int, total bits, significand bits).
#[cfg(f16_enabled)]
float_impl!(f16, u16, i16, i8, 16, 10);
float_impl!(f32, u32, i32, i16, 32, 23);
float_impl!(f64, u64, i64, i16, 64, 52);
#[cfg(f128_enabled)]
float_impl!(f128, u128, i128, i16, 128, 112);

View file

@ -0,0 +1,169 @@
use crate::float::Float;
use crate::int::{CastInto, Int, MinInt};
/// Generic conversion from a wider to a narrower IEEE-754 floating-point type,
/// rounding to nearest with ties-to-even.
fn trunc<F: Float, R: Float>(a: F) -> R
where
    F::Int: CastInto<u64>,
    F::Int: CastInto<u32>,
    u64: CastInto<F::Int>,
    u32: CastInto<F::Int>,
    R::Int: CastInto<u32>,
    u32: CastInto<R::Int>,
    F::Int: CastInto<R::Int>,
{
    // Source ("F") format parameters.
    let src_zero = F::Int::ZERO;
    let src_one = F::Int::ONE;
    let src_bits = F::BITS;
    let src_exp_bias = F::EXP_BIAS;

    let src_min_normal = F::IMPLICIT_BIT;
    let src_sig_mask = F::SIG_MASK;
    let src_infinity = F::EXP_MASK;
    let src_sign_mask = F::SIGN_MASK;
    let src_abs_mask = src_sign_mask - src_one;
    // Bits that will be discarded by the narrowing shift, and the halfway
    // point used for round-to-nearest.
    let round_mask = (src_one << (F::SIG_BITS - R::SIG_BITS)) - src_one;
    let halfway = src_one << (F::SIG_BITS - R::SIG_BITS - 1);
    let src_qnan = src_one << (F::SIG_BITS - 1);
    let src_nan_code = src_qnan - src_one;

    // Destination ("R") format parameters.
    let dst_zero = R::Int::ZERO;
    let dst_one = R::Int::ONE;
    let dst_bits = R::BITS;
    let dst_inf_exp = R::EXP_SAT;
    let dst_exp_bias = R::EXP_BIAS;

    // Source-format encodings of the smallest and largest exponents that map
    // to destination normals.
    let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast();
    let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast();
    let underflow: F::Int = underflow_exponent << F::SIG_BITS;
    let overflow: F::Int = overflow_exponent << F::SIG_BITS;

    let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1);
    let dst_nan_code = dst_qnan - dst_one;

    let sig_bits_delta = F::SIG_BITS - R::SIG_BITS;
    // Break a into a sign and representation of the absolute value.
    let a_abs = a.to_bits() & src_abs_mask;
    let sign = a.to_bits() & src_sign_mask;
    let mut abs_result: R::Int;

    // This wrapping-sub comparison is true exactly when
    // `underflow <= a_abs < overflow`.
    if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        abs_result = (a_abs >> sig_bits_delta).cast();

        // Cast before shifting to prevent overflow.
        let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast();
        let tmp = bias_diff << R::SIG_BITS;
        abs_result = abs_result.wrapping_sub(tmp);

        let round_bits = a_abs & round_mask;
        if round_bits > halfway {
            // Round to nearest.
            abs_result += dst_one;
        } else if round_bits == halfway {
            // Tie to even.
            abs_result += abs_result & dst_one;
        };
    } else if a_abs > src_infinity {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.

        // Cast before shifting to prevent overflow.
        let dst_inf_exp: R::Int = dst_inf_exp.cast();
        abs_result = dst_inf_exp << R::SIG_BITS;

        abs_result |= dst_qnan;
        abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIG_BITS - R::SIG_BITS)).cast();
    } else if a_abs >= overflow {
        // a overflows to infinity.

        // Cast before shifting to prevent overflow.
        let dst_inf_exp: R::Int = dst_inf_exp.cast();
        abs_result = dst_inf_exp << R::SIG_BITS;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        let a_exp: u32 = (a_abs >> F::SIG_BITS).cast();
        let shift = src_exp_bias - dst_exp_bias - a_exp + 1;

        // Restore the implicit bit before denormalizing.
        let significand = (a.to_bits() & src_sig_mask) | src_min_normal;

        // Right shift by the denormalization amount with sticky.
        if shift > F::SIG_BITS {
            abs_result = dst_zero;
        } else {
            // The sticky bit records whether any shifted-out bit was set, so
            // rounding below sees a value strictly above/below halfway.
            let sticky = if (significand << (src_bits - shift)) != src_zero {
                src_one
            } else {
                src_zero
            };
            let denormalized_significand: F::Int = (significand >> shift) | sticky;
            abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast();
            let round_bits = denormalized_significand & round_mask;
            // Round to nearest
            if round_bits > halfway {
                abs_result += dst_one;
            }
            // Ties to even
            else if round_bits == halfway {
                abs_result += abs_result & dst_one;
            };
        }
    }

    // Apply the signbit to the absolute value.
    R::from_bits(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast())
}
intrinsics! {
    // `f64` -> `f32` narrowing conversion.
    #[aapcs_on_arm]
    #[arm_aeabi_alias = __aeabi_d2f]
    pub extern "C" fn __truncdfsf2(a: f64) -> f32 {
        trunc(a)
    }
}
intrinsics! {
    // `f32` -> `f16` narrowing conversion.
    #[aapcs_on_arm]
    #[apple_f16_ret_abi]
    #[arm_aeabi_alias = __aeabi_f2h]
    #[cfg(f16_enabled)]
    pub extern "C" fn __truncsfhf2(a: f32) -> f16 {
        trunc(a)
    }

    // GNU spelling of the `f32` -> `f16` conversion (same operation as
    // `__truncsfhf2`).
    #[aapcs_on_arm]
    #[apple_f16_ret_abi]
    #[cfg(f16_enabled)]
    pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 {
        trunc(a)
    }

    // `f64` -> `f16` narrowing conversion.
    #[aapcs_on_arm]
    #[apple_f16_ret_abi]
    #[arm_aeabi_alias = __aeabi_d2h]
    #[cfg(f16_enabled)]
    pub extern "C" fn __truncdfhf2(a: f64) -> f16 {
        trunc(a)
    }

    // `f128` -> `f16` narrowing conversion; `__trunckfhf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __trunckfhf2]
    #[cfg(all(f16_enabled, f128_enabled))]
    pub extern "C" fn __trunctfhf2(a: f128) -> f16 {
        trunc(a)
    }

    // `f128` -> `f32` narrowing conversion; `__trunckfsf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __trunckfsf2]
    #[cfg(f128_enabled)]
    pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
        trunc(a)
    }

    // `f128` -> `f64` narrowing conversion; `__trunckfdf2` on PowerPC.
    #[aapcs_on_arm]
    #[ppc_alias = __trunckfdf2]
    #[cfg(f128_enabled)]
    pub extern "C" fn __trunctfdf2(a: f128) -> f64 {
        trunc(a)
    }
}

View file

@ -0,0 +1,55 @@
// Hexagon builtins are provided as hand-written assembly, pulled in verbatim
// as raw `global_asm!` blocks. The whole module is compiled out when the
// `no-asm` feature is enabled.
#![cfg(not(feature = "no-asm"))]

use core::arch::global_asm;

// Shared macros used by the other assembly files; must be emitted first.
global_asm!(include_str!("hexagon/func_macro.s"), options(raw));

// Double-precision float arithmetic.
global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw));
global_asm!(include_str!("hexagon/dfdiv.s"), options(raw));
global_asm!(include_str!("hexagon/dffma.s"), options(raw));
global_asm!(include_str!("hexagon/dfminmax.s"), options(raw));
global_asm!(include_str!("hexagon/dfmul.s"), options(raw));
global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw));

// Integer division/modulo.
global_asm!(include_str!("hexagon/divdi3.s"), options(raw));
global_asm!(include_str!("hexagon/divsi3.s"), options(raw));

// Fast-math helper libraries.
global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw));
global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw));

// Specialized memcpy variants.
global_asm!(
    include_str!("hexagon/memcpy_forward_vp4cp4n2.s"),
    options(raw)
);
global_asm!(
    include_str!("hexagon/memcpy_likely_aligned.s"),
    options(raw)
);

// Remaining integer and single-precision float routines.
global_asm!(include_str!("hexagon/moddi3.s"), options(raw));
global_asm!(include_str!("hexagon/modsi3.s"), options(raw));
global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw));
global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw));
global_asm!(include_str!("hexagon/udivdi3.s"), options(raw));
global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw));
global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw));
global_asm!(include_str!("hexagon/udivsi3.s"), options(raw));
global_asm!(include_str!("hexagon/umoddi3.s"), options(raw));
global_asm!(include_str!("hexagon/umodsi3.s"), options(raw));

View file

@ -0,0 +1,321 @@
// Double-precision (IEEE-754 binary64) addition and subtraction for
// Hexagon. Defines __hexagon_adddf3 and __hexagon_subdf3; the __qdsp_*
// and __hexagon_fast*_ symbols below are `.set` aliases of the same
// entry points.
// NOTE(review): ABI inferred from the code -- operands in r1:0 and
// r3:2, result in r1:0 (see convert_d2df into r1:0), return via
// `jumpr r31`. Confirm against the Hexagon ABI before changing.
.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3
.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3
.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3
.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3
.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3
.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3
.p2align 5
// r1:0 + r3:2 -> r1:0
__hexagon_adddf3:
// Extract the 11-bit exponent fields (bit 20 of the high words r1/r3,
// i.e. bit 52 of each double) and test both operands for the "normal"
// class (dfclass #2); both predicates must hold for the fast path.
{
r4 = extractu(r1,#11,#20)
r5 = extractu(r3,#11,#20)
r13:12 = combine(##0x20000000,#0)
}
{
p3 = dfclass(r1:0,#2)
p3 = dfclass(r3:2,#2)
r9:8 = r13:12
p2 = cmp.gtu(r5,r4)
}
// Swap operands so the one with the larger exponent sits in r1:0;
// take the slow path when either input is not a normal number.
{
if (!p3) jump .Ladd_abnormal
if (p2) r1:0 = r3:2
if (p2) r3:2 = r1:0
if (p2) r5:4 = combine(r4,r5)
}
// Build the mantissas (with extra guard bits) by inserting the low 52
// bits above a preset constant; r15 = exponent difference.
{
r13:12 = insert(r1:0,#52,#11 -2)
r9:8 = insert(r3:2,#52,#11 -2)
r15 = sub(r4,r5)
r7:6 = combine(#62,#1)
}
.Ladd_continue:
// Align the smaller mantissa: shift right by the (capped) exponent
// difference, folding the shifted-out bits into a sticky bit; negate
// the larger mantissa first if its sign bit (r1 < 0) is set.
{
r15 = min(r15,r7)
r11:10 = neg(r13:12)
p2 = cmp.gt(r1,#-1)
r14 = #0
}
{
if (!p2) r13:12 = r11:10
r11:10 = extractu(r9:8,r15:14)
r9:8 = ASR(r9:8,r15)
r15:14 = #0
}
{
p1 = cmp.eq(r11:10,r15:14)
if (!p1.new) r8 = or(r8,r6)
r5 = add(r4,#-1024 -60)
p3 = cmp.gt(r3,#-1)
}
// Compute both sum and difference; keep the one matching b's sign.
{
r13:12 = add(r13:12,r9:8)
r11:10 = sub(r13:12,r9:8)
r7:6 = combine(#54,##2045)
}
// If the biased exponent is outside (54, 2045) the result may
// overflow/underflow -- take the fixup path.
{
p0 = cmp.gtu(r4,r7)
p0 = !cmp.gtu(r4,r6)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!p3) r13:12 = r11:10
}
// Pack the signed 64-bit result back into a double; a zero mantissa
// needs the rounding-mode-aware zero path.
{
r1:0 = convert_d2df(r13:12)
p0 = cmp.eq(r13,#0)
p0 = cmp.eq(r12,#0)
if (p0.new) jump:nt .Ladd_zero
}
// Rescale by the saved exponent adjustment and return.
{
r1 += asl(r5,#20)
jumpr r31
}
.falign
// r1:0 - r3:2 == r1:0 + (-r3:2): flip b's sign bit and reuse the adder.
__hexagon_subdf3:
{
r3 = togglebit(r3,#31)
jump __qdsp_adddf3
}
.falign
// Exact zero result. The sign of the zero depends on USR bits 23:22
// (presumably the rounding-mode field -- confirm against the Hexagon
// USR spec): mode 2 yields -0, otherwise +0.
.Ladd_zero:
{
r28 = USR
r1:0 = #0
r3 = #1
}
{
r28 = extractu(r28,#2,#22)
r3 = asl(r3,#31)
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = xor(r1,r3)
jumpr r31
}
.falign
// Result exponent is out of the safe range: rebuild the result and
// handle overflow (to ±inf/±max) or underflow (denormalize) explicitly.
.Ladd_ovf_unf:
{
r1:0 = convert_d2df(r13:12)
p0 = cmp.eq(r13,#0)
p0 = cmp.eq(r12,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
r28 = extractu(r1,#11,#20)
r1 += asl(r5,#20)
}
{
r5 = add(r5,r28)
r3:2 = combine(##0x00100000,#0)
}
{
p0 = cmp.gt(r5,##1024 +1024 -2)
if (p0.new) jump:nt .Ladd_ovf
}
// Exponent still positive: the packed result is fine as-is.
{
p0 = cmp.gt(r5,#0)
if (p0.new) jumpr:t r31
r28 = sub(#1,r5)
}
// Underflow: shift the mantissa right to build a denormal.
{
r3:2 = insert(r1:0,#52,#0)
r1:0 = r13:12
}
{
r3:2 = lsr(r3:2,r28)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
.falign
// Overflow: set USR flags 0x28 (presumably overflow|inexact -- confirm
// against the USR spec) and return ±max-finite or ±inf depending on the
// rounding mode and the result sign.
.Ladd_ovf:
{
r1:0 = r13:12
r28 = USR
r13:12 = combine(##0x7fefffff,#-1)
}
{
r5 = extractu(r28,#2,#22)
r28 = or(r28,#0x28)
r9:8 = combine(##0x7ff00000,#0)
}
{
USR = r28
r5 ^= lsr(r1,#31)
r28 = r5
}
{
p0 = !cmp.eq(r28,#1)
p0 = !cmp.eq(r5,#2)
if (p0.new) r13:12 = r9:8
}
{
r1:0 = insert(r13:12,#63,#0)
}
{
p0 = dfcmp.eq(r1:0,r1:0)
jumpr r31
}
// Slow path: at least one operand is NaN, inf, zero, or denormal.
// Order by magnitude so the larger |value| is in r1:0.
.Ladd_abnormal:
{
r13:12 = extractu(r1:0,#63,#0)
r9:8 = extractu(r3:2,#63,#0)
}
{
p3 = cmp.gtu(r13:12,r9:8)
if (!p3.new) r1:0 = r3:2
if (!p3.new) r3:2 = r1:0
}
{
p0 = dfclass(r1:0,#0x0f)
if (!p0.new) jump:nt .Linvalid_nan_add
if (!p3) r13:12 = r9:8
if (!p3) r9:8 = r13:12
}
{
p1 = dfclass(r1:0,#0x08)
if (p1.new) jump:nt .Linf_add
}
{
p2 = dfclass(r3:2,#0x01)
if (p2.new) jump:nt .LB_zero
r13:12 = #0
}
{
p0 = dfclass(r1:0,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
r13:12 = combine(##0x20000000,#0)
}
// a is normal, b is denormal: treat b's exponent as 1 and rejoin the
// main path.
{
r4 = extractu(r1,#11,#20)
r5 = #1
r9:8 = asl(r9:8,#11 -2)
}
{
r13:12 = insert(r1:0,#52,#11 -2)
r15 = sub(r4,r5)
r7:6 = combine(#62,#1)
jump .Ladd_continue
}
// Both operands denormal: add the (sign-applied) raw mantissas
// directly; no rounding can occur.
.Ladd_two_subnormal:
{
r13:12 = extractu(r1:0,#63,#0)
r9:8 = extractu(r3:2,#63,#0)
}
{
r13:12 = neg(r13:12)
r9:8 = neg(r9:8)
p0 = cmp.gt(r1,#-1)
p1 = cmp.gt(r3,#-1)
}
{
if (p0) r13:12 = r1:0
if (p1) r9:8 = r3:2
}
{
r13:12 = add(r13:12,r9:8)
}
{
r9:8 = neg(r13:12)
p0 = cmp.gt(r13,#-1)
r3:2 = #0
}
{
if (!p0) r1:0 = r9:8
if (p0) r1:0 = r13:12
r3 = ##0x80000000
}
{
if (!p0) r1 = or(r1,r3)
p0 = dfcmp.eq(r1:0,r3:2)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
// NaN input: the convert_df2sf ops presumably signal invalid for
// signaling NaNs (confirm); return the all-ones pattern (a quiet NaN).
.Linvalid_nan_add:
{
r28 = convert_df2sf(r1:0)
p0 = dfclass(r3:2,#0x0f)
if (p0.new) r3:2 = r1:0
}
{
r2 = convert_df2sf(r3:2)
r1:0 = #-1
jumpr r31
}
.falign
// b is zero: return a unless a is zero too (then fall through).
.LB_zero:
{
p0 = dfcmp.eq(r13:12,r1:0)
if (!p0.new) jumpr:t r31
}
// (+0) + (-0): sign of the result follows the rounding mode, as in
// .Ladd_zero above.
.Lzero_plus_zero:
{
p0 = cmp.eq(r1:0,r3:2)
if (p0.new) jumpr:t r31
}
{
r28 = USR
}
{
r28 = extractu(r28,#2,#22)
r1:0 = #0
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
jumpr r31
}
// a is inf: inf + (-inf) (both inf, opposite signs) is invalid -> NaN;
// any other b leaves the inf in r1:0 unchanged.
.Linf_add:
{
p0 = !cmp.eq(r1,r3)
p0 = dfclass(r3:2,#8)
if (!p0.new) jumpr:t r31
}
{
r2 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r2)
jumpr r31
}
.size __hexagon_adddf3,.-__hexagon_adddf3

View file

@ -0,0 +1,372 @@
// Double-precision (binary64) division for Hexagon: __hexagon_divdf3,
// with __qdsp_divdf3 / __hexagon_fast*_divdf3 as `.set` aliases.
// Strategy visible below: seed a reciprocal with sfrecipa, refine it
// with sfmpy steps (Newton-Raphson style -- NOTE(review): confirm
// against the Hexagon manual), then form quotient digits with mpyu
// multiply/subtract passes.
// NOTE(review): ABI inferred -- dividend r1:0, divisor r3:2, result
// r1:0, return via `jumpr r31`.
.text
.global __hexagon_divdf3
.type __hexagon_divdf3,@function
.global __qdsp_divdf3 ; .set __qdsp_divdf3, __hexagon_divdf3
.global __hexagon_fast_divdf3 ; .set __hexagon_fast_divdf3, __hexagon_divdf3
.global __hexagon_fast2_divdf3 ; .set __hexagon_fast2_divdf3, __hexagon_divdf3
.p2align 5
__hexagon_divdf3:
// Fast path requires both operands to be normal (dfclass #2).
// r28 = sign of the result (xor of the high words).
{
p2 = dfclass(r1:0,#0x02)
p2 = dfclass(r3:2,#0x02)
r13:12 = combine(r3,r1)
r28 = xor(r1,r3)
}
{
if (!p2) jump .Ldiv_abnormal
r7:6 = extractu(r3:2,#23,#52 -23)
r8 = ##0x3f800001
}
// r9 = divisor's top mantissa bits packed into a single-float ~[1,2)
// for the reciprocal seed; r12/r13 = the two 11-bit exponents.
{
r9 = or(r8,r6)
r13 = extractu(r13,#11,#52 -32)
r12 = extractu(r12,#11,#52 -32)
p3 = cmp.gt(r28,#-1)
}
.Ldenorm_continue:
// Reciprocal seed + two refinement steps; r12 becomes the tentative
// result exponent (difference of input exponents).
{
r11,p0 = sfrecipa(r8,r9)
r10 = and(r8,#-2)
r28 = #1
r12 = sub(r12,r13)
}
{
r10 -= sfmpy(r11,r9):lib
r1 = insert(r28,#11 +1,#52 -32)
r13 = ##0x00800000 << 3
}
{
r11 += sfmpy(r11,r10):lib
r3 = insert(r28,#11 +1,#52 -32)
r10 = and(r8,#-2)
}
{
r10 -= sfmpy(r11,r9):lib
r5 = #-0x3ff +1
r4 = #0x3ff -1
}
// p1 = exponent within the always-representable range (no ovf/unf).
{
r11 += sfmpy(r11,r10):lib
p1 = cmp.gt(r12,r5)
p1 = !cmp.gt(r12,r4)
}
{
r13 = insert(r11,#23,#3)
r5:4 = #0
r12 = add(r12,#-61)
}
{
r13 = add(r13,#((-3) << 3))
}
// Four quotient-digit passes: multiply remainder by the reciprocal,
// accumulate into r5:4, and subtract digit*divisor from the remainder.
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); }
// Final correction: if remainder >= divisor, bump the quotient.
{
r15:14 = sub(r1:0,r3:2)
p0 = cmp.gtu(r3:2,r1:0)
if (!p0.new) r6 = #2
}
{
r5:4 = add(r5:4,r7:6)
if (!p0) r1:0 = r15:14
r15:14 = #0
}
// Nonzero remainder -> set the sticky bit for correct rounding.
{
p0 = cmp.eq(r1:0,r15:14)
if (!p0.new) r4 = or(r4,r28)
}
// Apply the result sign, pack, and rescale.
{
r7:6 = neg(r5:4)
}
{
if (!p3) r5:4 = r7:6
}
{
r1:0 = convert_d2df(r5:4)
if (!p1) jump .Ldiv_ovf_unf
}
{
r1 += asl(r12,#52 -32)
jumpr r31
}
// Exponent out of range: recompute scaling, then overflow to ±inf/±max
// or denormalize with sticky/inexact handling.
.Ldiv_ovf_unf:
{
r1 += asl(r12,#52 -32)
r13 = extractu(r1,#11,#52 -32)
}
{
r7:6 = abs(r5:4)
r12 = add(r12,r13)
}
{
p0 = cmp.gt(r12,##0x3ff +0x3ff)
if (p0.new) jump:nt .Ldiv_ovf
}
{
p0 = cmp.gt(r12,#0)
if (p0.new) jump:nt .Lpossible_unf2
}
// Underflow: shift the quotient down into a denormal, preserving a
// sticky bit; 0x030 presumably sets underflow|inexact in USR (confirm).
{
r13 = add(clb(r7:6),#-1)
r12 = sub(#7,r12)
r10 = USR
r11 = #63
}
{
r13 = min(r12,r11)
r11 = or(r10,#0x030)
r7:6 = asl(r7:6,r13)
r12 = #0
}
{
r15:14 = extractu(r7:6,r13:12)
r7:6 = lsr(r7:6,r13)
r3:2 = #1
}
{
p0 = cmp.gtu(r3:2,r15:14)
if (!p0.new) r6 = or(r2,r6)
r7 = setbit(r7,#52 -32+4)
}
{
r5:4 = neg(r7:6)
p0 = bitsclr(r6,#(1<<4)-1)
if (!p0.new) r10 = r11
}
{
USR = r10
if (p3) r5:4 = r7:6
r10 = #-0x3ff -(52 +4)
}
{
r1:0 = convert_d2df(r5:4)
}
{
r1 += asl(r10,#52 -32)
jumpr r31
}
// Result is right at the denormal boundary: raise underflow|inexact
// only if the value is the smallest normal with nonzero round bits.
.Lpossible_unf2:
{
r3:2 = extractu(r1:0,#63,#0)
r15:14 = combine(##0x00100000,#0)
r10 = #0x7FFF
}
{
p0 = dfcmp.eq(r15:14,r3:2)
p0 = bitsset(r7,r10)
}
{
if (!p0) jumpr r31
r10 = USR
}
{
r10 = or(r10,#0x30)
}
{
USR = r10
}
{
p0 = dfcmp.eq(r1:0,r1:0)
jumpr r31
}
// Overflow: set USR flags 0x28 and pick ±max-finite or ±inf by
// rounding mode (USR bits 23:22) and result sign.
.Ldiv_ovf:
{
r10 = USR
r3:2 = combine(##0x7fefffff,#-1)
r1 = mux(p3,#0,#-1)
}
{
r7:6 = combine(##0x7ff00000,#0)
r5 = extractu(r10,#2,#22)
r10 = or(r10,#0x28)
}
{
USR = r10
r5 ^= lsr(r1,#31)
r4 = r5
}
{
p0 = !cmp.eq(r4,#1)
p0 = !cmp.eq(r5,#2)
if (p0.new) r3:2 = r7:6
p0 = dfcmp.eq(r3:2,r3:2)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// Slow path: NaN, inf, zero, or denormal operand(s). Classify and
// dispatch; denormals get normalized and rejoin .Ldenorm_continue.
.Ldiv_abnormal:
{
p0 = dfclass(r1:0,#0x0F)
p0 = dfclass(r3:2,#0x0F)
p3 = cmp.gt(r28,#-1)
}
{
p1 = dfclass(r1:0,#0x08)
p1 = dfclass(r3:2,#0x08)
}
{
p2 = dfclass(r1:0,#0x01)
p2 = dfclass(r3:2,#0x01)
}
// NaN anywhere -> .Ldiv_nan; inf/inf or 0/0 -> invalid.
{
if (!p0) jump .Ldiv_nan
if (p1) jump .Ldiv_invalid
}
{
if (p2) jump .Ldiv_invalid
}
// 0/x or x/inf -> signed zero; inf/x or x/0 -> signed inf.
{
p2 = dfclass(r1:0,#(0x0F ^ 0x01))
p2 = dfclass(r3:2,#(0x0F ^ 0x08))
}
{
p1 = dfclass(r1:0,#(0x0F ^ 0x08))
p1 = dfclass(r3:2,#(0x0F ^ 0x01))
}
{
if (!p2) jump .Ldiv_zero_result
if (!p1) jump .Ldiv_inf_result
}
// Normalize denormal operand(s) and recompute effective exponents.
{
p0 = dfclass(r1:0,#0x02)
p1 = dfclass(r3:2,#0x02)
r10 = ##0x00100000
}
{
r13:12 = combine(r3,r1)
r1 = insert(r10,#11 +1,#52 -32)
r3 = insert(r10,#11 +1,#52 -32)
}
{
if (p0) r1 = or(r1,r10)
if (p1) r3 = or(r3,r10)
}
{
r5 = add(clb(r1:0),#-11)
r4 = add(clb(r3:2),#-11)
r10 = #1
}
{
r12 = extractu(r12,#11,#52 -32)
r13 = extractu(r13,#11,#52 -32)
}
{
r1:0 = asl(r1:0,r5)
r3:2 = asl(r3:2,r4)
if (!p0) r12 = sub(r10,r5)
if (!p1) r13 = sub(r10,r4)
}
{
r7:6 = extractu(r3:2,#23,#52 -23)
}
{
r9 = or(r8,r6)
jump .Ldenorm_continue
}
// Zero result with the correct sign (xor of operand signs).
.Ldiv_zero_result:
{
r1 = xor(r1,r3)
r3:2 = #0
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// Infinite result; x/0 with finite nonzero x also raises the
// divide-by-zero flag (USR |= 0x04 -- presumably; confirm).
.Ldiv_inf_result:
{
p2 = dfclass(r3:2,#0x01)
p2 = dfclass(r1:0,#(0x0F ^ 0x08))
}
{
r10 = USR
if (!p2) jump 1f
r1 = xor(r1,r3)
}
{
r10 = or(r10,#0x04)
}
{
USR = r10
}
1:
{
r3:2 = combine(##0x7ff00000,#0)
p0 = dfcmp.uo(r3:2,r3:2)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// NaN operand: signal via convert_df2sf (presumably raises invalid for
// signaling NaNs -- confirm) and return the all-ones NaN pattern.
.Ldiv_nan:
{
p0 = dfclass(r1:0,#0x10)
p1 = dfclass(r3:2,#0x10)
if (!p0.new) r1:0 = r3:2
if (!p1.new) r3:2 = r1:0
}
{
r5 = convert_df2sf(r1:0)
r4 = convert_df2sf(r3:2)
}
{
r1:0 = #-1
jumpr r31
}
// inf/inf or 0/0: produce NaN via converting a single-float sNaN-ish
// pattern (0x7f800001).
.Ldiv_invalid:
{
r10 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r10)
jumpr r31
}
.size __hexagon_divdf3,.-__hexagon_divdf3

View file

@ -0,0 +1,534 @@
// Double-precision fused multiply-add for Hexagon:
// fma(a, b, c) = a*b + c with a single rounding.
// __hexagon_fmadf4, __hexagon_fmadf5, and the local `fma` label are all
// the same entry; __qdsp_fmadf5 is a `.set` alias.
// The full product is built from four 32x32 mpyu partial products
// (r12*r14, r14*r13, r12*r15, r13*r15) before the addend is folded in.
// NOTE(review): ABI inferred -- a in r1:0, b in r3:2, c in r5:4,
// result in r1:0; r17:16/r19:18 are spilled to a 32-byte frame.
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
.global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
fma:
// Fast path needs a and b both normal (dfclass #2).
{
p0 = dfclass(r1:0,#2)
p0 = dfclass(r3:2,#2)
r13:12 = #0
r15:14 = #0
}
// Build the two mantissas (insert sets the hidden-one region; r7 is
// the explicit leading-one bit) and reserve the stack frame.
{
r13:12 = insert(r1:0,#52,#11 -3)
r15:14 = insert(r3:2,#52,#11 -3)
r7 = ##0x10000000
allocframe(#32)
}
// First partial product (low x low) starts while abnormal a/b bail out.
{
r9:8 = mpyu(r12,r14)
if (!p0) jump .Lfma_abnormal_ab
r13 = or(r13,r7)
r15 = or(r15,r7)
}
// Abnormal addend c gets fixed up separately, then restarts here.
{
p0 = dfclass(r5:4,#2)
if (!p0.new) jump:nt .Lfma_abnormal_c
r11:10 = combine(r7,#0)
r7:6 = combine(#0,r9)
}
.Lfma_abnormal_c_restart:
// Accumulate the cross products; build c's mantissa in r11:10;
// spill callee-saved r17:16/r19:18.
{
r7:6 += mpyu(r14,r13)
r11:10 = insert(r5:4,#52,#11 -3)
memd(r29+#0) = r17:16
memd(r29+#8) = r19:18
}
{
r7:6 += mpyu(r12,r15)
r19:18 = neg(r11:10)
p0 = cmp.gt(r5,#-1)
r28 = xor(r1,r3)
}
// r18/r19 = exponents; product exponent = expo(a) + expo(b).
{
r18 = extractu(r1,#11,#20)
r19 = extractu(r3,#11,#20)
r17:16 = combine(#0,r7)
if (!p0) r11:10 = r19:18
}
{
r17:16 += mpyu(r13,r15)
r9:8 = combine(r6,r8)
r18 = add(r18,r19)
r19 = extractu(r5,#11,#20)
}
{
r18 = add(r18,#-1023 +(4))
p3 = !cmp.gt(r28,#-1)
r7:6 = #0
r15:14 = #0
}
// Negate the 128-bit product if the product sign (p3) is negative;
// order product vs. addend by exponent.
{
r7:6 = sub(r7:6,r9:8,p3):carry
p0 = !cmp.gt(r28,#-1)
p1 = cmp.gt(r19,r18)
if (p1.new) r19:18 = combine(r18,r19)
}
{
r15:14 = sub(r15:14,r17:16,p3):carry
if (p0) r9:8 = r7:6
r7:6 = #0
r19 = sub(r18,r19)
}
{
if (p0) r17:16 = r15:14
p0 = cmp.gt(r19,#63)
if (p1) r9:8 = r7:6
if (p1) r7:6 = r9:8
}
{
if (p1) r17:16 = r11:10
if (p1) r11:10 = r17:16
if (p0) r19 = add(r19,#-64)
r28 = #63
}
// Align the smaller term right by the exponent difference, capturing
// shifted-out bits (r5:4) for the sticky computation.
{
if (p0) r7:6 = r11:10
r28 = asr(r11,#31)
r13 = min(r19,r28)
r12 = #0
}
{
if (p0) r11:10 = combine(r28,r28)
r5:4 = extract(r7:6,r13:12)
r7:6 = lsr(r7:6,r13)
r12 = sub(#64,r13)
}
{
r15:14 = #0
r28 = #-2
r7:6 |= lsl(r11:10,r12)
r11:10 = asr(r11:10,r13)
}
// Fold sticky into the low bit and add the aligned 128-bit terms.
{
p3 = cmp.gtu(r5:4,r15:14)
if (p3.new) r6 = and(r6,r28)
r15:14 = #1
r5:4 = #0
}
{
r9:8 = add(r7:6,r9:8,p3):carry
}
{
r17:16 = add(r11:10,r17:16,p3):carry
r28 = #62
}
// Normalize: if the leading bit is not already at position 62, shift
// 62 bits up from the low half first.
{
r12 = add(clb(r17:16),#-2)
if (!cmp.eq(r12.new,r28)) jump:t 1f
}
{
r11:10 = extractu(r9:8,#62,#2)
r9:8 = asl(r9:8,#62)
r18 = add(r18,#-62)
}
{
r17:16 = insert(r11:10,#62,#0)
}
{
r12 = add(clb(r17:16),#-2)
}
.falign
1:
{
r11:10 = asl(r17:16,r12)
r5:4 |= asl(r9:8,r12)
r13 = sub(#64,r12)
r18 = sub(r18,r12)
}
// Set sticky from the remaining low bits; check the exponent range.
{
r11:10 |= lsr(r9:8,r13)
p2 = cmp.gtu(r15:14,r5:4)
r28 = #1023 +1023 -2
}
{
if (!p2) r10 = or(r10,r14)
p0 = !cmp.gt(r18,r28)
p0 = cmp.gt(r18,#1)
if (!p0.new) jump:nt .Lfma_ovf_unf
}
// Pack, rescale, restore saved registers, and return.
{
p0 = cmp.gtu(r15:14,r11:10)
r1:0 = convert_d2df(r11:10)
r18 = add(r18,#-1023 -60)
r17:16 = memd(r29+#0)
}
{
r1 += asl(r18,#20)
r19:18 = memd(r29+#8)
if (!p0) dealloc_return
}
// Exact cancellation to zero: sign follows the rounding mode
// (USR bits 23:22, presumably; mode 2 -> -0).
.Ladd_yields_zero:
{
r28 = USR
r1:0 = #0
}
{
r28 = extractu(r28,#2,#22)
r17:16 = memd(r29+#0)
r19:18 = memd(r29+#8)
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
dealloc_return
}
// Exponent out of range: re-derive scaling and handle overflow or
// gradual underflow explicitly.
.Lfma_ovf_unf:
{
p0 = cmp.gtu(r15:14,r11:10)
if (p0.new) jump:nt .Ladd_yields_zero
}
{
r1:0 = convert_d2df(r11:10)
r18 = add(r18,#-1023 -60)
r28 = r18
}
{
r1 += asl(r18,#20)
r7 = extractu(r1,#11,#20)
}
{
r6 = add(r18,r7)
r17:16 = memd(r29+#0)
r19:18 = memd(r29+#8)
r9:8 = abs(r11:10)
}
{
p0 = cmp.gt(r6,##1023 +1023)
if (p0.new) jump:nt .Lfma_ovf
}
{
p0 = cmp.gt(r6,#0)
if (p0.new) jump:nt .Lpossible_unf0
}
// Underflow: shift down to a denormal with sticky bits; 0x0030
// presumably sets underflow|inexact in USR (confirm).
{
r7 = add(clb(r9:8),#-2)
r6 = sub(#1+5,r28)
p3 = cmp.gt(r11,#-1)
}
{
r6 = add(r6,r7)
r9:8 = asl(r9:8,r7)
r1 = USR
r28 = #63
}
{
r7 = min(r6,r28)
r6 = #0
r0 = #0x0030
}
{
r3:2 = extractu(r9:8,r7:6)
r9:8 = asr(r9:8,r7)
}
{
p0 = cmp.gtu(r15:14,r3:2)
if (!p0.new) r8 = or(r8,r14)
r9 = setbit(r9,#20 +3)
}
{
r11:10 = neg(r9:8)
p1 = bitsclr(r8,#(1<<3)-1)
if (!p1.new) r1 = or(r1,r0)
r3:2 = #0
}
{
if (p3) r11:10 = r9:8
USR = r1
r28 = #-1023 -(52 +3)
}
{
r1:0 = convert_d2df(r11:10)
}
{
r1 += asl(r28,#20)
dealloc_return
}
// Result sits at the smallest-normal boundary: raise flags only when
// the round bits were nonzero.
.Lpossible_unf0:
{
r28 = ##0x7fefffff
r9:8 = abs(r11:10)
}
{
p0 = cmp.eq(r0,#0)
p0 = bitsclr(r1,r28)
if (!p0.new) dealloc_return:t
r28 = #0x7fff
}
{
p0 = bitsset(r9,r28)
r3 = USR
r2 = #0x0030
}
{
if (p0) r3 = or(r3,r2)
}
{
USR = r3
}
{
p0 = dfcmp.eq(r1:0,r1:0)
dealloc_return
}
// Overflow: set USR 0x28 and return ±max-finite or ±inf depending on
// rounding mode (USR bits 23:22) and result sign.
.Lfma_ovf:
{
r28 = USR
r11:10 = combine(##0x7fefffff,#-1)
r1:0 = r11:10
}
{
r9:8 = combine(##0x7ff00000,#0)
r3 = extractu(r28,#2,#22)
r28 = or(r28,#0x28)
}
{
USR = r28
r3 ^= lsr(r1,#31)
r2 = r3
}
{
p0 = !cmp.eq(r2,#1)
p0 = !cmp.eq(r3,#2)
}
{
p0 = dfcmp.eq(r9:8,r9:8)
if (p0.new) r11:10 = r9:8
}
{
r1:0 = insert(r11:10,#63,#0)
dealloc_return
}
// a or b is NaN/inf/zero/denormal (frame not yet needed -- drop it).
// Order by magnitude so the larger |value| is in r1:0.
.Lfma_abnormal_ab:
{
r9:8 = extractu(r1:0,#63,#0)
r11:10 = extractu(r3:2,#63,#0)
deallocframe
}
{
p3 = cmp.gtu(r9:8,r11:10)
if (!p3.new) r1:0 = r3:2
if (!p3.new) r3:2 = r1:0
}
{
p0 = dfclass(r1:0,#0x0f)
if (!p0.new) jump:nt .Lnan
if (!p3) r9:8 = r11:10
if (!p3) r11:10 = r9:8
}
// inf * finite-nonzero -> .Lab_inf; inf * 0 -> invalid.
{
p1 = dfclass(r1:0,#0x08)
p1 = dfclass(r3:2,#0x0e)
}
{
p0 = dfclass(r1:0,#0x08)
p0 = dfclass(r3:2,#0x01)
}
{
if (p1) jump .Lab_inf
p2 = dfclass(r3:2,#0x01)
}
{
if (p0) jump .Linvalid
if (p2) jump .Lab_true_zero
r28 = ##0x7c000000
}
// Denormal operand: rescale b up and compensate a's exponent, unless
// a's exponent is already tiny (.Lfma_ab_tiny).
{
p0 = bitsclr(r1,r28)
if (p0.new) jump:nt .Lfma_ab_tiny
}
{
r28 = add(clb(r11:10),#-11)
}
{
r11:10 = asl(r11:10,r28)
}
{
r3:2 = insert(r11:10,#63,#0)
r1 -= asl(r28,#20)
}
jump fma
// Both tiny: clamp both mantissa-normalized values to the smallest
// normal magnitude and retry.
.Lfma_ab_tiny:
r9:8 = combine(##0x00100000,#0)
{
r1:0 = insert(r9:8,#63,#0)
r3:2 = insert(r9:8,#63,#0)
}
jump fma
// a*b is ±inf (sign from both operands); NaN c still wins, and
// inf + (-inf) is invalid.
.Lab_inf:
{
r3:2 = lsr(r3:2,#63)
p0 = dfclass(r5:4,#0x10)
}
{
r1:0 ^= asl(r3:2,#63)
if (p0) jump .Lnan
}
{
p1 = dfclass(r5:4,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
{
jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{
p0 = dfcmp.eq(r1:0,r5:4)
if (!p0.new) jump:nt .Linvalid
}
{
jumpr r31
}
// NaN path: the convert_df2sf ops presumably signal invalid for
// signaling NaNs (confirm); return the all-ones NaN pattern.
.Lnan:
{
p0 = dfclass(r3:2,#0x10)
p1 = dfclass(r5:4,#0x10)
if (!p0.new) r3:2 = r1:0
if (!p1.new) r5:4 = r1:0
}
{
r3 = convert_df2sf(r3:2)
r2 = convert_df2sf(r5:4)
}
{
r3 = convert_df2sf(r1:0)
r1:0 = #-1
jumpr r31
}
// Invalid operation: produce NaN from the 0x7f800001 single pattern.
.Linvalid:
{
r28 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r28)
jumpr r31
}
// a*b is a true zero: result is c, except 0 + 0 needs the signed-zero
// rounding rule (mirrors .Ladd_yields_zero).
.Lab_true_zero:
{
p0 = dfclass(r5:4,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) r1:0 = r5:4
}
{
p0 = dfcmp.eq(r3:2,r5:4)
r1 = lsr(r1,#31)
}
{
r3 ^= asl(r1,#31)
if (!p0) r1:0 = r5:4
if (!p0) jumpr r31
}
{
p0 = cmp.eq(r3:2,r5:4)
if (p0.new) jumpr:t r31
r1:0 = r3:2
}
{
r28 = USR
}
{
r28 = extractu(r28,#2,#22)
r1:0 = #0
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
jumpr r31
}
.falign
// Abnormal addend c: NaN c -> NaN; inf c -> c; zero c -> a*b via
// __hexagon_muldf3 tail-call; denormal c -> give it exponent 1 and
// restart the accumulation.
.Lfma_abnormal_c:
{
p0 = dfclass(r5:4,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) r1:0 = r5:4
deallocframe
}
{
p0 = dfclass(r5:4,#0x08)
if (p0.new) r1:0 = r5:4
if (p0.new) jumpr:nt r31
}
{
p0 = dfclass(r5:4,#0x01)
if (p0.new) jump:nt __hexagon_muldf3
r28 = #1
}
{
allocframe(#32)
r11:10 = #0
r5 = insert(r28,#11,#20)
jump .Lfma_abnormal_c_restart
}
.size fma,.-fma

View file

@ -0,0 +1,45 @@
// Double-precision min/max for Hexagon: __hexagon_mindf3 and
// __hexagon_maxdf3 (with __qdsp_* `.set` aliases).
// NOTE(review): ABI inferred -- operands in r1:0 and r3:2, result in
// r1:0, return via `jumpr r31`.
.text
.global __hexagon_mindf3
.global __hexagon_maxdf3
.type __hexagon_mindf3,@function
.type __hexagon_maxdf3,@function
.global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3
.global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3
.p2align 5
// min(a, b): take b when a is NaN (dfclass #0x10) or a > b; when the
// two compare equal, OR the bit patterns so min(+0, -0) keeps the sign
// bit set, i.e. returns -0.
__hexagon_mindf3:
{
p0 = dfclass(r1:0,#0x10)
p1 = dfcmp.gt(r1:0,r3:2)
r5:4 = r1:0
}
{
if (p0) r1:0 = r3:2
if (p1) r1:0 = r3:2
p2 = dfcmp.eq(r1:0,r3:2)
if (!p2.new) jumpr:t r31
}
{
r1:0 = or(r5:4,r3:2)
jumpr r31
}
.size __hexagon_mindf3,.-__hexagon_mindf3
.falign
// max(a, b): take b when a is NaN or b > a; when equal, AND the bit
// patterns so max(+0, -0) clears the sign bit, i.e. returns +0.
__hexagon_maxdf3:
{
p0 = dfclass(r1:0,#0x10)
p1 = dfcmp.gt(r3:2,r1:0)
r5:4 = r1:0
}
{
if (p0) r1:0 = r3:2
if (p1) r1:0 = r3:2
p2 = dfcmp.eq(r1:0,r3:2)
if (!p2.new) jumpr:t r31
}
{
r1:0 = and(r5:4,r3:2)
jumpr r31
}
.size __hexagon_maxdf3,.-__hexagon_maxdf3

Some files were not shown because too many files have changed in this diff Show more