Merge compiler-builtins as a Josh subtree
Use the Josh [1] utility to add `compiler-builtins` as a subtree, which
will allow us to stop using crates.io for updates. This is intended to
help resolve some problems when unstable features change and require
code changes in `compiler-builtins`, which sometimes gets trapped in a
bootstrap cycle.
This was done using `josh-filter` built from the r24.10.04 tag:
git fetch https://github.com/rust-lang/compiler-builtins.git 233434412fe7eced8f1ddbfeddabef1d55e493bd
josh-filter ":prefix=library/compiler-builtins" FETCH_HEAD
git merge --allow-unrelated FILTERED_HEAD
The HEAD in the `compiler-builtins` repository is 233434412f ("fix an if
statement that can be collapsed").
[1]: https://github.com/josh-project/josh
This commit is contained in:
commit
fcb3000340
380 changed files with 52998 additions and 0 deletions
16
library/compiler-builtins/.editorconfig
Normal file
16
library/compiler-builtins/.editorconfig
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# EditorConfig helps developers define and maintain consistent
|
||||
# coding styles between different editors and IDEs
|
||||
# editorconfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
[*.yml]
|
||||
indent_size = 2
|
||||
6
library/compiler-builtins/.git-blame-ignore-revs
Normal file
6
library/compiler-builtins/.git-blame-ignore-revs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make
|
||||
# `git blame` ignore the following commits.
|
||||
|
||||
# Reformat with a new `.rustfmt.toml`
|
||||
# In rust-lang/libm this was 5882cabb83c30bf7c36023f9a55a80583636b0e8
|
||||
4bb07a6275cc628ef81c65ac971dc6479963322f
|
||||
344
library/compiler-builtins/.github/workflows/main.yaml
vendored
Normal file
344
library/compiler-builtins/.github/workflows/main.yaml
vendored
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
name: CI
|
||||
on:
|
||||
push: { branches: [master] }
|
||||
pull_request:
|
||||
|
||||
concurrency:
|
||||
# Make sure that new pushes cancel running jobs
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTDOCFLAGS: -Dwarnings
|
||||
RUSTFLAGS: -Dwarnings
|
||||
RUST_BACKTRACE: full
|
||||
BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
|
||||
|
||||
jobs:
|
||||
# Determine which tests should be run based on changed files.
|
||||
calculate_vars:
|
||||
name: Calculate workflow variables
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
outputs:
|
||||
extensive_matrix: ${{ steps.script.outputs.extensive_matrix }}
|
||||
may_skip_libm_ci: ${{ steps.script.outputs.may_skip_libm_ci }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 500
|
||||
- name: Fetch pull request ref
|
||||
run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
|
||||
if: github.event_name == 'pull_request'
|
||||
- run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
|
||||
id: script
|
||||
|
||||
test:
|
||||
name: Build and test
|
||||
timeout-minutes: 60
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: aarch64-apple-darwin
|
||||
os: macos-15
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-24.04-arm
|
||||
- target: aarch64-pc-windows-msvc
|
||||
os: windows-2025
|
||||
test_verbatim: 1
|
||||
build_only: 1
|
||||
- target: arm-unknown-linux-gnueabi
|
||||
os: ubuntu-24.04
|
||||
- target: arm-unknown-linux-gnueabihf
|
||||
os: ubuntu-24.04
|
||||
- target: armv7-unknown-linux-gnueabihf
|
||||
os: ubuntu-24.04
|
||||
- target: i586-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: i686-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: loongarch64-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: powerpc-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: powerpc64-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: powerpc64le-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: riscv64gc-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: thumbv6m-none-eabi
|
||||
os: ubuntu-24.04
|
||||
- target: thumbv7em-none-eabi
|
||||
os: ubuntu-24.04
|
||||
- target: thumbv7em-none-eabihf
|
||||
os: ubuntu-24.04
|
||||
- target: thumbv7m-none-eabi
|
||||
os: ubuntu-24.04
|
||||
- target: wasm32-unknown-unknown
|
||||
os: ubuntu-24.04
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-24.04
|
||||
- target: x86_64-apple-darwin
|
||||
os: macos-13
|
||||
- target: i686-pc-windows-msvc
|
||||
os: windows-2025
|
||||
test_verbatim: 1
|
||||
- target: x86_64-pc-windows-msvc
|
||||
os: windows-2025
|
||||
test_verbatim: 1
|
||||
- target: i686-pc-windows-gnu
|
||||
os: windows-2025
|
||||
channel: nightly-i686-gnu
|
||||
- target: x86_64-pc-windows-gnu
|
||||
os: windows-2025
|
||||
channel: nightly-x86_64-gnu
|
||||
runs-on: ${{ matrix.os }}
|
||||
needs: [calculate_vars]
|
||||
env:
|
||||
BUILD_ONLY: ${{ matrix.build_only }}
|
||||
TEST_VERBATIM: ${{ matrix.test_verbatim }}
|
||||
MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
|
||||
steps:
|
||||
- name: Print runner information
|
||||
run: uname -a
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install Rust (rustup)
|
||||
shell: bash
|
||||
run: |
|
||||
channel="nightly"
|
||||
# Account for channels that have required components (MinGW)
|
||||
[ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
|
||||
rustup update "$channel" --no-self-update
|
||||
rustup default "$channel"
|
||||
rustup target add "${{ matrix.target }}"
|
||||
rustup component add llvm-tools-preview
|
||||
- uses: taiki-e/install-action@nextest
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
key: ${{ matrix.target }}
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v4
|
||||
if: matrix.os == 'ubuntu-24.04'
|
||||
with:
|
||||
path: /tmp/.buildx-cache
|
||||
key: ${{ matrix.target }}-buildx-${{ github.sha }}
|
||||
restore-keys: ${{ matrix.target }}-buildx-
|
||||
# Configure buildx to use Docker layer caching
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
if: matrix.os == 'ubuntu-24.04'
|
||||
|
||||
- name: Cache compiler-rt
|
||||
id: cache-compiler-rt
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: compiler-rt
|
||||
key: ${{ runner.os }}-compiler-rt-${{ hashFiles('ci/download-compiler-rt.sh') }}
|
||||
- name: Download compiler-rt reference sources
|
||||
if: steps.cache-compiler-rt.outputs.cache-hit != 'true'
|
||||
run: ./ci/download-compiler-rt.sh
|
||||
shell: bash
|
||||
- run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV"
|
||||
shell: bash
|
||||
|
||||
- name: Verify API list
|
||||
if: matrix.os == 'ubuntu-24.04'
|
||||
run: python3 etc/update-api-list.py --check
|
||||
|
||||
# Non-linux tests just use our raw script
|
||||
- name: Run locally
|
||||
if: matrix.os != 'ubuntu-24.04'
|
||||
shell: bash
|
||||
run: ./ci/run.sh ${{ matrix.target }}
|
||||
|
||||
# Otherwise we use our docker containers to run builds
|
||||
- name: Run in Docker
|
||||
if: matrix.os == 'ubuntu-24.04'
|
||||
run: ./ci/run-docker.sh ${{ matrix.target }}
|
||||
|
||||
- name: Print test logs if available
|
||||
if: always()
|
||||
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
|
||||
shell: bash
|
||||
|
||||
# Workaround to keep Docker cache smaller
|
||||
# https://github.com/docker/build-push-action/issues/252
|
||||
# https://github.com/moby/buildkit/issues/1896
|
||||
- name: Move Docker cache
|
||||
if: matrix.os == 'ubuntu-24.04'
|
||||
run: |
|
||||
rm -rf /tmp/.buildx-cache
|
||||
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
|
||||
|
||||
clippy:
|
||||
name: Clippy
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
# Unlike rustfmt, stable clippy does not work on code with nightly features.
|
||||
- name: Install nightly `clippy`
|
||||
run: |
|
||||
rustup set profile minimal
|
||||
rustup default nightly
|
||||
rustup component add clippy
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- run: cargo clippy --workspace --all-targets
|
||||
|
||||
benchmarks:
|
||||
name: Benchmarks
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
with:
|
||||
submodules: true
|
||||
- uses: taiki-e/install-action@cargo-binstall
|
||||
|
||||
- name: Set up dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
|
||||
rustup update "$BENCHMARK_RUSTC" --no-self-update
|
||||
rustup default "$BENCHMARK_RUSTC"
|
||||
# Install the version of iai-callgrind-runner that is specified in Cargo.toml
|
||||
iai_version="$(cargo metadata --format-version=1 --features icount |
|
||||
jq -r '.packages[] | select(.name == "iai-callgrind").version')"
|
||||
cargo binstall -y iai-callgrind-runner --version "$iai_version"
|
||||
sudo apt-get install valgrind
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
|
||||
- name: Run icount benchmarks
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
run: ./ci/bench-icount.sh
|
||||
|
||||
- name: Upload the benchmark baseline
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.BASELINE_NAME }}
|
||||
path: ${{ env.BASELINE_NAME }}.tar.xz
|
||||
|
||||
- name: Run wall time benchmarks
|
||||
run: |
|
||||
# Always use the same seed for benchmarks. Ideally we should switch to a
|
||||
# non-random generator.
|
||||
export LIBM_SEED=benchesbenchesbenchesbencheswoo!
|
||||
cargo bench --package libm-test \
|
||||
--no-default-features \
|
||||
--features short-benchmarks,build-musl,libm/force-soft-floats
|
||||
|
||||
- name: Print test logs if available
|
||||
if: always()
|
||||
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
|
||||
shell: bash
|
||||
|
||||
miri:
|
||||
name: Miri
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install Rust (rustup)
|
||||
run: rustup update nightly --no-self-update && rustup default nightly
|
||||
shell: bash
|
||||
- run: rustup component add miri
|
||||
- run: cargo miri setup
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- run: ./ci/miri.sh
|
||||
|
||||
msrv:
|
||||
name: Check libm MSRV
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust
|
||||
run: |
|
||||
msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
|
||||
echo "MSRV: $msrv"
|
||||
rustup update "$msrv" --no-self-update && rustup default "$msrv"
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- run: |
|
||||
# FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see
|
||||
# `edition = "2024"` and get spooked.
|
||||
rm Cargo.toml
|
||||
cargo build --manifest-path libm/Cargo.toml
|
||||
|
||||
rustfmt:
|
||||
name: Rustfmt
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install stable `rustfmt`
|
||||
run: rustup set profile minimal && rustup default stable && rustup component add rustfmt
|
||||
- run: cargo fmt -- --check
|
||||
|
||||
extensive:
|
||||
name: Extensive tests for ${{ matrix.ty }}
|
||||
needs:
|
||||
# Wait on `clippy` so we have some confidence that the crate will build
|
||||
- clippy
|
||||
- calculate_vars
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 240 # 4 hours
|
||||
strategy:
|
||||
matrix:
|
||||
# Use the output from `calculate_vars` to create the matrix
|
||||
# FIXME: it would be better to run all jobs (i.e. all types) but mark those that
|
||||
# didn't change as skipped, rather than completely excluding the job. However,
|
||||
# this is not currently possible https://github.com/actions/runner/issues/1985.
|
||||
include: ${{ fromJSON(needs.calculate_vars.outputs.extensive_matrix).extensive_matrix }}
|
||||
env:
|
||||
TO_TEST: ${{ matrix.to_test }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install Rust
|
||||
run: |
|
||||
rustup update nightly --no-self-update
|
||||
rustup default nightly
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Run extensive tests
|
||||
run: ./ci/run-extensive.sh
|
||||
- name: Print test logs if available
|
||||
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
|
||||
shell: bash
|
||||
|
||||
success:
|
||||
needs:
|
||||
- benchmarks
|
||||
- clippy
|
||||
- extensive
|
||||
- miri
|
||||
- msrv
|
||||
- rustfmt
|
||||
- test
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
# GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
|
||||
# failed" as success. So we have to do some contortions to ensure the job fails if any of its
|
||||
# dependencies fails.
|
||||
if: always() # make sure this is never "skipped"
|
||||
steps:
|
||||
# Manually check the status of all dependencies. `if: failure()` does not work.
|
||||
- name: check if any dependency failed
|
||||
run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
|
||||
25
library/compiler-builtins/.github/workflows/publish.yaml
vendored
Normal file
25
library/compiler-builtins/.github/workflows/publish.yaml
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
name: Release-plz
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: write
|
||||
|
||||
on:
|
||||
push: { branches: [master] }
|
||||
|
||||
jobs:
|
||||
release-plz:
|
||||
name: Release-plz
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install Rust (rustup)
|
||||
run: rustup update nightly --no-self-update && rustup default nightly
|
||||
- name: Run release-plz
|
||||
uses: MarcoIeni/release-plz-action@v0.5
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
16
library/compiler-builtins/.gitignore
vendored
Normal file
16
library/compiler-builtins/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Rust files
|
||||
Cargo.lock
|
||||
target
|
||||
|
||||
# Sources for external files
|
||||
compiler-rt
|
||||
*.tar.gz
|
||||
|
||||
# Benchmark cache
|
||||
baseline-*
|
||||
iai-home
|
||||
|
||||
# Temporary files
|
||||
*.bk
|
||||
*.rs.bk
|
||||
.#*
|
||||
4
library/compiler-builtins/.gitmodules
vendored
Normal file
4
library/compiler-builtins/.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
[submodule "crates/musl-math-sys/musl"]
|
||||
path = crates/musl-math-sys/musl
|
||||
url = https://git.musl-libc.org/git/musl
|
||||
shallow = true
|
||||
13
library/compiler-builtins/.release-plz.toml
Normal file
13
library/compiler-builtins/.release-plz.toml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[workspace]
|
||||
# As part of the release process, we delete `libm/Cargo.toml`. Since
|
||||
# this is only run in CI, we shouldn't need to worry about it.
|
||||
allow_dirty = true
|
||||
publish_allow_dirty = true
|
||||
|
||||
[[package]]
|
||||
name = "compiler_builtins"
|
||||
semver_check = false
|
||||
changelog_include = ["libm"] # libm is included as part of builtins
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
4
library/compiler-builtins/.rustfmt.toml
Normal file
4
library/compiler-builtins/.rustfmt.toml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# This matches rustc
|
||||
style_edition = "2024"
|
||||
group_imports = "StdExternalCrate"
|
||||
imports_granularity = "Module"
|
||||
167
library/compiler-builtins/CONTRIBUTING.md
Normal file
167
library/compiler-builtins/CONTRIBUTING.md
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
# How to contribute
|
||||
|
||||
## compiler-builtins
|
||||
|
||||
1. From the [pending list](compiler-builtins/README.md#progress), pick one or
|
||||
more intrinsics.
|
||||
2. Port the version from [`compiler-rt`] and, if applicable, their
|
||||
[tests][rt-tests]. Note that this crate has generic implementations for a lot
|
||||
of routines, which may be usable without porting the entire implementation.
|
||||
3. Add a test to `builtins-test`, comparing the behavior of the ported
|
||||
intrinsic(s) with their implementation on the testing host.
|
||||
4. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it can
|
||||
be linked on all targets.
|
||||
5. Send a Pull Request (PR) :tada:.
|
||||
|
||||
[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/lib/builtins
|
||||
[rt-tests]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/test/builtins
|
||||
|
||||
## Porting Reminders
|
||||
|
||||
1. [Rust][prec-rust] and [C][prec-c] have slightly different operator
|
||||
precedence. C evaluates comparisons (`== !=`) before bitwise operations
|
||||
(`& | ^`), while Rust evaluates the other way.
|
||||
2. C assumes wrapping operations everywhere. Rust panics on overflow when in
|
||||
debug mode. Consider using the [Wrapping][wrap-ty] type or the explicit
|
||||
[wrapping_*][wrap-fn] functions where applicable.
|
||||
3. Note [C implicit casts][casts], especially integer promotion. Rust is much
|
||||
more explicit about casting, so be sure that any cast which affects the
|
||||
output is ported to the Rust implementation.
|
||||
4. Rust has [many functions][i32] for integer or floating point manipulation in
|
||||
the standard library. Consider using one of these functions rather than
|
||||
porting a new one.
|
||||
|
||||
[prec-rust]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence
|
||||
[prec-c]: http://en.cppreference.com/w/c/language/operator_precedence
|
||||
[wrap-ty]: https://doc.rust-lang.org/core/num/struct.Wrapping.html
|
||||
[wrap-fn]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add
|
||||
[casts]: http://en.cppreference.com/w/cpp/language/implicit_conversion
|
||||
[i32]: https://doc.rust-lang.org/std/primitive.i32.html
|
||||
|
||||
## Tips and tricks
|
||||
|
||||
- _IMPORTANT_ The code in this crate will end up being used in the `core` crate
|
||||
so it can **not** have any external dependencies (other than a subset of
|
||||
`core` itself).
|
||||
- Only use relative imports within the `math` directory / module, e.g.
|
||||
`use self::fabs::fabs` or `use super::k_cos`. Absolute imports from core are
|
||||
OK, e.g. `use core::u64`.
|
||||
- To reinterpret a float as an integer use the `to_bits` method. The MUSL code
|
||||
uses the `GET_FLOAT_WORD` macro, or a union, to do this operation.
|
||||
- To reinterpret an integer as a float use the `f32::from_bits` constructor. The
|
||||
MUSL code uses the `SET_FLOAT_WORD` macro, or a union, to do this operation.
|
||||
- You may use other methods from core like `f64::is_nan`, etc. as appropriate.
|
||||
- Rust does not have hex float literals. This crate provides four macros `hf16!`,
|
||||
`hf32!`, `hf64!`, and `hf128!` which convert string literals to floats at
|
||||
compile time.
|
||||
|
||||
```rust
|
||||
assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000);
|
||||
assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000);
|
||||
```
|
||||
|
||||
- Rust code panics on arithmetic overflows when not optimized. You may need to
|
||||
use the [`Wrapping`] newtype to avoid this problem, or individual methods like
|
||||
[`wrapping_add`].
|
||||
|
||||
[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html
|
||||
[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add
|
||||
|
||||
## Testing
|
||||
|
||||
Testing for these crates can be somewhat complex, so feel free to rely on CI.
|
||||
|
||||
The easiest way to replicate CI testing is using Docker. This can be done by
|
||||
running `./ci/run-docker.sh [target]`. If no target is specified, all targets
|
||||
will be run.
|
||||
|
||||
Tests can also be run without Docker:
|
||||
|
||||
```sh
|
||||
# Run basic tests
|
||||
#
|
||||
# --no-default-features always needs to be passed, an unfortunate limitation
|
||||
# since the `#![compiler_builtins]` feature is enabled by default.
|
||||
cargo test --workspace --no-default-features
|
||||
|
||||
# Test with all interesting features
|
||||
cargo test --workspace --no-default-features \
|
||||
--features arch,unstable-float,unstable-intrinsics,mem
|
||||
|
||||
# Run with more detailed tests for libm
|
||||
cargo test --workspace --no-default-features \
|
||||
--features arch,unstable-float,unstable-intrinsics,mem \
|
||||
--features build-mpfr,build-musl \
|
||||
--profile release-checked
|
||||
```
|
||||
|
||||
The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can be
|
||||
difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help.
|
||||
|
||||
`build-musl` does not build with MSVC, Wasm, or Thumb.
|
||||
|
||||
[`rug`]: https://docs.rs/rug/latest/rug/
|
||||
[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/
|
||||
|
||||
In order to run all tests, some dependencies may be required:
|
||||
|
||||
```sh
|
||||
# Allow testing compiler-builtins
|
||||
./ci/download-compiler-rt.sh
|
||||
|
||||
# Optional, initialize musl for `--features build-musl`
|
||||
git submodule init
|
||||
git submodule update
|
||||
|
||||
# `--release` enables more test cases
|
||||
cargo test --release
|
||||
```
|
||||
|
||||
### Extensive tests
|
||||
|
||||
Libm also has tests that are exhaustive (for single-argument `f32` and 1- or 2-
|
||||
argument `f16`) or extensive (for all other float and argument combinations).
|
||||
These take quite a long time to run, but are launched in CI when relevant files
|
||||
are changed.
|
||||
|
||||
Exhaustive tests can be selected by passing an environment variable:
|
||||
|
||||
```sh
|
||||
LIBM_EXTENSIVE_TESTS=sqrt,sqrtf cargo test --features build-mpfr \
|
||||
--test z_extensive \
|
||||
--profile release-checked
|
||||
|
||||
# Run all tests for one type
|
||||
LIBM_EXTENSIVE_TESTS=all_f16 cargo test ...
|
||||
|
||||
# Ensure `f64` tests can run exhaustively. Estimated completion time for a
|
||||
# single test is 57306 years on my machine so this may be worth skipping.
|
||||
LIBM_EXTENSIVE_TESTS=all LIBM_EXTENSIVE_ITERATIONS=18446744073709551615 cargo test ...
|
||||
```
|
||||
|
||||
## Benchmarking
|
||||
|
||||
Regular walltime benchmarks can be run with `cargo bench`:
|
||||
|
||||
```sh
|
||||
cargo bench --no-default-features \
|
||||
--features arch,unstable-float,unstable-intrinsics,mem \
|
||||
--features benchmarking-reports
|
||||
```
|
||||
|
||||
There are also benchmarks that check instruction count behind the `icount`
|
||||
feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind]
|
||||
to be installed, which means these only run on limited platforms.
|
||||
|
||||
Instruction count benchmarks are run as part of CI to flag performance
|
||||
regressions.
|
||||
|
||||
```sh
|
||||
cargo bench --no-default-features \
|
||||
--features arch,unstable-float,unstable-intrinsics,mem \
|
||||
--features icount \
|
||||
--bench icount --bench mem_icount
|
||||
```
|
||||
|
||||
[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
|
||||
[Valgrind]: https://valgrind.org/
|
||||
50
library/compiler-builtins/Cargo.toml
Normal file
50
library/compiler-builtins/Cargo.toml
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
[workspace]
|
||||
resolver = "2"
|
||||
members = [
|
||||
"builtins-test",
|
||||
"compiler-builtins",
|
||||
"crates/libm-macros",
|
||||
"crates/musl-math-sys",
|
||||
"crates/panic-handler",
|
||||
"crates/util",
|
||||
"libm",
|
||||
"libm-test",
|
||||
]
|
||||
|
||||
default-members = [
|
||||
"builtins-test",
|
||||
"compiler-builtins",
|
||||
"crates/libm-macros",
|
||||
"libm",
|
||||
"libm-test",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
# `builtins-test-intrinsics` needs the feature `compiler-builtins` enabled
|
||||
# and `mangled-names` disabled, which is the opposite of what is needed for
|
||||
# other tests, so it makes sense to keep it out of the workspace.
|
||||
"builtins-test-intrinsics",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
panic = "abort"
|
||||
|
||||
[profile.dev]
|
||||
panic = "abort"
|
||||
|
||||
# Release mode with debug assertions
|
||||
[profile.release-checked]
|
||||
inherits = "release"
|
||||
debug-assertions = true
|
||||
overflow-checks = true
|
||||
|
||||
# Release with maximum optimizations, which is very slow to build. This is also
|
||||
# what is needed to check `no-panic`.
|
||||
[profile.release-opt]
|
||||
inherits = "release"
|
||||
codegen-units = 1
|
||||
lto = "fat"
|
||||
|
||||
[profile.bench]
|
||||
# Required for iai-callgrind
|
||||
debug = true
|
||||
275
library/compiler-builtins/LICENSE.txt
Normal file
275
library/compiler-builtins/LICENSE.txt
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
The compiler-builtins crate is available for use under both the MIT license
|
||||
and the Apache-2.0 license with the LLVM exception (MIT AND Apache-2.0 WITH
|
||||
LLVM-exception).
|
||||
|
||||
The libm crate is available for use under the MIT license.
|
||||
|
||||
As a contributor, you agree that your code may be used under any of the
|
||||
following: the MIT license, the Apache-2.0 license, or the Apache-2.0 license
|
||||
with the LLVM exception. In other words, original (non-derivative) work is
|
||||
licensed under MIT OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception. This is
|
||||
the default license for all other source in this repository.
|
||||
|
||||
Text of the relevant licenses is provided below:
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
MIT License
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
------------------------------------------------------------------------------
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
---- LLVM Exceptions to the Apache 2.0 License ----
|
||||
|
||||
As an exception, if, as a result of your compiling your source code, portions
|
||||
of this Software are embedded into an Object form of such source code, you
|
||||
may redistribute such embedded portions in such Object form without complying
|
||||
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
|
||||
|
||||
In addition, if you combine or link compiled forms of this Software with
|
||||
software that is licensed under the GPLv2 ("Combined Software") and if a
|
||||
court of competent jurisdiction determines that the patent provision (Section
|
||||
3), the indemnity provision (Section 9) or other Section of the License
|
||||
conflicts with the conditions of the GPLv2, you may retroactively and
|
||||
prospectively choose to deem waived or otherwise exclude such Section(s) of
|
||||
the License, but only in their entirety and only with respect to the Combined
|
||||
Software.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
Portions of this software are derived from third-party works licensed under
|
||||
terms compatible with the above Apache-2.0 WITH LLVM-exception AND MIT
|
||||
license:
|
||||
|
||||
* compiler-builtins is derived from LLVM's compiler-rt (https://llvm.org/).
|
||||
Work derived from compiler-rt prior to 2019-01-19 is usable under the MIT
|
||||
license, with the following copyright:
|
||||
|
||||
Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
|
||||
|
||||
The relevant CREDITS.TXT is located at
|
||||
https://github.com/llvm/llvm-project/blob/main/compiler-rt/CREDITS.TXT.
|
||||
|
||||
* Work derived from compiler-rt after 2019-01-19 is usable under the
|
||||
Apache-2.0 license with the LLVM exception.
|
||||
|
||||
* The bundled `math` module is from the libm crate, usable under the MIT
|
||||
license. For further details and copyrights, see libm/LICENSE.txt at
|
||||
https://github.com/rust-lang/compiler-builtins.
|
||||
|
||||
Additionally, some source files may contain comments with specific copyrights
|
||||
or licenses.
|
||||
16
library/compiler-builtins/PUBLISHING.md
Normal file
16
library/compiler-builtins/PUBLISHING.md
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Publishing to crates.io
|
||||
|
||||
Publishing `compiler-builtins` to crates.io takes a few steps unfortunately.
|
||||
It's not great, but it works for now. PRs to improve this process would be
|
||||
greatly appreciated!
|
||||
|
||||
1. Make sure you've got a clean working tree and it's updated with the latest
|
||||
changes on `master`
|
||||
2. Edit `Cargo.toml` to bump the version number
|
||||
3. Commit this change
|
||||
4. Run `git tag` to create a tag for this version
|
||||
5. Delete the `libm/Cargo.toml` file
|
||||
6. Run `cargo +nightly publish`
|
||||
7. Push the tag
|
||||
8. Push the commit
|
||||
9. Undo changes to `Cargo.toml` and the `libm` submodule
|
||||
27
library/compiler-builtins/README.md
Normal file
27
library/compiler-builtins/README.md
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# `compiler-builtins` and `libm`
|
||||
|
||||
This repository contains two main crates:
|
||||
|
||||
* `compiler-builtins`: symbols that the compiler expects to be available at
|
||||
link time
|
||||
* `libm`: a Rust implementation of C math libraries, used to provide
|
||||
implementations in `core`.
|
||||
|
||||
More details are at [compiler-builtins/README.md](compiler-builtins/README.md)
|
||||
and [libm/README.md](libm/README.md).
|
||||
|
||||
For instructions on contributing, see [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
## License
|
||||
|
||||
* `libm` may be used under the [MIT License]
|
||||
* `compiler-builtins` may be used under the [MIT License] and the
|
||||
[Apache License, Version 2.0] with the LLVM exception.
|
||||
* All original contributions must be under all of: the MIT license, the
|
||||
Apache-2.0 license, and the Apache-2.0 license with the LLVM exception.
|
||||
|
||||
More details are in [LICENSE.txt](LICENSE.txt) and
|
||||
[libm/LICENSE.txt](libm/LICENSE.txt).
|
||||
|
||||
[MIT License]: https://opensource.org/license/mit
|
||||
[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
[package]
|
||||
name = "builtins-test-intrinsics"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
publish = false
|
||||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]}
|
||||
panic-handler = { path = "../crates/panic-handler" }
|
||||
|
||||
[features]
|
||||
c = ["compiler_builtins/c"]
|
||||
|
||||
[profile.release]
|
||||
panic = "abort"
|
||||
|
||||
[profile.dev]
|
||||
panic = "abort"
|
||||
11
library/compiler-builtins/builtins-test-intrinsics/build.rs
Normal file
11
library/compiler-builtins/builtins-test-intrinsics/build.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
mod builtins_configure {
|
||||
include!("../compiler-builtins/configure.rs");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
println!("cargo::rerun-if-changed=../configure.rs");
|
||||
|
||||
let target = builtins_configure::Target::from_env();
|
||||
builtins_configure::configure_f16_f128(&target);
|
||||
builtins_configure::configure_aliases(&target);
|
||||
}
|
||||
697
library/compiler-builtins/builtins-test-intrinsics/src/main.rs
Normal file
697
library/compiler-builtins/builtins-test-intrinsics/src/main.rs
Normal file
|
|
@ -0,0 +1,697 @@
|
|||
// By compiling this file we check that all the intrinsics we care about continue to be provided by
|
||||
// the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop
|
||||
// compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail
|
||||
// to link due to the missing intrinsic (symbol).
|
||||
|
||||
#![allow(unused_features)]
|
||||
#![allow(internal_features)]
|
||||
#![deny(dead_code)]
|
||||
#![feature(allocator_api)]
|
||||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
#![feature(lang_items)]
|
||||
#![no_std]
|
||||
#![no_main]
|
||||
|
||||
extern crate panic_handler;
|
||||
|
||||
#[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))]
|
||||
#[link(name = "c")]
|
||||
extern "C" {}
|
||||
|
||||
// Every function in this module will be lowered to an intrinsic by LLVM, if the platform
|
||||
// doesn't have native support for the operation used in the function. ARM has a naming convention
|
||||
// for its intrinsics that's different from other architectures; that's why some functions
|
||||
// have an additional comment: the function name is the ARM name for the intrinsic and the comment
|
||||
// is the non-ARM name for the intrinsic.
|
||||
mod intrinsics {
|
||||
/* f16 operations */
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
pub fn extendhfsf(x: f16) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
pub fn extendhfdf(x: f16) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f16_enabled,
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn extendhftf(x: f16) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
/* f32 operations */
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
pub fn truncsfhf(x: f32) -> f16 {
|
||||
x as f16
|
||||
}
|
||||
|
||||
// extendsfdf2
|
||||
pub fn aeabi_f2d(x: f32) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn extendsftf(x: f32) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
// fixsfsi
|
||||
pub fn aeabi_f2iz(x: f32) -> i32 {
|
||||
x as i32
|
||||
}
|
||||
|
||||
// fixsfdi
|
||||
pub fn aeabi_f2lz(x: f32) -> i64 {
|
||||
x as i64
|
||||
}
|
||||
|
||||
pub fn fixsfti(x: f32) -> i128 {
|
||||
x as i128
|
||||
}
|
||||
|
||||
// fixunssfsi
|
||||
pub fn aeabi_f2uiz(x: f32) -> u32 {
|
||||
x as u32
|
||||
}
|
||||
|
||||
// fixunssfdi
|
||||
pub fn aeabi_f2ulz(x: f32) -> u64 {
|
||||
x as u64
|
||||
}
|
||||
|
||||
pub fn fixunssfti(x: f32) -> u128 {
|
||||
x as u128
|
||||
}
|
||||
|
||||
// addsf3
|
||||
pub fn aeabi_fadd(a: f32, b: f32) -> f32 {
|
||||
a + b
|
||||
}
|
||||
|
||||
// eqsf2
|
||||
pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool {
|
||||
a == b
|
||||
}
|
||||
|
||||
// gtsf2
|
||||
pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool {
|
||||
a > b
|
||||
}
|
||||
|
||||
// ltsf2
|
||||
pub fn aeabi_fcmplt(a: f32, b: f32) -> bool {
|
||||
a < b
|
||||
}
|
||||
|
||||
// divsf3
|
||||
pub fn aeabi_fdiv(a: f32, b: f32) -> f32 {
|
||||
a / b
|
||||
}
|
||||
|
||||
// mulsf3
|
||||
pub fn aeabi_fmul(a: f32, b: f32) -> f32 {
|
||||
a * b
|
||||
}
|
||||
|
||||
// subsf3
|
||||
pub fn aeabi_fsub(a: f32, b: f32) -> f32 {
|
||||
a - b
|
||||
}
|
||||
|
||||
/* f64 operations */
|
||||
|
||||
// truncdfsf2
|
||||
pub fn aeabi_d2f(x: f64) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// fixdfsi
|
||||
pub fn aeabi_d2i(x: f64) -> i32 {
|
||||
x as i32
|
||||
}
|
||||
|
||||
// fixdfdi
|
||||
pub fn aeabi_d2l(x: f64) -> i64 {
|
||||
x as i64
|
||||
}
|
||||
|
||||
pub fn fixdfti(x: f64) -> i128 {
|
||||
x as i128
|
||||
}
|
||||
|
||||
// fixunsdfsi
|
||||
pub fn aeabi_d2uiz(x: f64) -> u32 {
|
||||
x as u32
|
||||
}
|
||||
|
||||
// fixunsdfdi
|
||||
pub fn aeabi_d2ulz(x: f64) -> u64 {
|
||||
x as u64
|
||||
}
|
||||
|
||||
pub fn fixunsdfti(x: f64) -> u128 {
|
||||
x as u128
|
||||
}
|
||||
|
||||
// adddf3
|
||||
pub fn aeabi_dadd(a: f64, b: f64) -> f64 {
|
||||
a + b
|
||||
}
|
||||
|
||||
// eqdf2
|
||||
pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool {
|
||||
a == b
|
||||
}
|
||||
|
||||
// gtdf2
|
||||
pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool {
|
||||
a > b
|
||||
}
|
||||
|
||||
// ltdf2
|
||||
pub fn aeabi_dcmplt(a: f64, b: f64) -> bool {
|
||||
a < b
|
||||
}
|
||||
|
||||
// divdf3
|
||||
pub fn aeabi_ddiv(a: f64, b: f64) -> f64 {
|
||||
a / b
|
||||
}
|
||||
|
||||
// muldf3
|
||||
pub fn aeabi_dmul(a: f64, b: f64) -> f64 {
|
||||
a * b
|
||||
}
|
||||
|
||||
// subdf3
|
||||
pub fn aeabi_dsub(a: f64, b: f64) -> f64 {
|
||||
a - b
|
||||
}
|
||||
|
||||
/* f128 operations */
|
||||
|
||||
#[cfg(all(
|
||||
f16_enabled,
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn trunctfhf(x: f128) -> f16 {
|
||||
x as f16
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn trunctfsf(x: f128) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn trunctfdf(x: f128) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixtfsi(x: f128) -> i32 {
|
||||
x as i32
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixtfdi(x: f128) -> i64 {
|
||||
x as i64
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixtfti(x: f128) -> i128 {
|
||||
x as i128
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixunstfsi(x: f128) -> u32 {
|
||||
x as u32
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixunstfdi(x: f128) -> u64 {
|
||||
x as u64
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
pub fn fixunstfti(x: f128) -> u128 {
|
||||
x as u128
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn addtf(a: f128, b: f128) -> f128 {
|
||||
a + b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn eqtf(a: f128, b: f128) -> bool {
|
||||
a == b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn gttf(a: f128, b: f128) -> bool {
|
||||
a > b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn lttf(a: f128, b: f128) -> bool {
|
||||
a < b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn multf(a: f128, b: f128) -> f128 {
|
||||
a * b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn divtf(a: f128, b: f128) -> f128 {
|
||||
a / b
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn subtf(a: f128, b: f128) -> f128 {
|
||||
a - b
|
||||
}
|
||||
|
||||
/* i32 operations */
|
||||
|
||||
// floatsisf
|
||||
pub fn aeabi_i2f(x: i32) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatsidf
|
||||
pub fn aeabi_i2d(x: i32) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floatsitf(x: i32) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn aeabi_idiv(a: i32, b: i32) -> i32 {
|
||||
a.wrapping_div(b)
|
||||
}
|
||||
|
||||
pub fn aeabi_idivmod(a: i32, b: i32) -> i32 {
|
||||
a % b
|
||||
}
|
||||
|
||||
/* i64 operations */
|
||||
|
||||
// floatdisf
|
||||
pub fn aeabi_l2f(x: i64) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatdidf
|
||||
pub fn aeabi_l2d(x: i64) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floatditf(x: i64) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn mulodi4(a: i64, b: i64) -> i64 {
|
||||
a * b
|
||||
}
|
||||
|
||||
// divdi3
|
||||
pub fn aeabi_ldivmod(a: i64, b: i64) -> i64 {
|
||||
a / b
|
||||
}
|
||||
|
||||
pub fn moddi3(a: i64, b: i64) -> i64 {
|
||||
a % b
|
||||
}
|
||||
|
||||
// muldi3
|
||||
pub fn aeabi_lmul(a: i64, b: i64) -> i64 {
|
||||
a.wrapping_mul(b)
|
||||
}
|
||||
|
||||
/* i128 operations */
|
||||
|
||||
pub fn floattisf(x: i128) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
pub fn floattidf(x: i128) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floattitf(x: i128) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn lshrti3(a: i128, b: usize) -> i128 {
|
||||
a >> b
|
||||
}
|
||||
|
||||
pub fn divti3(a: i128, b: i128) -> i128 {
|
||||
a / b
|
||||
}
|
||||
|
||||
pub fn modti3(a: i128, b: i128) -> i128 {
|
||||
a % b
|
||||
}
|
||||
|
||||
/* u32 operations */
|
||||
|
||||
// floatunsisf
|
||||
pub fn aeabi_ui2f(x: u32) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatunsidf
|
||||
pub fn aeabi_ui2d(x: u32) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floatunsitf(x: u32) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn aeabi_uidiv(a: u32, b: u32) -> u32 {
|
||||
a / b
|
||||
}
|
||||
|
||||
pub fn aeabi_uidivmod(a: u32, b: u32) -> u32 {
|
||||
a % b
|
||||
}
|
||||
|
||||
/* u64 operations */
|
||||
|
||||
// floatundisf
|
||||
pub fn aeabi_ul2f(x: u64) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatundidf
|
||||
pub fn aeabi_ul2d(x: u64) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floatunditf(x: u64) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
// udivdi3
pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 {
    // This helper exists to force a reference to the unsigned 64-bit
    // *division* intrinsic (`__udivdi3`; `__aeabi_uldivmod` on ARM), as the
    // comment above indicates. The previous body `a * b` exercised
    // multiplication, which `aeabi_lmul` already covers, so the division
    // symbol would have gone untested.
    a / b
}
|
||||
|
||||
pub fn umoddi3(a: u64, b: u64) -> u64 {
|
||||
a % b
|
||||
}
|
||||
|
||||
/* u128 operations */
|
||||
|
||||
pub fn floatuntisf(x: u128) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
pub fn floatuntidf(x: u128) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn floatuntitf(x: u128) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn muloti4(a: u128, b: u128) -> Option<u128> {
|
||||
a.checked_mul(b)
|
||||
}
|
||||
|
||||
pub fn multi3(a: u128, b: u128) -> u128 {
|
||||
a.wrapping_mul(b)
|
||||
}
|
||||
|
||||
/// 128-bit shift *left*, so LLVM lowers this to the `__ashlti3` intrinsic.
/// The original body used `a >> b`, which is a right shift and would never
/// reference the left-shift symbol this function is named after.
pub fn ashlti3(a: u128, b: usize) -> u128 {
    a << b
}
|
||||
|
||||
/// 128-bit shift *right*, matching the `__ashrti3`/`__lshrti3` family this
/// function is named after. The original body used `a << b` (a left shift),
/// so the right-shift intrinsic would not have been referenced.
pub fn ashrti3(a: u128, b: usize) -> u128 {
    a >> b
}
|
||||
|
||||
pub fn udivti3(a: u128, b: u128) -> u128 {
|
||||
a / b
|
||||
}
|
||||
|
||||
pub fn umodti3(a: u128, b: u128) -> u128 {
|
||||
a % b
|
||||
}
|
||||
}
|
||||
|
||||
fn run() {
|
||||
use core::hint::black_box as bb;
|
||||
|
||||
use intrinsics::*;
|
||||
|
||||
// FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
bb(addtf(bb(2.), bb(2.)));
|
||||
bb(aeabi_d2f(bb(2.)));
|
||||
bb(aeabi_d2i(bb(2.)));
|
||||
bb(aeabi_d2l(bb(2.)));
|
||||
bb(aeabi_d2uiz(bb(2.)));
|
||||
bb(aeabi_d2ulz(bb(2.)));
|
||||
bb(aeabi_dadd(bb(2.), bb(3.)));
|
||||
bb(aeabi_dcmpeq(bb(2.), bb(3.)));
|
||||
bb(aeabi_dcmpgt(bb(2.), bb(3.)));
|
||||
bb(aeabi_dcmplt(bb(2.), bb(3.)));
|
||||
bb(aeabi_ddiv(bb(2.), bb(3.)));
|
||||
bb(aeabi_dmul(bb(2.), bb(3.)));
|
||||
bb(aeabi_dsub(bb(2.), bb(3.)));
|
||||
bb(aeabi_f2d(bb(2.)));
|
||||
bb(aeabi_f2iz(bb(2.)));
|
||||
bb(aeabi_f2lz(bb(2.)));
|
||||
bb(aeabi_f2uiz(bb(2.)));
|
||||
bb(aeabi_f2ulz(bb(2.)));
|
||||
bb(aeabi_fadd(bb(2.), bb(3.)));
|
||||
bb(aeabi_fcmpeq(bb(2.), bb(3.)));
|
||||
bb(aeabi_fcmpgt(bb(2.), bb(3.)));
|
||||
bb(aeabi_fcmplt(bb(2.), bb(3.)));
|
||||
bb(aeabi_fdiv(bb(2.), bb(3.)));
|
||||
bb(aeabi_fmul(bb(2.), bb(3.)));
|
||||
bb(aeabi_fsub(bb(2.), bb(3.)));
|
||||
bb(aeabi_i2d(bb(2)));
|
||||
bb(aeabi_i2f(bb(2)));
|
||||
bb(aeabi_idiv(bb(2), bb(3)));
|
||||
bb(aeabi_idivmod(bb(2), bb(3)));
|
||||
bb(aeabi_l2d(bb(2)));
|
||||
bb(aeabi_l2f(bb(2)));
|
||||
bb(aeabi_ldivmod(bb(2), bb(3)));
|
||||
bb(aeabi_lmul(bb(2), bb(3)));
|
||||
bb(aeabi_ui2d(bb(2)));
|
||||
bb(aeabi_ui2f(bb(2)));
|
||||
bb(aeabi_uidiv(bb(2), bb(3)));
|
||||
bb(aeabi_uidivmod(bb(2), bb(3)));
|
||||
bb(aeabi_ul2d(bb(2)));
|
||||
bb(aeabi_ul2f(bb(2)));
|
||||
bb(aeabi_uldivmod(bb(2), bb(3)));
|
||||
bb(ashlti3(bb(2), bb(2)));
|
||||
bb(ashrti3(bb(2), bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(divtf(bb(2.), bb(2.)));
|
||||
bb(divti3(bb(2), bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(eqtf(bb(2.), bb(2.)));
|
||||
#[cfg(f16_enabled)]
|
||||
bb(extendhfdf(bb(2.)));
|
||||
#[cfg(f16_enabled)]
|
||||
bb(extendhfsf(bb(2.)));
|
||||
#[cfg(all(
|
||||
f16_enabled,
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(extendhftf(bb(2.)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(extendsftf(bb(2.)));
|
||||
bb(fixdfti(bb(2.)));
|
||||
bb(fixsfti(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixtfdi(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixtfsi(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixtfti(bb(2.)));
|
||||
bb(fixunsdfti(bb(2.)));
|
||||
bb(fixunssfti(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixunstfdi(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixunstfsi(bb(2.)));
|
||||
#[cfg(all(
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(fixunstfti(bb(2.)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floatditf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floatsitf(bb(2)));
|
||||
bb(floattidf(bb(2)));
|
||||
bb(floattisf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floattitf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floatunditf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floatunsitf(bb(2)));
|
||||
bb(floatuntidf(bb(2)));
|
||||
bb(floatuntisf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(floatuntitf(bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(gttf(bb(2.), bb(2.)));
|
||||
bb(lshrti3(bb(2), bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(lttf(bb(2.), bb(2.)));
|
||||
bb(moddi3(bb(2), bb(3)));
|
||||
bb(modti3(bb(2), bb(2)));
|
||||
bb(mulodi4(bb(2), bb(3)));
|
||||
bb(muloti4(bb(2), bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(multf(bb(2.), bb(2.)));
|
||||
bb(multi3(bb(2), bb(2)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(subtf(bb(2.), bb(2.)));
|
||||
#[cfg(f16_enabled)]
|
||||
bb(truncsfhf(bb(2.)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(trunctfdf(bb(2.)));
|
||||
#[cfg(all(
|
||||
f16_enabled,
|
||||
f128_enabled,
|
||||
not(any(target_arch = "powerpc", target_arch = "powerpc64"))
|
||||
))]
|
||||
bb(trunctfhf(bb(2.)));
|
||||
#[cfg(f128_enabled)]
|
||||
bb(trunctfsf(bb(2.)));
|
||||
bb(udivti3(bb(2), bb(2)));
|
||||
bb(umoddi3(bb(2), bb(3)));
|
||||
bb(umodti3(bb(2), bb(2)));
|
||||
|
||||
something_with_a_dtor(&|| assert_eq!(bb(1), 1));
|
||||
|
||||
// FIXME(#802): This should be re-enabled once a workaround is found.
|
||||
// extern "C" {
|
||||
// fn rust_begin_unwind(x: usize);
|
||||
// }
|
||||
|
||||
// unsafe {
|
||||
// rust_begin_unwind(0);
|
||||
// }
|
||||
}
|
||||
|
||||
fn something_with_a_dtor(f: &dyn Fn()) {
|
||||
struct A<'a>(&'a (dyn Fn() + 'a));
|
||||
|
||||
impl Drop for A<'_> {
|
||||
fn drop(&mut self) {
|
||||
(self.0)();
|
||||
}
|
||||
}
|
||||
let _a = A(f);
|
||||
f();
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[cfg(not(thumb))]
|
||||
fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int {
|
||||
run();
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[cfg(thumb)]
|
||||
pub fn _start() -> ! {
|
||||
run();
|
||||
loop {}
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
#[link(name = "kernel32")]
|
||||
#[link(name = "msvcrt")]
|
||||
extern "C" {}
|
||||
|
||||
// ARM targets need these symbols
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn __aeabi_unwind_cpp_pr0() {}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn __aeabi_unwind_cpp_pr1() {}
|
||||
|
||||
#[cfg(not(any(windows, target_os = "cygwin")))]
|
||||
#[allow(non_snake_case)]
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn _Unwind_Resume() {}
|
||||
|
||||
#[cfg(not(any(windows, target_os = "cygwin")))]
|
||||
#[lang = "eh_personality"]
|
||||
pub extern "C" fn eh_personality() {}
|
||||
|
||||
#[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))]
|
||||
mod mingw_unwinding {
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn rust_eh_personality() {}
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn rust_eh_unwind_resume() {}
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn rust_eh_register_frames() {}
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn rust_eh_unregister_frames() {}
|
||||
}
|
||||
99
library/compiler-builtins/builtins-test/Cargo.toml
Normal file
99
library/compiler-builtins/builtins-test/Cargo.toml
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
[package]
|
||||
name = "builtins-test"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
edition = "2024"
|
||||
publish = false
|
||||
license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
|
||||
|
||||
[dependencies]
|
||||
# For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential
|
||||
# problems with system RNGs on the variety of platforms this crate is tested on.
|
||||
# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
|
||||
rand_xoshiro = "0.6"
|
||||
# To compare float builtins against
|
||||
rustc_apfloat = "0.2.1"
|
||||
# Really a dev dependency, but dev dependencies can't be optional
|
||||
iai-callgrind = { version = "0.14.0", optional = true }
|
||||
|
||||
[dependencies.compiler_builtins]
|
||||
path = "../compiler-builtins"
|
||||
default-features = false
|
||||
features = ["unstable-public-internals"]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
|
||||
paste = "1.0.15"
|
||||
|
||||
[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
|
||||
test = { git = "https://github.com/japaric/utest" }
|
||||
utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
|
||||
utest-macros = { git = "https://github.com/japaric/utest" }
|
||||
|
||||
[features]
|
||||
default = ["mangled-names"]
|
||||
c = ["compiler_builtins/c"]
|
||||
no-asm = ["compiler_builtins/no-asm"]
|
||||
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
|
||||
mem = ["compiler_builtins/mem"]
|
||||
mangled-names = ["compiler_builtins/mangled-names"]
|
||||
# Skip tests that rely on f128 symbols being available on the system
|
||||
no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
|
||||
# Some platforms have some f128 functions but everything except integer conversions
|
||||
no-sys-f128-int-convert = []
|
||||
no-sys-f16-f128-convert = []
|
||||
no-sys-f16-f64-convert = []
|
||||
# Skip tests that rely on f16 symbols being available on the system
|
||||
no-sys-f16 = ["no-sys-f16-f64-convert"]
|
||||
|
||||
# Enable icount benchmarks (requires iai-callgrind and valgrind)
|
||||
icount = ["dep:iai-callgrind"]
|
||||
|
||||
# Enable report generation without bringing in more dependencies by default
|
||||
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
|
||||
|
||||
# NOTE: benchmarks must be run with `--no-default-features` or with
|
||||
# `-p builtins-test`, otherwise the default `compiler-builtins` feature
|
||||
# of the `compiler_builtins` crate gets activated, resulting in linker
|
||||
# errors.
|
||||
|
||||
[[bench]]
|
||||
name = "float_add"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_sub"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_mul"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_div"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_cmp"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_conv"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_extend"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_trunc"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_pow"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "mem_icount"
|
||||
harness = false
|
||||
required-features = ["icount"]
|
||||
93
library/compiler-builtins/builtins-test/benches/float_add.rs
Normal file
93
library/compiler-builtins/builtins-test/benches/float_add.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::add;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
float_bench! {
|
||||
name: add_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: add::__addsf3,
|
||||
sys_fn: __addsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"addss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fadd {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: add_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: add::__adddf3,
|
||||
sys_fn: __adddf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"addsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fadd {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: add_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: add::__addtf3,
|
||||
crate_fn_ppc: add::__addkf3,
|
||||
sys_fn: __addtf3,
|
||||
sys_fn_ppc: __addkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_add() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
add_f32(&mut criterion);
|
||||
add_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
add_f128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_add);
|
||||
207
library/compiler-builtins/builtins-test/benches/float_cmp.rs
Normal file
207
library/compiler-builtins/builtins-test/benches/float_cmp.rs
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::cmp;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
/// `gt` symbols are allowed to return differing results, they just get compared
|
||||
/// to 0.
|
||||
fn gt_res_eq(a: i32, b: i32) -> bool {
|
||||
let a_lt_0 = a <= 0;
|
||||
let b_lt_0 = b <= 0;
|
||||
(a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0)
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f32_gt,
|
||||
sig: (a: f32, b: f32) -> i32,
|
||||
crate_fn: cmp::__gtsf2,
|
||||
sys_fn: __gtsf2,
|
||||
sys_available: all(),
|
||||
output_eq: gt_res_eq,
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"xor {ret:e}, {ret:e}",
|
||||
"ucomiss {a}, {b}",
|
||||
"seta {ret:l}",
|
||||
a = in(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcmp {a:s}, {b:s}",
|
||||
"cset {ret:w}, gt",
|
||||
a = in(vreg) a,
|
||||
b = in(vreg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem,nostack),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f32_unord,
|
||||
sig: (a: f32, b: f32) -> i32,
|
||||
crate_fn: cmp::__unordsf2,
|
||||
sys_fn: __unordsf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"xor {ret:e}, {ret:e}",
|
||||
"ucomiss {a}, {b}",
|
||||
"setp {ret:l}",
|
||||
a = in(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcmp {a:s}, {b:s}",
|
||||
"cset {ret:w}, vs",
|
||||
a = in(vreg) a,
|
||||
b = in(vreg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f64_gt,
|
||||
sig: (a: f64, b: f64) -> i32,
|
||||
crate_fn: cmp::__gtdf2,
|
||||
sys_fn: __gtdf2,
|
||||
sys_available: all(),
|
||||
output_eq: gt_res_eq,
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"xor {ret:e}, {ret:e}",
|
||||
"ucomisd {a}, {b}",
|
||||
"seta {ret:l}",
|
||||
a = in(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcmp {a:d}, {b:d}",
|
||||
"cset {ret:w}, gt",
|
||||
a = in(vreg) a,
|
||||
b = in(vreg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f64_unord,
|
||||
sig: (a: f64, b: f64) -> i32,
|
||||
crate_fn: cmp::__unorddf2,
|
||||
sys_fn: __unorddf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"xor {ret:e}, {ret:e}",
|
||||
"ucomisd {a}, {b}",
|
||||
"setp {ret:l}",
|
||||
a = in(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcmp {a:d}, {b:d}",
|
||||
"cset {ret:w}, vs",
|
||||
a = in(vreg) a,
|
||||
b = in(vreg) b,
|
||||
ret = out(reg) ret,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f128_gt,
|
||||
sig: (a: f128, b: f128) -> i32,
|
||||
crate_fn: cmp::__gttf2,
|
||||
crate_fn_ppc: cmp::__gtkf2,
|
||||
sys_fn: __gttf2,
|
||||
sys_fn_ppc: __gtkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
output_eq: gt_res_eq,
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: cmp_f128_unord,
|
||||
sig: (a: f128, b: f128) -> i32,
|
||||
crate_fn: cmp::__unordtf2,
|
||||
crate_fn_ppc: cmp::__unordkf2,
|
||||
sys_fn: __unordtf2,
|
||||
sys_fn_ppc: __unordkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_cmp() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
cmp_f32_gt(&mut criterion);
|
||||
cmp_f32_unord(&mut criterion);
|
||||
cmp_f64_gt(&mut criterion);
|
||||
cmp_f64_unord(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
cmp_f128_gt(&mut criterion);
|
||||
cmp_f128_unord(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_cmp);
|
||||
688
library/compiler-builtins/builtins-test/benches/float_conv.rs
Normal file
688
library/compiler-builtins/builtins-test/benches/float_conv.rs
Normal file
|
|
@ -0,0 +1,688 @@
|
|||
#![allow(improper_ctypes)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::conv;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
/* unsigned int -> float */
|
||||
|
||||
float_bench! {
|
||||
name: conv_u32_f32,
|
||||
sig: (a: u32) -> f32,
|
||||
crate_fn: conv::__floatunsisf,
|
||||
sys_fn: __floatunsisf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"mov {tmp:e}, {a:e}",
|
||||
"cvtsi2ss {ret}, {tmp}",
|
||||
a = in(reg) a,
|
||||
tmp = out(reg) _,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"ucvtf {ret:s}, {a:w}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u32_f64,
|
||||
sig: (a: u32) -> f64,
|
||||
crate_fn: conv::__floatunsidf,
|
||||
sys_fn: __floatunsidf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"mov {tmp:e}, {a:e}",
|
||||
"cvtsi2sd {ret}, {tmp}",
|
||||
a = in(reg) a,
|
||||
tmp = out(reg) _,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"ucvtf {ret:d}, {a:w}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u32_f128,
|
||||
sig: (a: u32) -> f128,
|
||||
crate_fn: conv::__floatunsitf,
|
||||
crate_fn_ppc: conv::__floatunsikf,
|
||||
sys_fn: __floatunsitf,
|
||||
sys_fn_ppc: __floatunsikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u64_f32,
|
||||
sig: (a: u64) -> f32,
|
||||
crate_fn: conv::__floatundisf,
|
||||
sys_fn: __floatundisf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"ucvtf {ret:s}, {a:x}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u64_f64,
|
||||
sig: (a: u64) -> f64,
|
||||
crate_fn: conv::__floatundidf,
|
||||
sys_fn: __floatundidf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"ucvtf {ret:d}, {a:x}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u64_f128,
|
||||
sig: (a: u64) -> f128,
|
||||
crate_fn: conv::__floatunditf,
|
||||
crate_fn_ppc: conv::__floatundikf,
|
||||
sys_fn: __floatunditf,
|
||||
sys_fn_ppc: __floatundikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u128_f32,
|
||||
sig: (a: u128) -> f32,
|
||||
crate_fn: conv::__floatuntisf,
|
||||
sys_fn: __floatuntisf,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u128_f64,
|
||||
sig: (a: u128) -> f64,
|
||||
crate_fn: conv::__floatuntidf,
|
||||
sys_fn: __floatuntidf,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u128_f128,
|
||||
sig: (a: u128) -> f128,
|
||||
crate_fn: conv::__floatuntitf,
|
||||
crate_fn_ppc: conv::__floatuntikf,
|
||||
sys_fn: __floatuntitf,
|
||||
sys_fn_ppc: __floatuntikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* signed int -> float */
|
||||
|
||||
float_bench! {
|
||||
name: conv_i32_f32,
|
||||
sig: (a: i32) -> f32,
|
||||
crate_fn: conv::__floatsisf,
|
||||
sys_fn: __floatsisf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"cvtsi2ss {ret}, {a:e}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"scvtf {ret:s}, {a:w}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i32_f64,
|
||||
sig: (a: i32) -> f64,
|
||||
crate_fn: conv::__floatsidf,
|
||||
sys_fn: __floatsidf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"cvtsi2sd {ret}, {a:e}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"scvtf {ret:d}, {a:w}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i32_f128,
|
||||
sig: (a: i32) -> f128,
|
||||
crate_fn: conv::__floatsitf,
|
||||
crate_fn_ppc: conv::__floatsikf,
|
||||
sys_fn: __floatsitf,
|
||||
sys_fn_ppc: __floatsikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i64_f32,
|
||||
sig: (a: i64) -> f32,
|
||||
crate_fn: conv::__floatdisf,
|
||||
sys_fn: __floatdisf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"cvtsi2ss {ret}, {a:r}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"scvtf {ret:s}, {a:x}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i64_f64,
|
||||
sig: (a: i64) -> f64,
|
||||
crate_fn: conv::__floatdidf,
|
||||
sys_fn: __floatdidf,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"cvtsi2sd {ret}, {a:r}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"scvtf {ret:d}, {a:x}",
|
||||
a = in(reg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i64_f128,
|
||||
sig: (a: i64) -> f128,
|
||||
crate_fn: conv::__floatditf,
|
||||
crate_fn_ppc: conv::__floatdikf,
|
||||
sys_fn: __floatditf,
|
||||
sys_fn_ppc: __floatdikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i128_f32,
|
||||
sig: (a: i128) -> f32,
|
||||
crate_fn: conv::__floattisf,
|
||||
sys_fn: __floattisf,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i128_f64,
|
||||
sig: (a: i128) -> f64,
|
||||
crate_fn: conv::__floattidf,
|
||||
sys_fn: __floattidf,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i128_f128,
|
||||
sig: (a: i128) -> f128,
|
||||
crate_fn: conv::__floattitf,
|
||||
crate_fn_ppc: conv::__floattikf,
|
||||
sys_fn: __floattitf,
|
||||
sys_fn_ppc: __floattikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* float -> unsigned int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u32,
|
||||
sig: (a: f32) -> u32,
|
||||
crate_fn: conv::__fixunssfsi,
|
||||
sys_fn: __fixunssfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u32;
|
||||
asm!(
|
||||
"fcvtzu {ret:w}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u64,
|
||||
sig: (a: f32) -> u64,
|
||||
crate_fn: conv::__fixunssfdi,
|
||||
sys_fn: __fixunssfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u64;
|
||||
asm!(
|
||||
"fcvtzu {ret:x}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u128,
|
||||
sig: (a: f32) -> u128,
|
||||
crate_fn: conv::__fixunssfti,
|
||||
sys_fn: __fixunssfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_u32,
|
||||
sig: (a: f64) -> u32,
|
||||
crate_fn: conv::__fixunsdfsi,
|
||||
sys_fn: __fixunsdfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u32;
|
||||
asm!(
|
||||
"fcvtzu {ret:w}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_u64,
|
||||
sig: (a: f64) -> u64,
|
||||
crate_fn: conv::__fixunsdfdi,
|
||||
sys_fn: __fixunsdfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u64;
|
||||
asm!(
|
||||
"fcvtzu {ret:x}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_u128,
|
||||
sig: (a: f64) -> u128,
|
||||
crate_fn: conv::__fixunsdfti,
|
||||
sys_fn: __fixunsdfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u32,
|
||||
sig: (a: f128) -> u32,
|
||||
crate_fn: conv::__fixunstfsi,
|
||||
crate_fn_ppc: conv::__fixunskfsi,
|
||||
sys_fn: __fixunstfsi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u64,
|
||||
sig: (a: f128) -> u64,
|
||||
crate_fn: conv::__fixunstfdi,
|
||||
crate_fn_ppc: conv::__fixunskfdi,
|
||||
sys_fn: __fixunstfdi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u128,
|
||||
sig: (a: f128) -> u128,
|
||||
crate_fn: conv::__fixunstfti,
|
||||
crate_fn_ppc: conv::__fixunskfti,
|
||||
sys_fn: __fixunstfti,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* float -> signed int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i32,
|
||||
sig: (a: f32) -> i32,
|
||||
crate_fn: conv::__fixsfsi,
|
||||
sys_fn: __fixsfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcvtzs {ret:w}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i64,
|
||||
sig: (a: f32) -> i64,
|
||||
crate_fn: conv::__fixsfdi,
|
||||
sys_fn: __fixsfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i64;
|
||||
asm!(
|
||||
"fcvtzs {ret:x}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i128,
|
||||
sig: (a: f32) -> i128,
|
||||
crate_fn: conv::__fixsfti,
|
||||
sys_fn: __fixsfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i32,
|
||||
sig: (a: f64) -> i32,
|
||||
crate_fn: conv::__fixdfsi,
|
||||
sys_fn: __fixdfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcvtzs {ret:w}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i64,
|
||||
sig: (a: f64) -> i64,
|
||||
crate_fn: conv::__fixdfdi,
|
||||
sys_fn: __fixdfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i64;
|
||||
asm!(
|
||||
"fcvtzs {ret:x}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i128,
|
||||
sig: (a: f64) -> i128,
|
||||
crate_fn: conv::__fixdfti,
|
||||
sys_fn: __fixdfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i32,
|
||||
sig: (a: f128) -> i32,
|
||||
crate_fn: conv::__fixtfsi,
|
||||
crate_fn_ppc: conv::__fixkfsi,
|
||||
sys_fn: __fixtfsi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i64,
|
||||
sig: (a: f128) -> i64,
|
||||
crate_fn: conv::__fixtfdi,
|
||||
crate_fn_ppc: conv::__fixkfdi,
|
||||
sys_fn: __fixtfdi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i128,
|
||||
sig: (a: f128) -> i128,
|
||||
crate_fn: conv::__fixtfti,
|
||||
crate_fn_ppc: conv::__fixkfti,
|
||||
sys_fn: __fixtfti,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_conv() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
conv_u32_f32(&mut criterion);
|
||||
conv_u32_f64(&mut criterion);
|
||||
conv_u64_f32(&mut criterion);
|
||||
conv_u64_f64(&mut criterion);
|
||||
conv_u128_f32(&mut criterion);
|
||||
conv_u128_f64(&mut criterion);
|
||||
conv_i32_f32(&mut criterion);
|
||||
conv_i32_f64(&mut criterion);
|
||||
conv_i64_f32(&mut criterion);
|
||||
conv_i64_f64(&mut criterion);
|
||||
conv_i128_f32(&mut criterion);
|
||||
conv_i128_f64(&mut criterion);
|
||||
conv_f64_u32(&mut criterion);
|
||||
conv_f64_u64(&mut criterion);
|
||||
conv_f64_u128(&mut criterion);
|
||||
conv_f64_i32(&mut criterion);
|
||||
conv_f64_i64(&mut criterion);
|
||||
conv_f64_i128(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
|
||||
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
{
|
||||
conv_u32_f128(&mut criterion);
|
||||
conv_u64_f128(&mut criterion);
|
||||
conv_u128_f128(&mut criterion);
|
||||
conv_i32_f128(&mut criterion);
|
||||
conv_i64_f128(&mut criterion);
|
||||
conv_i128_f128(&mut criterion);
|
||||
conv_f128_u32(&mut criterion);
|
||||
conv_f128_u64(&mut criterion);
|
||||
conv_f128_u128(&mut criterion);
|
||||
conv_f128_i32(&mut criterion);
|
||||
conv_f128_i64(&mut criterion);
|
||||
conv_f128_i128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_conv);
|
||||
93
library/compiler-builtins/builtins-test/benches/float_div.rs
Normal file
93
library/compiler-builtins/builtins-test/benches/float_div.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::div;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
float_bench! {
|
||||
name: div_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: div::__divsf3,
|
||||
sys_fn: __divsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"divss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fdiv {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: div_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: div::__divdf3,
|
||||
sys_fn: __divdf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"divsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fdiv {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: div_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: div::__divtf3,
|
||||
crate_fn_ppc: div::__divkf3,
|
||||
sys_fn: __divtf3,
|
||||
sys_fn_ppc: __divkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_div() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
div_f32(&mut criterion);
|
||||
div_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
div_f128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_div);
|
||||
133
library/compiler-builtins/builtins-test/benches/float_extend.rs
Normal file
133
library/compiler-builtins/builtins-test/benches/float_extend.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
#![allow(unused_variables)] // "unused" f16 registers
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg_attr(f16_enabled, feature(f16))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::extend;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_bench! {
|
||||
name: extend_f16_f32,
|
||||
sig: (a: f16) -> f32,
|
||||
crate_fn: extend::__extendhfsf2,
|
||||
sys_fn: __extendhfsf2,
|
||||
sys_available: not(feature = "no-sys-f16"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"fcvt {ret:s}, {a:h}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_bench! {
|
||||
name: extend_f16_f64,
|
||||
sig: (a: f16) -> f64,
|
||||
crate_fn: extend::__extendhfdf2,
|
||||
sys_fn: __extendhfdf2,
|
||||
sys_available: not(feature = "no-sys-f16-f64-convert"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"fcvt {ret:d}, {a:h}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
float_bench! {
|
||||
name: extend_f16_f128,
|
||||
sig: (a: f16) -> f128,
|
||||
crate_fn: extend::__extendhftf2,
|
||||
crate_fn_ppc: extend::__extendhfkf2,
|
||||
sys_fn: __extendhftf2,
|
||||
sys_fn_ppc: __extendhfkf2,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: extend_f32_f64,
|
||||
sig: (a: f32) -> f64,
|
||||
crate_fn: extend::__extendsfdf2,
|
||||
sys_fn: __extendsfdf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"fcvt {ret:d}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: extend_f32_f128,
|
||||
sig: (a: f32) -> f128,
|
||||
crate_fn: extend::__extendsftf2,
|
||||
crate_fn_ppc: extend::__extendsfkf2,
|
||||
sys_fn: __extendsftf2,
|
||||
sys_fn_ppc: __extendsfkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: extend_f64_f128,
|
||||
sig: (a: f64) -> f128,
|
||||
crate_fn: extend::__extenddftf2,
|
||||
crate_fn_ppc: extend::__extenddfkf2,
|
||||
sys_fn: __extenddftf2,
|
||||
sys_fn_ppc: __extenddfkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
pub fn float_extend() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
// FIXME(#655): `f16` tests disabled until we can bootstrap symbols
|
||||
#[cfg(f16_enabled)]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
{
|
||||
extend_f16_f32(&mut criterion);
|
||||
extend_f16_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
extend_f16_f128(&mut criterion);
|
||||
}
|
||||
|
||||
extend_f32_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
extend_f32_f128(&mut criterion);
|
||||
extend_f64_f128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_extend);
|
||||
93
library/compiler-builtins/builtins-test/benches/float_mul.rs
Normal file
93
library/compiler-builtins/builtins-test/benches/float_mul.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::mul;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
float_bench! {
|
||||
name: mul_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: mul::__mulsf3,
|
||||
sys_fn: __mulsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"mulss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fmul {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: mul_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: mul::__muldf3,
|
||||
sys_fn: __muldf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"mulsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fmul {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: mul_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: mul::__multf3,
|
||||
crate_fn_ppc: mul::__mulkf3,
|
||||
sys_fn: __multf3,
|
||||
sys_fn_ppc: __mulkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_mul() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
mul_f32(&mut criterion);
|
||||
mul_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
mul_f128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_mul);
|
||||
49
library/compiler-builtins/builtins-test/benches/float_pow.rs
Normal file
49
library/compiler-builtins/builtins-test/benches/float_pow.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::pow;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
float_bench! {
|
||||
name: powi_f32,
|
||||
sig: (a: f32, b: i32) -> f32,
|
||||
crate_fn: pow::__powisf2,
|
||||
sys_fn: __powisf2,
|
||||
sys_available: all(),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: powi_f64,
|
||||
sig: (a: f64, b: i32) -> f64,
|
||||
crate_fn: pow::__powidf2,
|
||||
sys_fn: __powidf2,
|
||||
sys_available: all(),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
// FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are
|
||||
// distributed by nightly.
|
||||
#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
|
||||
float_bench! {
|
||||
name: powi_f128,
|
||||
sig: (a: f128, b: i32) -> f128,
|
||||
crate_fn: pow::__powitf2,
|
||||
crate_fn_ppc: pow::__powikf2,
|
||||
sys_fn: __powitf2,
|
||||
sys_fn_ppc: __powikf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_pow() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
powi_f32(&mut criterion);
|
||||
powi_f64(&mut criterion);
|
||||
|
||||
#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
|
||||
powi_f128(&mut criterion);
|
||||
}
|
||||
|
||||
criterion_main!(float_pow);
|
||||
93
library/compiler-builtins/builtins-test/benches/float_sub.rs
Normal file
93
library/compiler-builtins/builtins-test/benches/float_sub.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::sub;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
float_bench! {
|
||||
name: sub_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: sub::__subsf3,
|
||||
sys_fn: __subsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"subss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fsub {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: sub_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: sub::__subdf3,
|
||||
sys_fn: __subdf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"subsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fsub {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: sub_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: sub::__subtf3,
|
||||
crate_fn_ppc: sub::__subkf3,
|
||||
sys_fn: __subtf3,
|
||||
sys_fn_ppc: __subkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_sub() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
sub_f32(&mut criterion);
|
||||
sub_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
sub_f128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_sub);
|
||||
146
library/compiler-builtins/builtins-test/benches/float_trunc.rs
Normal file
146
library/compiler-builtins/builtins-test/benches/float_trunc.rs
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg_attr(f16_enabled, feature(f16))]
|
||||
|
||||
use builtins_test::float_bench;
|
||||
use compiler_builtins::float::trunc;
|
||||
use criterion::{Criterion, criterion_main};
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_bench! {
|
||||
name: trunc_f32_f16,
|
||||
sig: (a: f32) -> f16,
|
||||
crate_fn: trunc::__truncsfhf2,
|
||||
sys_fn: __truncsfhf2,
|
||||
sys_available: not(feature = "no-sys-f16"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f16;
|
||||
asm!(
|
||||
"fcvt {ret:h}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_bench! {
|
||||
name: trunc_f64_f16,
|
||||
sig: (a: f64) -> f16,
|
||||
crate_fn: trunc::__truncdfhf2,
|
||||
sys_fn: __truncdfhf2,
|
||||
sys_available: not(feature = "no-sys-f16-f64-convert"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f16;
|
||||
asm!(
|
||||
"fcvt {ret:h}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f64_f32,
|
||||
sig: (a: f64) -> f32,
|
||||
crate_fn: trunc::__truncdfsf2,
|
||||
sys_fn: __truncdfsf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"cvtsd2ss {ret}, {a}",
|
||||
a = in(xmm_reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"fcvt {ret:s}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
float_bench! {
|
||||
name: trunc_f128_f16,
|
||||
sig: (a: f128) -> f16,
|
||||
crate_fn: trunc::__trunctfhf2,
|
||||
crate_fn_ppc: trunc::__trunckfhf2,
|
||||
sys_fn: __trunctfhf2,
|
||||
sys_fn_ppc: __trunckfhf2,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: trunc_f128_f32,
|
||||
sig: (a: f128) -> f32,
|
||||
crate_fn: trunc::__trunctfsf2,
|
||||
crate_fn_ppc: trunc::__trunckfsf2,
|
||||
sys_fn: __trunctfsf2,
|
||||
sys_fn_ppc: __trunckfsf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: trunc_f128_f64,
|
||||
sig: (a: f128) -> f64,
|
||||
crate_fn: trunc::__trunctfdf2,
|
||||
crate_fn_ppc: trunc::__trunckfdf2,
|
||||
sys_fn: __trunctfdf2,
|
||||
sys_fn_ppc: __trunckfdf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
pub fn float_trunc() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
// FIXME(#655): `f16` tests disabled until we can bootstrap symbols
|
||||
#[cfg(f16_enabled)]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
{
|
||||
trunc_f32_f16(&mut criterion);
|
||||
trunc_f64_f16(&mut criterion);
|
||||
}
|
||||
|
||||
trunc_f64_f32(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
{
|
||||
// FIXME(#655): `f16` tests disabled until we can bootstrap symbols
|
||||
#[cfg(f16_enabled)]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
trunc_f128_f16(&mut criterion);
|
||||
|
||||
trunc_f128_f32(&mut criterion);
|
||||
trunc_f128_f64(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_main!(float_trunc);
|
||||
364
library/compiler-builtins/builtins-test/benches/mem.rs
Normal file
364
library/compiler-builtins/builtins-test/benches/mem.rs
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
#![feature(test)]
|
||||
|
||||
extern crate test;
|
||||
use test::{Bencher, black_box};
|
||||
|
||||
extern crate compiler_builtins;
|
||||
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
|
||||
|
||||
const WORD_SIZE: usize = core::mem::size_of::<usize>();
|
||||
|
||||
struct AlignedVec {
|
||||
vec: Vec<usize>,
|
||||
size: usize,
|
||||
}
|
||||
|
||||
impl AlignedVec {
|
||||
fn new(fill: u8, size: usize) -> Self {
|
||||
let mut broadcast = fill as usize;
|
||||
let mut bits = 8;
|
||||
while bits < WORD_SIZE * 8 {
|
||||
broadcast |= broadcast << bits;
|
||||
bits *= 2;
|
||||
}
|
||||
|
||||
let vec = vec![broadcast; (size + WORD_SIZE - 1) & !WORD_SIZE];
|
||||
AlignedVec { vec, size }
|
||||
}
|
||||
}
|
||||
|
||||
impl core::ops::Deref for AlignedVec {
|
||||
type Target = [u8];
|
||||
fn deref(&self) -> &[u8] {
|
||||
unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) }
|
||||
}
|
||||
}
|
||||
|
||||
impl core::ops::DerefMut for AlignedVec {
|
||||
fn deref_mut(&mut self) -> &mut [u8] {
|
||||
unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) }
|
||||
}
|
||||
}
|
||||
|
||||
fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
|
||||
let v1 = AlignedVec::new(1, n + offset1);
|
||||
let mut v2 = AlignedVec::new(0, n + offset2);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let src: &[u8] = black_box(&v1[offset1..]);
|
||||
let dst: &mut [u8] = black_box(&mut v2[offset2..]);
|
||||
dst.copy_from_slice(src);
|
||||
})
|
||||
}
|
||||
|
||||
fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
|
||||
let v1 = AlignedVec::new(1, n + offset1);
|
||||
let mut v2 = AlignedVec::new(0, n + offset2);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let src: &[u8] = black_box(&v1[offset1..]);
|
||||
let dst: &mut [u8] = black_box(&mut v2[offset2..]);
|
||||
unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) }
|
||||
})
|
||||
}
|
||||
|
||||
fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) {
|
||||
let mut v1 = AlignedVec::new(0, n + offset);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let dst: &mut [u8] = black_box(&mut v1[offset..]);
|
||||
let val: u8 = black_box(27);
|
||||
for b in dst {
|
||||
*b = val;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn memset_rust(b: &mut Bencher, n: usize, offset: usize) {
|
||||
let mut v1 = AlignedVec::new(0, n + offset);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let dst: &mut [u8] = black_box(&mut v1[offset..]);
|
||||
let val = black_box(27);
|
||||
unsafe { memset(dst.as_mut_ptr(), val, n) }
|
||||
})
|
||||
}
|
||||
|
||||
fn memcmp_builtin(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
v2[n - 1] = 1;
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let s1: &[u8] = black_box(&v1);
|
||||
let s2: &[u8] = black_box(&v2);
|
||||
s1.cmp(s2)
|
||||
})
|
||||
}
|
||||
|
||||
fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
v2[n - 1] = 1;
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let s1: &[u8] = black_box(&v1[0..]);
|
||||
let s2: &[u8] = black_box(&v2[1..]);
|
||||
s1.cmp(s2)
|
||||
})
|
||||
}
|
||||
|
||||
fn memcmp_rust(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
v2[n - 1] = 1;
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let s1: &[u8] = black_box(&v1);
|
||||
let s2: &[u8] = black_box(&v2);
|
||||
unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) }
|
||||
})
|
||||
}
|
||||
|
||||
fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
v2[n - 1] = 1;
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let s1: &[u8] = black_box(&v1[0..]);
|
||||
let s2: &[u8] = black_box(&v2[1..]);
|
||||
unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) }
|
||||
})
|
||||
}
|
||||
|
||||
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
|
||||
let mut v = AlignedVec::new(0, n + n / 2 + offset);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let s: &mut [u8] = black_box(&mut v);
|
||||
s.copy_within(0..n, n / 2 + offset);
|
||||
})
|
||||
}
|
||||
|
||||
fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) {
|
||||
let mut v = AlignedVec::new(0, n + n / 2 + offset);
|
||||
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr();
|
||||
let src: *const u8 = black_box(&v).as_ptr();
|
||||
unsafe { memmove(dst, src, n) };
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn memcpy_builtin_4096(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 4096, 0, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_4096(b: &mut Bencher) {
|
||||
memcpy_rust(b, 4096, 0, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_builtin_1048576(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 1048576, 0, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_1048576(b: &mut Bencher) {
|
||||
memcpy_rust(b, 1048576, 0, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_builtin_4096_offset(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 4096, 65, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_4096_offset(b: &mut Bencher) {
|
||||
memcpy_rust(b, 4096, 65, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_builtin_1048576_offset(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 1048576, 65, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_1048576_offset(b: &mut Bencher) {
|
||||
memcpy_rust(b, 1048576, 65, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_builtin_4096_misalign(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 4096, 65, 66)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_4096_misalign(b: &mut Bencher) {
|
||||
memcpy_rust(b, 4096, 65, 66)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_builtin_1048576_misalign(b: &mut Bencher) {
|
||||
memcpy_builtin(b, 1048576, 65, 66)
|
||||
}
|
||||
#[bench]
|
||||
fn memcpy_rust_1048576_misalign(b: &mut Bencher) {
|
||||
memcpy_rust(b, 1048576, 65, 66)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn memset_builtin_4096(b: &mut Bencher) {
|
||||
memset_builtin(b, 4096, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_rust_4096(b: &mut Bencher) {
|
||||
memset_rust(b, 4096, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_builtin_1048576(b: &mut Bencher) {
|
||||
memset_builtin(b, 1048576, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_rust_1048576(b: &mut Bencher) {
|
||||
memset_rust(b, 1048576, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_builtin_4096_offset(b: &mut Bencher) {
|
||||
memset_builtin(b, 4096, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_rust_4096_offset(b: &mut Bencher) {
|
||||
memset_rust(b, 4096, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_builtin_1048576_offset(b: &mut Bencher) {
|
||||
memset_builtin(b, 1048576, 65)
|
||||
}
|
||||
#[bench]
|
||||
fn memset_rust_1048576_offset(b: &mut Bencher) {
|
||||
memset_rust(b, 1048576, 65)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn memcmp_builtin_8(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 8)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_8(b: &mut Bencher) {
|
||||
memcmp_rust(b, 8)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_16(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 16)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_16(b: &mut Bencher) {
|
||||
memcmp_rust(b, 16)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_32(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 32)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_32(b: &mut Bencher) {
|
||||
memcmp_rust(b, 32)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_64(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 64)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_64(b: &mut Bencher) {
|
||||
memcmp_rust(b, 64)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_4096(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 4096)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_4096(b: &mut Bencher) {
|
||||
memcmp_rust(b, 4096)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_1048576(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 1048576)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_1048576(b: &mut Bencher) {
|
||||
memcmp_rust(b, 1048576)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_7(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 8)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_7(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 8)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_15(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 16)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_15(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 16)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_31(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 32)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_31(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 32)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_63(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 64)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_63(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 64)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_4095(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 4096)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_4095(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 4096)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 1048576)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_rust_unaligned_1048575(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 1048576)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn memmove_builtin_4096(b: &mut Bencher) {
|
||||
memmove_builtin(b, 4096, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_rust_4096(b: &mut Bencher) {
|
||||
memmove_rust(b, 4096, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_builtin_1048576(b: &mut Bencher) {
|
||||
memmove_builtin(b, 1048576, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_rust_1048576(b: &mut Bencher) {
|
||||
memmove_rust(b, 1048576, 0)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_builtin_4096_misalign(b: &mut Bencher) {
|
||||
memmove_builtin(b, 4096, 1)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_rust_4096_misalign(b: &mut Bencher) {
|
||||
memmove_rust(b, 4096, 1)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_builtin_1048576_misalign(b: &mut Bencher) {
|
||||
memmove_builtin(b, 1048576, 1)
|
||||
}
|
||||
#[bench]
|
||||
fn memmove_rust_1048576_misalign(b: &mut Bencher) {
|
||||
memmove_rust(b, 1048576, 1)
|
||||
}
|
||||
500
library/compiler-builtins/builtins-test/benches/mem_icount.rs
Normal file
500
library/compiler-builtins/builtins-test/benches/mem_icount.rs
Normal file
|
|
@ -0,0 +1,500 @@
|
|||
//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
|
||||
//! is stable enough to be tested in CI.
|
||||
|
||||
use std::hint::black_box;
|
||||
use std::{ops, slice};
|
||||
|
||||
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
|
||||
use iai_callgrind::{library_benchmark, library_benchmark_group, main};
|
||||
|
||||
const PAGE_SIZE: usize = 0x1000; // 4 kiB
|
||||
const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day
|
||||
const MEG1: usize = 1 << 20; // 1 MiB
|
||||
|
||||
#[derive(Clone)]
|
||||
#[repr(C, align(0x1000))]
|
||||
struct Page([u8; PAGE_SIZE]);
|
||||
|
||||
/// A buffer that is page-aligned by default, with an optional offset to create a
|
||||
/// misalignment.
|
||||
struct AlignedSlice {
|
||||
buf: Box<[Page]>,
|
||||
len: usize,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl AlignedSlice {
|
||||
/// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from
|
||||
/// page alignment.
|
||||
fn new_zeroed(len: usize, offset: usize) -> Self {
|
||||
assert!(offset < PAGE_SIZE);
|
||||
let total_len = len + offset;
|
||||
let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 };
|
||||
let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice();
|
||||
AlignedSlice { buf, len, offset }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for AlignedSlice {
|
||||
type Target = [u8];
|
||||
fn deref(&self) -> &Self::Target {
|
||||
unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::<u8>().add(self.offset), self.len) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::DerefMut for AlignedSlice {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
unsafe {
|
||||
slice::from_raw_parts_mut(
|
||||
self.buf.as_mut_ptr().cast::<u8>().add(self.offset),
|
||||
self.len,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod mcpy {
|
||||
use super::*;
|
||||
|
||||
struct Cfg {
|
||||
len: usize,
|
||||
s_off: usize,
|
||||
d_off: usize,
|
||||
}
|
||||
|
||||
fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
|
||||
let Cfg { len, s_off, d_off } = cfg;
|
||||
println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}");
|
||||
let mut src = AlignedSlice::new_zeroed(len, s_off);
|
||||
let dst = AlignedSlice::new_zeroed(len, d_off);
|
||||
src.fill(1);
|
||||
(len, src, dst)
|
||||
}
|
||||
|
||||
#[library_benchmark]
|
||||
#[benches::aligned(
|
||||
// Both aligned
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 32, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 64, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 512, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 4096, s_off: 0, d_off: 0 },
|
||||
Cfg { len: MEG1, s_off: 0, d_off: 0 },
|
||||
],
|
||||
setup = setup,
|
||||
)]
|
||||
#[benches::offset(
|
||||
// Both at the same offset
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 32, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 64, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 512, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 4096, s_off: 65, d_off: 65 },
|
||||
Cfg { len: MEG1, s_off: 65, d_off: 65 },
|
||||
],
|
||||
setup = setup,
|
||||
)]
|
||||
#[benches::misaligned(
|
||||
// `src` and `dst` both misaligned by different amounts
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 32, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 64, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 512, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 4096, s_off: 65, d_off: 66 },
|
||||
Cfg { len: MEG1, s_off: 65, d_off: 66 },
|
||||
],
|
||||
setup = setup,
|
||||
)]
|
||||
fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) {
|
||||
unsafe {
|
||||
black_box(memcpy(
|
||||
black_box(dst.as_mut_ptr()),
|
||||
black_box(src.as_ptr()),
|
||||
black_box(len),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
library_benchmark_group!(name = memcpy; benchmarks = bench);
|
||||
}
|
||||
|
||||
mod mset {
|
||||
use super::*;
|
||||
|
||||
struct Cfg {
|
||||
len: usize,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) {
|
||||
println!("bytes: {len}, offset: {offset}");
|
||||
(len, AlignedSlice::new_zeroed(len, offset))
|
||||
}
|
||||
|
||||
#[library_benchmark]
|
||||
#[benches::aligned(
|
||||
args = [
|
||||
Cfg { len: 16, offset: 0 },
|
||||
Cfg { len: 32, offset: 0 },
|
||||
Cfg { len: 64, offset: 0 },
|
||||
Cfg { len: 512, offset: 0 },
|
||||
Cfg { len: 4096, offset: 0 },
|
||||
Cfg { len: MEG1, offset: 0 },
|
||||
],
|
||||
setup = setup,
|
||||
)]
|
||||
#[benches::offset(
|
||||
args = [
|
||||
Cfg { len: 16, offset: 65 },
|
||||
Cfg { len: 32, offset: 65 },
|
||||
Cfg { len: 64, offset: 65 },
|
||||
Cfg { len: 512, offset: 65 },
|
||||
Cfg { len: 4096, offset: 65 },
|
||||
Cfg { len: MEG1, offset: 65 },
|
||||
],
|
||||
setup = setup,
|
||||
)]
|
||||
fn bench((len, mut dst): (usize, AlignedSlice)) {
|
||||
unsafe {
|
||||
black_box(memset(
|
||||
black_box(dst.as_mut_ptr()),
|
||||
black_box(27),
|
||||
black_box(len),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
library_benchmark_group!(name = memset; benchmarks = bench);
|
||||
}
|
||||
|
||||
mod mcmp {
|
||||
use super::*;
|
||||
|
||||
struct Cfg {
|
||||
len: usize,
|
||||
s_off: usize,
|
||||
d_off: usize,
|
||||
}
|
||||
|
||||
fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
|
||||
let Cfg { len, s_off, d_off } = cfg;
|
||||
println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}");
|
||||
let b1 = AlignedSlice::new_zeroed(len, s_off);
|
||||
let mut b2 = AlignedSlice::new_zeroed(len, d_off);
|
||||
b2[len - 1] = 1;
|
||||
(len, b1, b2)
|
||||
}
|
||||
|
||||
#[library_benchmark]
|
||||
#[benches::aligned(
|
||||
// Both aligned
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 32, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 64, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 512, s_off: 0, d_off: 0 },
|
||||
Cfg { len: 4096, s_off: 0, d_off: 0 },
|
||||
Cfg { len: MEG1, s_off: 0, d_off: 0 },
|
||||
],
|
||||
setup = setup
|
||||
)]
|
||||
#[benches::offset(
|
||||
// Both at the same offset
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 32, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 64, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 512, s_off: 65, d_off: 65 },
|
||||
Cfg { len: 4096, s_off: 65, d_off: 65 },
|
||||
Cfg { len: MEG1, s_off: 65, d_off: 65 },
|
||||
],
|
||||
setup = setup
|
||||
)]
|
||||
#[benches::misaligned(
|
||||
// `src` and `dst` both misaligned by different amounts
|
||||
args = [
|
||||
Cfg { len: 16, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 32, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 64, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 512, s_off: 65, d_off: 66 },
|
||||
Cfg { len: 4096, s_off: 65, d_off: 66 },
|
||||
Cfg { len: MEG1, s_off: 65, d_off: 66 },
|
||||
],
|
||||
setup = setup
|
||||
)]
|
||||
fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) {
|
||||
unsafe {
|
||||
black_box(memcmp(
|
||||
black_box(dst.as_mut_ptr()),
|
||||
black_box(src.as_ptr()),
|
||||
black_box(len),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
library_benchmark_group!(name = memcmp; benchmarks = bench);
|
||||
}
|
||||
|
||||
mod mmove {
|
||||
use Spread::{Aligned, Large, Medium, Small};
|
||||
|
||||
use super::*;
|
||||
|
||||
struct Cfg {
|
||||
len: usize,
|
||||
spread: Spread,
|
||||
off: usize,
|
||||
}
|
||||
|
||||
enum Spread {
|
||||
/// `src` and `dst` are close and have the same alignment (or offset).
|
||||
Aligned,
|
||||
/// `src` and `dst` are close.
|
||||
Small,
|
||||
/// `src` and `dst` are halfway offset in the buffer.
|
||||
Medium,
|
||||
/// `src` and `dst` only overlap by a single byte.
|
||||
Large,
|
||||
}
|
||||
|
||||
// Note that small and large are
|
||||
fn calculate_spread(len: usize, spread: Spread) -> usize {
|
||||
match spread {
|
||||
// Note that this test doesn't make sense for lengths less than len=128
|
||||
Aligned => {
|
||||
assert!(len > MAX_ALIGN, "aligned memset would have no overlap");
|
||||
MAX_ALIGN
|
||||
}
|
||||
Small => 1,
|
||||
Medium => (len / 2) + 1, // add 1 so all are misaligned
|
||||
Large => len - 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
|
||||
let Cfg { len, spread, off } = cfg;
|
||||
let spread = calculate_spread(len, spread);
|
||||
println!("bytes: {len}, spread: {spread}, offset: {off}, forward");
|
||||
assert!(spread < len, "memmove tests should have some overlap");
|
||||
let mut buf = AlignedSlice::new_zeroed(len + spread, off);
|
||||
let mut fill: usize = 0;
|
||||
buf[..len].fill_with(|| {
|
||||
fill += 1;
|
||||
fill as u8
|
||||
});
|
||||
(len, spread, buf)
|
||||
}
|
||||
|
||||
fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
|
||||
let Cfg { len, spread, off } = cfg;
|
||||
let spread = calculate_spread(len, spread);
|
||||
println!("bytes: {len}, spread: {spread}, offset: {off}, backward");
|
||||
assert!(spread < len, "memmove tests should have some overlap");
|
||||
let mut buf = AlignedSlice::new_zeroed(len + spread, off);
|
||||
let mut fill: usize = 0;
|
||||
buf[spread..].fill_with(|| {
|
||||
fill += 1;
|
||||
fill as u8
|
||||
});
|
||||
(len, spread, buf)
|
||||
}
|
||||
|
||||
#[library_benchmark]
|
||||
#[benches::aligned(
|
||||
args = [
|
||||
// Don't test small spreads since there is no overlap
|
||||
Cfg { len: 4096, spread: Aligned, off: 0 },
|
||||
Cfg { len: MEG1, spread: Aligned, off: 0 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::small_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Small, off: 0 },
|
||||
Cfg { len: 32, spread: Small, off: 0 },
|
||||
Cfg { len: 64, spread: Small, off: 0 },
|
||||
Cfg { len: 512, spread: Small, off: 0 },
|
||||
Cfg { len: 4096, spread: Small, off: 0 },
|
||||
Cfg { len: MEG1, spread: Small, off: 0 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::medium_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Medium, off: 0 },
|
||||
Cfg { len: 32, spread: Medium, off: 0 },
|
||||
Cfg { len: 64, spread: Medium, off: 0 },
|
||||
Cfg { len: 512, spread: Medium, off: 0 },
|
||||
Cfg { len: 4096, spread: Medium, off: 0 },
|
||||
Cfg { len: MEG1, spread: Medium, off: 0 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::large_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Large, off: 0 },
|
||||
Cfg { len: 32, spread: Large, off: 0 },
|
||||
Cfg { len: 64, spread: Large, off: 0 },
|
||||
Cfg { len: 512, spread: Large, off: 0 },
|
||||
Cfg { len: 4096, spread: Large, off: 0 },
|
||||
Cfg { len: MEG1, spread: Large, off: 0 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::aligned_off(
|
||||
args = [
|
||||
Cfg { len: 4096, spread: Aligned, off: 65 },
|
||||
Cfg { len: MEG1, spread: Aligned, off: 65 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::small_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Small, off: 65 },
|
||||
Cfg { len: 32, spread: Small, off: 65 },
|
||||
Cfg { len: 64, spread: Small, off: 65 },
|
||||
Cfg { len: 512, spread: Small, off: 65 },
|
||||
Cfg { len: 4096, spread: Small, off: 65 },
|
||||
Cfg { len: MEG1, spread: Small, off: 65 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::medium_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Medium, off: 65 },
|
||||
Cfg { len: 32, spread: Medium, off: 65 },
|
||||
Cfg { len: 64, spread: Medium, off: 65 },
|
||||
Cfg { len: 512, spread: Medium, off: 65 },
|
||||
Cfg { len: 4096, spread: Medium, off: 65 },
|
||||
Cfg { len: MEG1, spread: Medium, off: 65 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
#[benches::large_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Large, off: 65 },
|
||||
Cfg { len: 32, spread: Large, off: 65 },
|
||||
Cfg { len: 64, spread: Large, off: 65 },
|
||||
Cfg { len: 512, spread: Large, off: 65 },
|
||||
Cfg { len: 4096, spread: Large, off: 65 },
|
||||
Cfg { len: MEG1, spread: Large, off: 65 },
|
||||
],
|
||||
setup = setup_forward
|
||||
)]
|
||||
fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
|
||||
// Test moving from the start of the buffer toward the end
|
||||
unsafe {
|
||||
black_box(memmove(
|
||||
black_box(buf[spread..].as_mut_ptr()),
|
||||
black_box(buf.as_ptr()),
|
||||
black_box(len),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
#[library_benchmark]
|
||||
#[benches::aligned(
|
||||
args = [
|
||||
// Don't test small spreads since there is no overlap
|
||||
Cfg { len: 4096, spread: Aligned, off: 0 },
|
||||
Cfg { len: MEG1, spread: Aligned, off: 0 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::small_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Small, off: 0 },
|
||||
Cfg { len: 32, spread: Small, off: 0 },
|
||||
Cfg { len: 64, spread: Small, off: 0 },
|
||||
Cfg { len: 512, spread: Small, off: 0 },
|
||||
Cfg { len: 4096, spread: Small, off: 0 },
|
||||
Cfg { len: MEG1, spread: Small, off: 0 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::medium_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Medium, off: 0 },
|
||||
Cfg { len: 32, spread: Medium, off: 0 },
|
||||
Cfg { len: 64, spread: Medium, off: 0 },
|
||||
Cfg { len: 512, spread: Medium, off: 0 },
|
||||
Cfg { len: 4096, spread: Medium, off: 0 },
|
||||
Cfg { len: MEG1, spread: Medium, off: 0 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::large_spread(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Large, off: 0 },
|
||||
Cfg { len: 32, spread: Large, off: 0 },
|
||||
Cfg { len: 64, spread: Large, off: 0 },
|
||||
Cfg { len: 512, spread: Large, off: 0 },
|
||||
Cfg { len: 4096, spread: Large, off: 0 },
|
||||
Cfg { len: MEG1, spread: Large, off: 0 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::aligned_off(
|
||||
args = [
|
||||
// Don't test small spreads since there is no overlap
|
||||
Cfg { len: 4096, spread: Aligned, off: 65 },
|
||||
Cfg { len: MEG1, spread: Aligned, off: 65 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::small_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Small, off: 65 },
|
||||
Cfg { len: 32, spread: Small, off: 65 },
|
||||
Cfg { len: 64, spread: Small, off: 65 },
|
||||
Cfg { len: 512, spread: Small, off: 65 },
|
||||
Cfg { len: 4096, spread: Small, off: 65 },
|
||||
Cfg { len: MEG1, spread: Small, off: 65 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::medium_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Medium, off: 65 },
|
||||
Cfg { len: 32, spread: Medium, off: 65 },
|
||||
Cfg { len: 64, spread: Medium, off: 65 },
|
||||
Cfg { len: 512, spread: Medium, off: 65 },
|
||||
Cfg { len: 4096, spread: Medium, off: 65 },
|
||||
Cfg { len: MEG1, spread: Medium, off: 65 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
#[benches::large_spread_off(
|
||||
args = [
|
||||
Cfg { len: 16, spread: Large, off: 65 },
|
||||
Cfg { len: 32, spread: Large, off: 65 },
|
||||
Cfg { len: 64, spread: Large, off: 65 },
|
||||
Cfg { len: 512, spread: Large, off: 65 },
|
||||
Cfg { len: 4096, spread: Large, off: 65 },
|
||||
Cfg { len: MEG1, spread: Large, off: 65 },
|
||||
],
|
||||
setup = setup_backward
|
||||
)]
|
||||
fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
|
||||
// Test moving from the end of the buffer toward the start
|
||||
unsafe {
|
||||
black_box(memmove(
|
||||
black_box(buf.as_mut_ptr()),
|
||||
black_box(buf[spread..].as_ptr()),
|
||||
black_box(len),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
library_benchmark_group!(name = memmove; benchmarks = forward, backward);
|
||||
}
|
||||
|
||||
// Pull each module's `library_benchmark_group!` output (named `memcpy`,
// `memset`, `memcmp`, `memmove` above) to the crate root so `main!` can
// register them as the harness entry point.
use mcmp::memcmp;
use mcpy::memcpy;
use mmove::memmove;
use mset::memset;

main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);
||||
120
library/compiler-builtins/builtins-test/build.rs
Normal file
120
library/compiler-builtins/builtins-test/build.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
// Reuse the target-configuration helpers from the `compiler-builtins` crate
// itself (via `include!`) so this build script and the library agree on
// target detection.
mod builtins_configure {
    include!("../compiler-builtins/configure.rs");
}
||||
|
||||
/// Features to enable
///
/// Each variant names a set of float symbols that the target's system
/// libraries are missing; `main` maps them to `no-sys-*` cfg features.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum Feature {
    // System libraries provide no `f128` symbols.
    NoSysF128,
    // `f128 <-> int` conversion symbols are missing.
    NoSysF128IntConvert,
    // System libraries provide no `f16` symbols.
    NoSysF16,
    // `f16 <-> f64` conversion symbols are missing.
    NoSysF16F64Convert,
    // `f16 <-> f128` conversion symbols are missing.
    NoSysF16F128Convert,
}
||||
|
||||
impl Feature {
|
||||
fn implies(self) -> &'static [Self] {
|
||||
match self {
|
||||
Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(),
|
||||
Self::NoSysF128IntConvert => [].as_slice(),
|
||||
Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(),
|
||||
Self::NoSysF16F64Convert => [].as_slice(),
|
||||
Self::NoSysF16F128Convert => [].as_slice(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
println!("cargo::rerun-if-changed=../configure.rs");
|
||||
|
||||
let target = builtins_configure::Target::from_env();
|
||||
let mut features = HashSet::new();
|
||||
|
||||
// These platforms do not have f128 symbols available in their system libraries, so
|
||||
// skip related tests.
|
||||
if target.arch == "arm"
|
||||
|| target.vendor == "apple"
|
||||
|| target.env == "msvc"
|
||||
// GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See
|
||||
// <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>.
|
||||
|| (target.os == "windows" && target.env == "gnu")
|
||||
// FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86.
|
||||
// See <https://github.com/llvm/llvm-project/issues/77401>.
|
||||
|| target.arch == "x86"
|
||||
// 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See
|
||||
// <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105635926>.
|
||||
|| target.arch == "powerpc"
|
||||
|| target.arch == "powerpc64le"
|
||||
// FIXME: We get different results from the builtin functions. See
|
||||
// <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287>.
|
||||
|| target.arch == "powerpc64"
|
||||
{
|
||||
features.insert(Feature::NoSysF128);
|
||||
}
|
||||
|
||||
if target.arch == "x86" {
|
||||
// 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else
|
||||
features.insert(Feature::NoSysF128IntConvert);
|
||||
// FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries
|
||||
features.insert(Feature::NoSysF16F128Convert);
|
||||
}
|
||||
|
||||
// These platforms do not have f16 symbols available in their system libraries, so
|
||||
// skip related tests. Most of these are missing `f16 <-> f32` conversion routines.
|
||||
if (target.arch == "aarch64" && target.os == "linux")
|
||||
|| target.arch.starts_with("arm")
|
||||
|| target.arch == "powerpc"
|
||||
|| target.arch == "powerpc64"
|
||||
|| target.arch == "powerpc64le"
|
||||
|| target.arch == "loongarch64"
|
||||
|| (target.arch == "x86" && !target.has_feature("sse"))
|
||||
|| target.os == "windows"
|
||||
// Linking says "error: function signature mismatch: __extendhfsf2" and seems to
|
||||
// think the signature is either `(i32) -> f32` or `(f32) -> f32`. See
|
||||
// <https://github.com/llvm/llvm-project/issues/96438>.
|
||||
|| target.arch == "wasm32"
|
||||
|| target.arch == "wasm64"
|
||||
{
|
||||
features.insert(Feature::NoSysF16);
|
||||
}
|
||||
|
||||
// These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
|
||||
if target.vendor == "apple" || target.os == "windows" {
|
||||
features.insert(Feature::NoSysF16F64Convert);
|
||||
}
|
||||
|
||||
// Add implied features. Collection is required for borrows.
|
||||
features.extend(
|
||||
features
|
||||
.iter()
|
||||
.flat_map(|x| x.implies())
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
|
||||
for feature in features {
|
||||
let (name, warning) = match feature {
|
||||
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
|
||||
Feature::NoSysF128IntConvert => (
|
||||
"no-sys-f128-int-convert",
|
||||
"using apfloat fallback for f128 <-> int conversions",
|
||||
),
|
||||
Feature::NoSysF16F64Convert => (
|
||||
"no-sys-f16-f64-convert",
|
||||
"using apfloat fallback for f16 <-> f64 conversions",
|
||||
),
|
||||
Feature::NoSysF16F128Convert => (
|
||||
"no-sys-f16-f128-convert",
|
||||
"using apfloat fallback for f16 <-> f128 conversions",
|
||||
),
|
||||
Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"),
|
||||
};
|
||||
println!("cargo:warning={warning}");
|
||||
println!("cargo:rustc-cfg=feature=\"{name}\"");
|
||||
}
|
||||
|
||||
builtins_configure::configure_aliases(&target);
|
||||
builtins_configure::configure_f16_f128(&target);
|
||||
}
|
||||
366
library/compiler-builtins/builtins-test/src/bench.rs
Normal file
366
library/compiler-builtins/builtins-test/src/bench.rs
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
use alloc::vec::Vec;
|
||||
use core::cell::RefCell;
|
||||
|
||||
use compiler_builtins::float::Float;
|
||||
|
||||
/// Fuzz with these many items to ensure equal functions
///
/// Used by `float_bench!` for the correctness cross-check loops before any
/// timing starts.
pub const CHECK_ITER_ITEMS: u32 = 10_000;
/// Benchmark with this many items to get a variety
///
/// Used by `float_bench!` for the timed `bench_function` loops.
pub const BENCH_ITER_ITEMS: u32 = 500;
||||
|
||||
/// Still run benchmarks/tests but don't check correctness between
/// compiler-builtins and builtin system functions.
pub fn skip_sys_checks(test_name: &str) -> bool {
    // Tests skipped on every platform.
    const ALWAYS_SKIPPED: &[&str] = &[
        // FIXME(f16_f128): system symbols have incorrect results
        // <https://github.com/rust-lang/compiler-builtins/issues/617>
        "extend_f16_f32",
        "trunc_f32_f16",
        "trunc_f64_f16",
        // FIXME(#616): re-enable once fix is in nightly
        // <https://github.com/rust-lang/compiler-builtins/issues/616>
        "mul_f32",
        "mul_f64",
    ];

    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
    // in their benchmark modules due to runtime panics.
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];

    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const X86_NO_SSE_SKIPPED: &[&str] = &[
        "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
    ];

    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
    // uses `compiler-rt` version.
    // <https://github.com/llvm/llvm-project/issues/91840>
    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];

    // FIXME(llvm): system symbols have incorrect results on Windows
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
    const WINDOWS_SKIPPED: &[&str] = &[
        "conv_f32_u128",
        "conv_f32_i128",
        "conv_f64_u128",
        "conv_f64_i128",
    ];

    // The Arm symbols need a different ABI that our macro doesn't handle, just skip it
    if cfg!(target_arch = "arm") {
        return true;
    }

    // A single disjunction over the remaining platform-specific skip lists.
    ALWAYS_SKIPPED.contains(&test_name)
        || (cfg!(all(target_arch = "powerpc64", target_endian = "little"))
            && PPC64LE_SKIPPED.contains(&test_name))
        || (cfg!(all(target_arch = "x86", not(target_feature = "sse")))
            && X86_NO_SSE_SKIPPED.contains(&test_name))
        || (cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name))
        || (cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name))
}
||||
|
||||
/// Still run benchmarks/tests but don't check correctness between
/// compiler-builtins and assembly functions.
pub fn skip_asm_checks(_test_name: &str) -> bool {
    // No assembly implementation currently needs its checks disabled.
    false
}
||||
|
||||
/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten
/// assembly.
///
/// Expands to a Criterion benchmark function named `$name` that (1) cross-checks the
/// implementations against each other on a fuzz vector, then (2) times each one.
///
/// # Safety
///
/// The signature must be correct and any assembly must be sound.
#[macro_export]
macro_rules! float_bench {
    (
        // Name of this benchmark
        name: $name:ident,
        // The function signature to be tested
        sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty,
        // Path to the crate in compiler_builtins
        crate_fn: $crate_fn:path,
        // Optional alias on ppc
        $( crate_fn_ppc: $crate_fn_ppc:path, )?
        // Name of the system symbol
        sys_fn: $sys_fn:ident,
        // Optional alias on ppc
        $( sys_fn_ppc: $sys_fn_ppc:path, )?
        // Meta saying whether the system symbol is available
        sys_available: $sys_available:meta,
        // An optional function to validate the results of two functions are equal, if not
        // just `$ret_ty::check_eq`
        $( output_eq: $output_eq:expr, )?
        // Assembly implementations, if any.
        asm: [
            $(
                #[cfg($asm_meta:meta)] {
                    $($asm_tt:tt)*
                }
            );*
            $(;)?
        ]
        $(,)?
    ) => {paste::paste! {
        // SAFETY: macro invocation must use the correct signature
        #[cfg($sys_available)]
        unsafe extern "C" {
            /// Binding for the system function
            #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
            fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;


            // On PPC the symbol may go by a different name; `@coalesce_fn`
            // substitutes the alias when one was provided.
            #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
            float_bench! { @coalesce_fn $($sys_fn_ppc)? =>
                fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
            }
        }

        fn $name(c: &mut Criterion) {
            use core::hint::black_box;
            use compiler_builtins::float::Float;
            use $crate::bench::TestIO;

            #[inline(never)] // equalize with external calls
            fn crate_fn($($arg: $arg_ty),*) -> $ret_ty {
                #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
                let target_crate_fn = $crate_fn;

                // On PPC, use an alias if specified
                #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
                let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn);

                target_crate_fn( $($arg),* )
            }

            #[inline(always)] // already a branch
            #[cfg($sys_available)]
            fn sys_fn($($arg: $arg_ty),*) -> $ret_ty {
                #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
                let target_sys_fn = $sys_fn;

                // On PPC, use an alias if specified
                #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
                let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn);

                unsafe { target_sys_fn( $($arg),* ) }
            }

            // Only compiled when at least one asm block's cfg is active.
            #[inline(never)] // equalize with external calls
            #[cfg(any( $($asm_meta),* ))]
            fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty {
                use core::arch::asm;
                $(
                    #[cfg($asm_meta)]
                    unsafe { $($asm_tt)* }
                )*
            }

            let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS);
            let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS);
            let test_name = stringify!($name);
            let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq);

            // Verify math lines up. We run the crate functions even if we don't validate the
            // output here to make sure there are no panics or crashes.

            #[cfg($sys_available)]
            for ($($arg),*) in testvec.iter().copied() {
                let crate_res = crate_fn($($arg),*);
                let sys_res = sys_fn($($arg),*);

                if $crate::bench::skip_sys_checks(test_name) {
                    continue;
                }

                assert!(
                    check_eq(crate_res, sys_res),
                    "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}",
                    ($($arg),* ,)
                );
            }

            #[cfg(any( $($asm_meta),* ))]
            {
                for ($($arg),*) in testvec.iter().copied() {
                    let crate_res = crate_fn($($arg),*);
                    let asm_res = asm_fn($($arg),*);

                    if $crate::bench::skip_asm_checks(test_name) {
                        continue;
                    }

                    assert!(
                        check_eq(crate_res, asm_res),
                        "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}",
                        ($($arg),* ,)
                    );
                }
            }

            // Timed section: one Criterion bench per available implementation.
            let mut group = c.benchmark_group(test_name);
            group.bench_function("compiler-builtins", |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(crate_fn( $(black_box($arg)),* ));
                }
            }));

            #[cfg($sys_available)]
            group.bench_function("system", |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(sys_fn( $(black_box($arg)),* ));
                }
            }));

            #[cfg(any( $($asm_meta),* ))]
            group.bench_function(&format!(
                "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY
            ), |b| b.iter(|| {
                for ($($arg),*) in benchvec.iter().copied() {
                    black_box(asm_fn( $(black_box($arg)),* ));
                }
            }));

            group.finish();
        }
    }};

    // Allow overriding a default
    (@coalesce $specified:expr, $default:expr) => { $specified };
    (@coalesce, $default:expr) => { $default };

    // Allow overriding a function name
    (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => {
        fn $specified $($tt)+
    };
    (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => {
        fn $default_name $($tt)+
    };
}
||||
|
||||
/// A type used as either an input or output to/from a benchmark function.
pub trait TestIO: Sized {
    /// Build a fuzz vector of `len` values via the crate's `fuzz*` helpers.
    fn make_testvec(len: u32) -> Vec<Self>;
    /// Whether two outputs should be considered equal when cross-checking
    /// implementations (e.g. bit-representation equality for floats).
    fn check_eq(a: Self, b: Self) -> bool;
}
||||
|
||||
// Implements `TestIO` for scalar types and for the 2-tuples used by two-input
// benchmarks. Tuple `check_eq` is `unimplemented!()` because tuples only ever
// appear as *inputs*, never as compared outputs.
macro_rules! impl_testio {
    (float $($f_ty:ty),+) => {$(
        impl TestIO for $f_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_float(len, |a| ret.borrow_mut().push(a));
                ret.into_inner()
            }

            fn check_eq(a: Self, b: Self) -> bool {
                // Bit-representation comparison (distinguishes NaN payloads, signed zero).
                Float::eq_repr(a, b)
            }
        }

        impl TestIO for ($f_ty, $f_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b)));
                ret.into_inner()
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )*};

    (int $($i_ty:ty),+) => {$(
        impl TestIO for $i_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz(len, |a| ret.borrow_mut().push(a));
                ret.into_inner()
            }

            fn check_eq(a: Self, b: Self) -> bool {
                a == b
            }
        }

        impl TestIO for ($i_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ret = RefCell::new(Vec::new());
                crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b)));
                ret.into_inner()
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )*};

    ((float, int) ($f_ty:ty, $i_ty:ty)) => {
        impl TestIO for ($f_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // refcell because fuzz_* takes a `Fn`
                let ivec = RefCell::new(Vec::new());
                let fvec = RefCell::new(Vec::new());

                // Cross product of ~sqrt(len) ints with ~sqrt(len) floats
                // keeps the total near `len`.
                crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a));
                crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a));

                let mut ret = Vec::new();
                let ivec = ivec.into_inner();
                let fvec = fvec.into_inner();

                for f in fvec {
                    for i in &ivec {
                        ret.push((f, *i));
                    }
                }

                ret
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    }
}
||||
|
||||
// Instantiate `TestIO` for every float and integer width the benchmarks use;
// `f16`/`f128` impls are gated on the cfgs set by the build script.
#[cfg(f16_enabled)]
impl_testio!(float f16);
impl_testio!(float f32, f64);
#[cfg(f128_enabled)]
impl_testio!(float f128);
impl_testio!(int i16, i32, i64, i128);
impl_testio!(int u16, u32, u64, u128);
impl_testio!((float, int)(f32, i32));
impl_testio!((float, int)(f64, i32));
#[cfg(f128_enabled)]
impl_testio!((float, int)(f128, i32));
||||
337
library/compiler-builtins/builtins-test/src/lib.rs
Normal file
337
library/compiler-builtins/builtins-test/src/lib.rs
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
//! This crate is for integration testing and fuzz testing of functions in `compiler-builtins`. This
|
||||
//! includes publicly documented intrinsics and some internal alternative implementation functions
|
||||
//! such as `usize_leading_zeros_riscv` (which are tested because they are configured for
|
||||
//! architectures not tested by the CI).
|
||||
//!
|
||||
//! The general idea is to use a combination of edge case testing and randomized fuzz testing. The
|
||||
//! edge case testing is crucial for checking cases like where both inputs are equal or equal to
|
||||
//! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to
|
||||
//! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search
|
||||
//! space in as few iterations as possible. See `fuzz_values` in `builtins-test/tests/misc.rs` for
|
||||
//! an example.
|
||||
//!
|
||||
//! Some floating point tests are disabled for specific architectures, because they do not have
|
||||
//! correct rounding.
|
||||
#![no_std]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg_attr(f16_enabled, feature(f16))]
|
||||
|
||||
pub mod bench;
|
||||
extern crate alloc;
|
||||
|
||||
use compiler_builtins::float::Float;
|
||||
use compiler_builtins::int::{Int, MinInt};
|
||||
use rand_xoshiro::Xoshiro128StarStar;
|
||||
use rand_xoshiro::rand_core::{RngCore, SeedableRng};
|
||||
|
||||
/// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs
/// are caught by the edge case testers. Most of the remaining bugs triggered by more complex
/// sequences are caught well within 10_000 fuzz iterations. For classes of algorithms like division
/// that are vulnerable to rare edge cases, we want 1_000_000 iterations to be more confident. In
/// practical CI, however, we only want to run the more strenuous test once to catch algorithmic
/// level bugs, and run the 10_000 iteration test on most targets. Target-dependent bugs are likely
/// to involve miscompilation and misconfiguration that is likely to break algorithms in quickly
/// caught ways. We choose to configure `N = 1_000_000` iterations for `x86_64` targets (and if
/// debug assertions are disabled. Tests without `--release` would take too long) which are likely
/// to have fast hardware, and run `N = 10_000` for all other targets.
pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) {
    // Fast hardware assumed, release build: run the strenuous count.
    1_000_000
} else {
    10_000
};
||||
|
||||
/// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as:
/// 11110101010101011110111110011111
/// 10110101010100001011101011001010
/// 1000000000000000
/// 10000000000000110111110000001010
/// 1111011111111101010101111110101
/// 101111111110100000000101000000
/// 10000000110100000000100010101
/// 1010101010101000
fn fuzz_step<I: Int>(rng: &mut Xoshiro128StarStar, x: &mut I) {
    let ones = !I::ZERO;
    // Reduces a shift amount mod `BITS`; assumes `I::BITS` is a power of two.
    let bit_indexing_mask: u32 = I::BITS - 1;
    // It happens that all the RNG we need can come from one call. 7 bits are needed to index a
    // worst case 128 bit integer, and there are 4 indexes that need to be made plus 4 bits for
    // selecting operations
    let rng32 = rng.next_u32();

    // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of
    // ones with `lhs` and `rhs`.
    let r0 = bit_indexing_mask & rng32;
    let r1 = bit_indexing_mask & (rng32 >> 7);
    // `wrapping_shl` zeroes the low `r0` bits, then `rotate_left` repositions the run of ones.
    let mask = ones.wrapping_shl(r0).rotate_left(r1);
    match (rng32 >> 14) % 4 {
        0 => *x |= mask,
        1 => *x &= mask,
        // both 2 and 3 to make XORs as common as ORs and ANDs combined
        _ => *x ^= mask,
    }

    // Alternating ones and zeros (e.x. 0b1010101010101010). This catches second-order
    // problems that might occur for algorithms with two modes of operation (potentially
    // there is some invariant that can be broken and maintained via alternating between modes,
    // breaking the algorithm when it reaches the end).
    let mut alt_ones = I::ONE;
    for _ in 0..(I::BITS / 2) {
        alt_ones <<= 2;
        alt_ones |= I::ONE;
    }
    let r0 = bit_indexing_mask & (rng32 >> 16);
    let r1 = bit_indexing_mask & (rng32 >> 23);
    let mask = alt_ones.wrapping_shl(r0).rotate_left(r1);
    // NOTE(review): only the top 2 bits select here, so `_` (XOR) covers cases 2 and 3,
    // mirroring the weighting of the first selector above.
    match rng32 >> 30 {
        0 => *x |= mask,
        1 => *x &= mask,
        _ => *x ^= mask,
    }
}
|
||||
|
||||
// We need macros like this, because `#![no_std]` prevents us from using iterators
//
// Generates every integer whose binary representation is a single contiguous run of ones
// (plus zero-adjacent patterns derived from `FUZZ_LENGTHS`), binding each to `$case` and
// running `$inner`.
//
// Fix: the original body used bare `I` (`i0..I::FUZZ_NUM`, `I::from_unsigned`) instead of
// the `$I` metavariable, so it silently captured an `I` from the call site rather than
// using its own type argument. Both call sites pass `I`, so the expansion is unchanged,
// but the macro is now hygienic and usable with any type identifier.
macro_rules! edge_cases {
    ($I:ident, $case:ident, $inner:block) => {
        for i0 in 0..$I::FUZZ_NUM {
            let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32);
            for i1 in i0..$I::FUZZ_NUM {
                let mask_hi =
                    (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32);
                let $case = $I::from_unsigned(mask_lo & mask_hi);
                $inner
            }
        }
    };
}
|
||||
|
||||
/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find
|
||||
/// edge cases, followed by a more random fuzzer that runs `n` times.
|
||||
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F)
|
||||
where
|
||||
<I as MinInt>::UnsignedInt: Int,
|
||||
{
|
||||
// edge case tester. Calls `f` 210 times for u128.
|
||||
// zero gets skipped by the loop
|
||||
f(I::ZERO);
|
||||
edge_cases!(I, case, {
|
||||
f(case);
|
||||
});
|
||||
|
||||
// random fuzzer
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x: I = MinInt::ZERO;
|
||||
for _ in 0..n {
|
||||
fuzz_step(&mut rng, &mut x);
|
||||
f(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// The same as `fuzz`, except `f` has two inputs.
|
||||
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F)
|
||||
where
|
||||
<I as MinInt>::UnsignedInt: Int,
|
||||
{
|
||||
// Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`.
|
||||
edge_cases!(I, case, {
|
||||
f(I::ZERO, case);
|
||||
});
|
||||
edge_cases!(I, case, {
|
||||
f(case, I::ZERO);
|
||||
});
|
||||
// Nested edge tester. Calls `f` 44100 times for `u128`.
|
||||
edge_cases!(I, case0, {
|
||||
edge_cases!(I, case1, {
|
||||
f(case0, case1);
|
||||
})
|
||||
});
|
||||
|
||||
// random fuzzer
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x: I = I::ZERO;
|
||||
let mut y: I = I::ZERO;
|
||||
for _ in 0..n {
|
||||
fuzz_step(&mut rng, &mut x);
|
||||
fuzz_step(&mut rng, &mut y);
|
||||
f(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tester for shift functions
|
||||
pub fn fuzz_shift<I: Int, F: Fn(I, u32)>(f: F) {
|
||||
// Shift functions are very simple and do not need anything other than shifting a small
|
||||
// set of random patterns for every fuzz length.
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x: I = MinInt::ZERO;
|
||||
for i in 0..I::FUZZ_NUM {
|
||||
fuzz_step(&mut rng, &mut x);
|
||||
f(x, MinInt::ZERO);
|
||||
f(x, I::FUZZ_LENGTHS[i] as u32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Random fuzzing step for floats: mutates the sign, exponent, and significand
/// of `f` independently, then reassembles the value with `F::from_parts`.
fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
    let rng32 = rng.next_u32();
    // we need to fuzz the different parts of the float separately, because the masking on larger
    // significands will tend to set the exponent to all ones or all zeros frequently

    // sign bit fuzzing
    let sign = (rng32 & 1) != 0;

    // exponent fuzzing. Only 4 bits for the selector needed.
    let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE;
    let r0 = (rng32 >> 1) % F::EXP_BITS;
    let r1 = (rng32 >> 5) % F::EXP_BITS;
    // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing
    // the sign bit.
    let mask = if r1 == 0 {
        ones.wrapping_shr(r0)
    } else {
        // Rotate the `EXP_BITS`-wide field by `r1`; the `& ones` keeps the mask
        // confined to the exponent field.
        let tmp = ones.wrapping_shr(r0);
        (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones
    };
    // Extract the biased exponent field, then OR/AND/XOR the mask into it.
    let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS;
    match (rng32 >> 9) % 4 {
        0 => exp |= mask,
        1 => exp &= mask,
        // XOR twice as likely as OR or AND, matching `fuzz_step`
        _ => exp ^= mask,
    }

    // significand fuzzing
    let mut sig = f.to_bits() & F::SIG_MASK;
    fuzz_step(rng, &mut sig);
    // re-mask: `fuzz_step` may have set bits above the significand field
    sig &= F::SIG_MASK;

    *f = F::from_parts(sign, exp, sig);
}
|
||||
|
||||
// Iterates every combination of edge-case exponent, edge-case significand, and sign,
// binding the assembled float to `$case` and running `$inner`.
//
// Fix: the original body used bare `F` (`F::Int::ZERO`, `F::from_parts`, ...) instead of
// the `$F` metavariable, so it only worked when the caller's float type parameter happened
// to be named `F`. Both call sites pass `F`, so the expansion is unchanged, but the macro
// is now hygienic.
macro_rules! float_edge_cases {
    ($F:ident, $case:ident, $inner:block) => {
        for exponent in [
            $F::Int::ZERO,
            $F::Int::ONE,
            $F::Int::ONE << ($F::EXP_BITS / 2),
            ($F::Int::ONE << ($F::EXP_BITS - 1)) - $F::Int::ONE,
            $F::Int::ONE << ($F::EXP_BITS - 1),
            ($F::Int::ONE << ($F::EXP_BITS - 1)) + $F::Int::ONE,
            ($F::Int::ONE << $F::EXP_BITS) - $F::Int::ONE,
        ]
        .iter()
        {
            for significand in [
                $F::Int::ZERO,
                $F::Int::ONE,
                $F::Int::ONE << ($F::SIG_BITS / 2),
                ($F::Int::ONE << ($F::SIG_BITS - 1)) - $F::Int::ONE,
                $F::Int::ONE << ($F::SIG_BITS - 1),
                ($F::Int::ONE << ($F::SIG_BITS - 1)) + $F::Int::ONE,
                ($F::Int::ONE << $F::SIG_BITS) - $F::Int::ONE,
            ]
            .iter()
            {
                for sign in [false, true].iter() {
                    let $case = $F::from_parts(*sign, *exponent, *significand);
                    $inner
                }
            }
        }
    };
}
|
||||
|
||||
pub fn fuzz_float<F: Float, E: Fn(F)>(n: u32, f: E) {
|
||||
float_edge_cases!(F, case, {
|
||||
f(case);
|
||||
});
|
||||
|
||||
// random fuzzer
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x = F::ZERO;
|
||||
for _ in 0..n {
|
||||
fuzz_float_step(&mut rng, &mut x);
|
||||
f(x);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fuzz_float_2<F: Float, E: Fn(F, F)>(n: u32, f: E) {
|
||||
float_edge_cases!(F, case0, {
|
||||
float_edge_cases!(F, case1, {
|
||||
f(case0, case1);
|
||||
});
|
||||
});
|
||||
|
||||
// random fuzzer
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x = F::ZERO;
|
||||
let mut y = F::ZERO;
|
||||
for _ in 0..n {
|
||||
fuzz_float_step(&mut rng, &mut x);
|
||||
fuzz_float_step(&mut rng, &mut y);
|
||||
f(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform an operation using builtin types if available, falling back to apfloat if not.
///
/// When `$sys_available` holds, `$op` runs directly on the native float type; otherwise the
/// arguments are round-tripped through `rustc_apfloat::ieee::$apfloat_ty` bit patterns.
#[macro_export]
macro_rules! apfloat_fallback {
    (
        $float_ty:ty,
        // Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix.
        $apfloat_ty:ident,
        // Cfg expression for when builtin system operations should be used
        $sys_available:meta,
        // The expression to run. This expression may use `FloatTy` for its signature.
        // Optionally, the final conversion back to a float can be suppressed using
        // `=> no_convert` (for e.g. operations that return a bool).
        //
        // If the apfloat needs a different operation, it can be provided here.
        $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?,
        // Arguments that get passed to `$op` after converting to a float
        $($arg:expr),+
        $(,)?
    ) => {{
        #[cfg($sys_available)]
        let ret = {
            type FloatTy = $float_ty;
            $op( $($arg),+ )
        };

        #[cfg(not($sys_available))]
        let ret = {
            use rustc_apfloat::Float;
            type FloatTy = rustc_apfloat::ieee::$apfloat_ty;

            apfloat_fallback!(@inner
                fty: $float_ty,
                // Apply a conversion to `FloatTy` to each arg, then pass all args to `$op`
                op_res: $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ),
                $(apfloat_op: $apfloat_op, )?
                $(conv_opts: $convert,)?
                args: $($arg),+
            )
        };

        ret
    }};

    // Operations that do not need converting back to a float
    (@inner fty: $float_ty:ty, op_res: $val:expr, conv_opts: no_convert, args: $($_arg:expr),+) => {
        $val
    };

    // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This
    // is the default.
    (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{
        // ignore the status, just get the value
        let unwrapped = $val.value;

        // Convert the apfloat bit pattern back into the native float type.
        <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap())
    }};

    // This is the case where we can't use the same expression for the default builtin and
    // nonstandard apfloat fallback (e.g. `as` casts in std are normal functions in apfloat, so
    // two separate expressions must be specified.
    (@inner
        fty: $float_ty:ty, op_res: $_val:expr,
        apfloat_op: $apfloat_op:expr, args: $($arg:expr),+
    ) => {{
        $apfloat_op($($arg),+)
    }};
}
|
||||
143
library/compiler-builtins/builtins-test/tests/addsub.rs
Normal file
143
library/compiler-builtins/builtins-test/tests/addsub.rs
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
#![allow(unused_macros)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::*;
|
||||
|
||||
mod int_addsub {
    use super::*;

    // Generates one test per type that checks the wrapping add/sub builtins
    // against the standard library's `wrapping_add`/`wrapping_sub`.
    macro_rules! sum {
        ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => {
            $(
                #[test]
                fn $fn_add() {
                    use compiler_builtins::int::addsub::{$fn_add, $fn_sub};

                    fuzz_2(N, |x: $i, y: $i| {
                        let add0 = x.wrapping_add(y);
                        let sub0 = x.wrapping_sub(y);
                        let add1: $i = $fn_add(x, y);
                        let sub1: $i = $fn_sub(x, y);
                        if add0 != add1 {
                            panic!(
                                "{}({}, {}): std: {}, builtins: {}",
                                stringify!($fn_add), x, y, add0, add1
                            );
                        }
                        if sub0 != sub1 {
                            panic!(
                                "{}({}, {}): std: {}, builtins: {}",
                                stringify!($fn_sub), x, y, sub0, sub1
                            );
                        }
                    });
                }
            )*
        };
    }

    // Same as `sum!`, but for the overflowing variants, which report overflow
    // through an `&mut i32` out-parameter instead of a returned bool.
    macro_rules! overflowing_sum {
        ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => {
            $(
                #[test]
                fn $fn_add() {
                    use compiler_builtins::int::addsub::{$fn_add, $fn_sub};

                    fuzz_2(N, |x: $i, y: $i| {
                        let (add0, add_o0)= x.overflowing_add(y);
                        let (sub0, sub_o0)= x.overflowing_sub(y);
                        let mut add_o1 = 0;
                        let mut sub_o1 = 0;
                        let add1: $i = $fn_add(x, y, &mut add_o1);
                        let sub1: $i = $fn_sub(x, y, &mut sub_o1);
                        if add0 != add1 || i32::from(add_o0) != add_o1 {
                            panic!(
                                "{}({}, {}): std: {:?}, builtins: {:?}",
                                stringify!($fn_add), x, y, (add0, add_o0) , (add1, add_o1)
                            );
                        }
                        if sub0 != sub1 || i32::from(sub_o0) != sub_o1 {
                            panic!(
                                "{}({}, {}): std: {:?}, builtins: {:?}",
                                stringify!($fn_sub), x, y, (sub0, sub_o0) , (sub1, sub_o1)
                            );
                        }
                    });
                }
            )*
        };
    }

    // Integer addition and subtraction is very simple, so 100 fuzzing passes should be plenty.
    sum! {
        u128, __rust_u128_add, __rust_u128_sub;
        i128, __rust_i128_add, __rust_i128_sub;
    }

    overflowing_sum! {
        u128, __rust_u128_addo, __rust_u128_subo;
        i128, __rust_i128_addo, __rust_i128_subo;
    }
}
|
||||
|
||||
// Generates one test per float type that checks the builtin add/sub against the
// system operators (or apfloat when `$sys_available` does not hold).
// `eq_repr` compares bit patterns so NaN results are checked too.
macro_rules! float_sum {
    ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            fn $fn_add() {
                use core::ops::{Add, Sub};
                use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float};

                fuzz_float_2(N, |x: $f, y: $f| {
                    let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y);
                    let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y);
                    let add1: $f = $fn_add(x, y);
                    let sub1: $f = $fn_sub(x, y);
                    if !Float::eq_repr(add0, add1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn_add), x, y, add0, add1
                        );
                    }
                    if !Float::eq_repr(sub0, sub1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                            stringify!($fn_sub), x, y, sub0, sub1
                        );
                    }
                });
            }
        )*
    }
}
|
||||
|
||||
// Skipped on x87-only x86: its 80-bit registers do not round `f32`/`f64` correctly.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_addsub {
    use super::*;

    float_sum! {
        f32, __addsf3, __subsf3, Single, all();
        f64, __adddf3, __subdf3, Double, all();
    }
}
|
||||
|
||||
// `f128` add/sub for non-PowerPC targets (PowerPC uses the `kf` symbol names below).
#[cfg(f128_enabled)]
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod float_addsub_f128 {
    use super::*;

    float_sum! {
        f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
    }
}
|
||||
|
||||
// PowerPC names its IEEE binary128 routines `__addkf3`/`__subkf3`.
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod float_addsub_f128_ppc {
    use super::*;

    float_sum! {
        f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
    }
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
#![cfg(all(
|
||||
target_arch = "arm",
|
||||
not(any(target_env = "gnu", target_env = "musl")),
|
||||
target_os = "linux",
|
||||
feature = "mem"
|
||||
))]
|
||||
#![feature(compiler_builtins_lib)]
|
||||
#![no_std]
|
||||
|
||||
extern crate compiler_builtins;
|
||||
|
||||
// test runner
|
||||
extern crate utest_cortex_m_qemu;
|
||||
|
||||
// overrides `panic!`
|
||||
#[macro_use]
|
||||
extern crate utest_macros;
|
||||
|
||||
use core::mem;
|
||||
|
||||
// Route `panic!` through the utest harness so failures are reported under QEMU.
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
|
||||
|
||||
// AEABI routines under test, provided by `compiler_builtins`.
// The `4` suffix means the destination pointer is guaranteed 4-byte aligned.
extern "C" {
    fn __aeabi_memclr4(dest: *mut u8, n: usize);
    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
|
||||
|
||||
/// An 8-byte buffer forced to 4-byte alignment by the zero-length `u32` member,
/// as required by the `*4` AEABI routines.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Creates a zero-filled aligned buffer.
    fn new() -> Self {
        Self {
            array: [0; 8],
            _alignment: [],
        }
    }
}
|
||||
|
||||
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;

    // For every length 0..=8: fill with 0xff first so the assert proves
    // `__aeabi_memclr4` actually zeroed the first `n` bytes.
    for n in 0..9 {
        unsafe {
            __aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
            __aeabi_memclr4(xs.as_mut_ptr(), n);
        }

        assert!(xs[0..n].iter().all(|x| *x == 0));
    }
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
#![cfg(all(
|
||||
target_arch = "arm",
|
||||
not(any(target_env = "gnu", target_env = "musl")),
|
||||
target_os = "linux",
|
||||
feature = "mem"
|
||||
))]
|
||||
#![feature(compiler_builtins_lib)]
|
||||
#![no_std]
|
||||
|
||||
extern crate compiler_builtins;
|
||||
|
||||
// test runner
|
||||
extern crate utest_cortex_m_qemu;
|
||||
|
||||
// overrides `panic!`
|
||||
#[macro_use]
|
||||
extern crate utest_macros;
|
||||
|
||||
// Route `panic!` through the utest harness so failures are reported under QEMU.
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
|
||||
|
||||
// AEABI routines under test, provided by `compiler_builtins`.
// The `4` suffix means both pointers are guaranteed 4-byte aligned.
extern "C" {
    fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
    fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}
|
||||
|
||||
/// An 8-byte buffer forced to 4-byte alignment by the zero-length `u32` member,
/// as required by `__aeabi_memcpy4`.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Wraps `array` in an aligned buffer.
    fn new(array: [u8; 8]) -> Self {
        Self {
            array,
            _alignment: [],
        }
    }
}
|
||||
|
||||
#[test]
fn memcpy() {
    let mut dest = [0; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];

    // Cover every length including the full buffer: the original `0..dest.len()`
    // never exercised `n == 4` (the memclr test loops `0..9` over an 8-byte
    // buffer, so the full-length case was clearly intended to be covered).
    for n in 0..=dest.len() {
        // reset so stale bytes from the previous iteration can't mask a miss
        dest.copy_from_slice(&[0; 4]);

        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n])
    }
}
|
||||
|
||||
#[test]
fn memcpy4() {
    let mut aligned = Aligned::new([0; 8]);
    let dest = &mut aligned.array;
    let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d];

    // Cover every length including the full buffer: the original `0..dest.len()`
    // never exercised `n == 8`.
    for n in 0..=dest.len() {
        // reset so stale bytes from the previous iteration can't mask a miss
        dest.copy_from_slice(&[0; 8]);

        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n])
    }
}
|
||||
240
library/compiler-builtins/builtins-test/tests/aeabi_memset.rs
Normal file
240
library/compiler-builtins/builtins-test/tests/aeabi_memset.rs
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
#![cfg(all(
|
||||
target_arch = "arm",
|
||||
not(any(target_env = "gnu", target_env = "musl")),
|
||||
target_os = "linux",
|
||||
feature = "mem"
|
||||
))]
|
||||
#![feature(compiler_builtins_lib)]
|
||||
#![no_std]
|
||||
|
||||
extern crate compiler_builtins;
|
||||
|
||||
// test runner
|
||||
extern crate utest_cortex_m_qemu;
|
||||
|
||||
// overrides `panic!`
|
||||
#[macro_use]
|
||||
extern crate utest_macros;
|
||||
|
||||
use core::mem;
|
||||
|
||||
// Route `panic!` through the utest harness so failures are reported under QEMU.
macro_rules! panic {
    ($($tt:tt)*) => {
        upanic!($($tt)*);
    };
}
|
||||
|
||||
// AEABI routine under test; the `4` suffix means `dest` is guaranteed
// 4-byte aligned. Only the low byte of `c` is used as the fill value.
extern "C" {
    fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
|
||||
|
||||
/// An 8-byte buffer forced to 4-byte alignment by the zero-length `u32` member,
/// as required by `__aeabi_memset4`.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Wraps `array` in an aligned buffer.
    fn new(array: [u8; 8]) -> Self {
        Self {
            array,
            _alignment: [],
        }
    }
}
|
||||
|
||||
#[test]
fn zero() {
    // A zero-length set must leave the destination untouched, whatever it held.
    for fill in [0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;
        let c = 0xdeadbeef;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) }

        assert_eq!(*xs, [fill; 8]);
    }
}
|
||||
|
||||
#[test]
fn one() {
    let n = 1;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0, 0, 0, 0, 0, 0, 0]),
        (1u8, [0xef, 1, 1, 1, 1, 1, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn two() {
    let n = 2;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0, 0, 0, 0, 0, 0]),
        (1u8, [0xef, 0xef, 1, 1, 1, 1, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn three() {
    let n = 3;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]),
        (1u8, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn four() {
    let n = 4;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]),
        (1u8, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn five() {
    let n = 5;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]),
        (1u8, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn six() {
    let n = 6;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]),
        (1u8, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn seven() {
    let n = 7;
    let c = 0xdeadbeef;
    // (initial fill, expected contents after setting the first `n` bytes to 0xef)
    for (fill, expected) in [
        (0u8, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]),
        (1u8, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]),
    ] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, expected);
    }
}
|
||||
|
||||
#[test]
fn eight() {
    let n = 8;
    let c = 0xdeadbeef;
    // Full-buffer set: the expected result is all 0xef regardless of the fill.
    for fill in [0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]);
    }
}
|
||||
134
library/compiler-builtins/builtins-test/tests/big.rs
Normal file
134
library/compiler-builtins/builtins-test/tests/big.rs
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
use compiler_builtins::int::{HInt, MinInt, i256, u256};
|
||||
|
||||
/// A pattern with distinct high and low 64-bit halves, used to check word placement on widening.
const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
|
||||
|
||||
/// Print a `u256` as hex since we can't add format implementations
|
||||
fn hexu(v: u256) -> String {
|
||||
format!(
|
||||
"0x{:016x}{:016x}{:016x}{:016x}",
|
||||
v.0[3], v.0[2], v.0[1], v.0[0]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
fn widen_u128() {
    // Zero-extension: low words carry the value, high words are zero.
    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
    assert_eq!(
        LOHI_SPLIT.widen(),
        u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])
    );
}
|
||||
|
||||
#[test]
fn widen_i128() {
    // Sign-extension: a negative value fills the high words with ones...
    assert_eq!((-1i128).widen(), u256::MAX.signed());
    assert_eq!(
        (LOHI_SPLIT as i128).widen(),
        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
    );
    // ...while `zero_widen` ignores the sign.
    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
}
|
||||
|
||||
#[test]
fn widen_mul_u128() {
    // (lhs, rhs, expected 256-bit product)
    let tests = [
        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
        (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
        (u128::MIN, u128::MIN, u256::ZERO),
        (1234, 0, u256::ZERO),
        (0, 1234, u256::ZERO),
    ];

    // Collect all mismatches so a failure reports every bad case, not just the first.
    let mut errors = Vec::new();
    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
        let res = a.widen_mul(b);
        let res_z = a.zero_widen_mul(b);
        // For unsigned inputs both widening multiplies must agree.
        assert_eq!(res, res_z);
        if res != exp {
            errors.push((i, a, b, exp, res));
        }
    }

    for (i, a, b, exp, res) in &errors {
        eprintln!(
            "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}",
            hexu(*exp),
            hexu(*res)
        );
    }
    assert!(errors.is_empty());
}
|
||||
|
||||
#[test]
fn not_u128() {
    // Bitwise NOT of all-zeros is all-ones.
    assert_eq!(!u256::ZERO, u256::MAX);
}
|
||||
|
||||
#[test]
fn shr_u128() {
    // Values that fit entirely in the low 128 bits, so `u256 >> s` must agree
    // with the widened `u128 >> s` for every shift 0..128.
    let only_low = [
        1,
        u16::MAX.into(),
        u32::MAX.into(),
        u64::MAX.into(),
        u128::MAX,
    ];

    let mut errors = Vec::new();

    for a in only_low {
        // Perturb each base value slightly to avoid only testing round numbers.
        for perturb in 0..10 {
            let a = a.saturating_add(perturb);
            for shift in 0..128 {
                let res = a.widen() >> shift;
                let expected = (a >> shift).widen();
                if res != expected {
                    errors.push((a.widen(), shift, res, expected));
                }
            }
        }
    }

    // Hand-computed cases around every 64-bit word boundary of `u256::MAX >> s`.
    let check = [
        (
            u256::MAX,
            1,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
        ),
        (
            u256::MAX,
            5,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
        ),
        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
        (u256::MAX, 255, u256([1, 0, 0, 0])),
    ];

    for (input, shift, expected) in check {
        let res = input >> shift;
        if res != expected {
            errors.push((input, shift, res, expected));
        }
    }

    for (a, b, res, expected) in &errors {
        eprintln!(
            "FAILURE: {} >> {b} = {} got {}",
            hexu(*a),
            hexu(*expected),
            hexu(*res),
        );
    }
    assert!(errors.is_empty());
}
|
||||
184
library/compiler-builtins/builtins-test/tests/cmp.rs
Normal file
184
library/compiler-builtins/builtins-test/tests/cmp.rs
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
#![allow(unused_macros)]
|
||||
#![allow(unreachable_code)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::*;
|
||||
|
||||
mod float_comparisons {
|
||||
use super::*;
|
||||
|
||||
    // For each `(unordered_val, builtin_fn)` pair: compute the reference three-way
    // comparison of `$x` and `$y` (-1 less, 0 equal, 1 greater; `$unordered_val`
    // when either operand is NaN) and check it against the builtin's result.
    macro_rules! cmp {
        (
            $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta,
            $($unordered_val:expr, $fn:ident);*;
        ) => {
            $(
                let cmp0 = if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x: FloatTy| x.is_nan() => no_convert,
                    $x
                ) || apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |y: FloatTy| y.is_nan() => no_convert,
                    $y
                )
                {
                    $unordered_val
                } else if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x, y| x < y => no_convert,
                    $x, $y
                ) {
                    -1
                } else if apfloat_fallback!(
                    $f, $apfloat_ty, $sys_available,
                    |x, y| x == y => no_convert,
                    $x, $y
                ) {
                    0
                } else {
                    1
                };

                let cmp1 = $fn($x, $y);
                if cmp0 != cmp1 {
                    panic!(
                        "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
                        stringify!($fn), $x, $y, cmp0, cmp1
                    );
                }
            )*
        };
    }
|
||||
|
||||
    #[test]
    fn cmp_f32() {
        use compiler_builtins::float::cmp::{
            __eqsf2, __gesf2, __gtsf2, __lesf2, __ltsf2, __nesf2, __unordsf2,
        };

        fuzz_float_2(N, |x: f32, y: f32| {
            assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan());
            // The per-function value is the expected result when either input is
            // NaN (the unordered case).
            cmp!(f32, x, y, Single, all(),
                1, __ltsf2;
                1, __lesf2;
                1, __eqsf2;
                -1, __gesf2;
                -1, __gtsf2;
                1, __nesf2;
            );
        });
    }
|
||||
|
||||
    #[test]
    fn cmp_f64() {
        use compiler_builtins::float::cmp::{
            __eqdf2, __gedf2, __gtdf2, __ledf2, __ltdf2, __nedf2, __unorddf2,
        };

        fuzz_float_2(N, |x: f64, y: f64| {
            assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan());
            // The per-function value is the expected result when either input is
            // NaN (the unordered case).
            cmp!(f64, x, y, Double, all(),
                1, __ltdf2;
                1, __ledf2;
                1, __eqdf2;
                -1, __gedf2;
                -1, __gtdf2;
                1, __nedf2;
            );
        });
    }
|
||||
|
||||
    #[test]
    #[cfg(f128_enabled)]
    fn cmp_f128() {
        // PowerPC uses `kf`-suffixed symbol names for IEEE binary128; alias them
        // to the generic `tf` names so the body below is target-independent.
        #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
        use compiler_builtins::float::cmp::{
            __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2,
            __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2,
        };
        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
        use compiler_builtins::float::cmp::{
            __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2,
        };

        fuzz_float_2(N, |x: f128, y: f128| {
            // `is_nan` goes through the apfloat fallback because system `f128`
            // support may be unavailable (`no-sys-f128`).
            let x_is_nan = apfloat_fallback!(
                f128, Quad, not(feature = "no-sys-f128"),
                |x: FloatTy| x.is_nan() => no_convert,
                x
            );
            let y_is_nan = apfloat_fallback!(
                f128, Quad, not(feature = "no-sys-f128"),
                |x: FloatTy| x.is_nan() => no_convert,
                y
            );

            assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan);

            // The per-function value is the expected result when either input is
            // NaN (the unordered case).
            cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"),
                1, __lttf2;
                1, __letf2;
                1, __eqtf2;
                -1, __getf2;
                -1, __gttf2;
                1, __netf2;
            );
        });
    }
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
mod float_comparisons_arm {
|
||||
use super::*;
|
||||
|
||||
macro_rules! cmp2 {
|
||||
($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => {
|
||||
$(
|
||||
let cmp0: i32 = if $x.is_nan() || $y.is_nan() {
|
||||
$unordered_val
|
||||
} else {
|
||||
$fn_std as i32
|
||||
};
|
||||
let cmp1: i32 = $fn_builtins($x, $y);
|
||||
if cmp0 != cmp1 {
|
||||
panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1);
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cmp_f32() {
|
||||
use compiler_builtins::float::cmp::{
|
||||
__aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt,
|
||||
};
|
||||
|
||||
fuzz_float_2(N, |x: f32, y: f32| {
|
||||
cmp2!(x, y,
|
||||
0, x < y, __aeabi_fcmplt;
|
||||
0, x <= y, __aeabi_fcmple;
|
||||
0, x == y, __aeabi_fcmpeq;
|
||||
0, x >= y, __aeabi_fcmpge;
|
||||
0, x > y, __aeabi_fcmpgt;
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cmp_f64() {
|
||||
use compiler_builtins::float::cmp::{
|
||||
__aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt,
|
||||
};
|
||||
|
||||
fuzz_float_2(N, |x: f64, y: f64| {
|
||||
cmp2!(x, y,
|
||||
0, x < y, __aeabi_dcmplt;
|
||||
0, x <= y, __aeabi_dcmple;
|
||||
0, x == y, __aeabi_dcmpeq;
|
||||
0, x >= y, __aeabi_dcmpge;
|
||||
0, x > y, __aeabi_dcmpgt;
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
364
library/compiler-builtins/builtins-test/tests/conv.rs
Normal file
364
library/compiler-builtins/builtins-test/tests/conv.rs
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg_attr(f16_enabled, feature(f16))]
|
||||
// makes configuration easier
|
||||
#![allow(unused_macros)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use builtins_test::*;
|
||||
use compiler_builtins::float::Float;
|
||||
use rustc_apfloat::{Float as _, FloatConvert as _};
|
||||
|
||||
mod i_to_f {
|
||||
use super::*;
|
||||
|
||||
macro_rules! i_to_f {
|
||||
($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
|
||||
$(
|
||||
#[test]
|
||||
fn $fn() {
|
||||
use compiler_builtins::float::conv::$fn;
|
||||
use compiler_builtins::int::Int;
|
||||
|
||||
fuzz(N, |x: $i_ty| {
|
||||
let f0 = apfloat_fallback!(
|
||||
$f_ty, $apfloat_ty, $sys_available,
|
||||
|x| x as $f_ty;
|
||||
// When the builtin is not available, we need to use a different conversion
|
||||
// method (since apfloat doesn't support `as` casting).
|
||||
|x: $i_ty| {
|
||||
use compiler_builtins::int::MinInt;
|
||||
|
||||
let apf = if <$i_ty>::SIGNED {
|
||||
FloatTy::from_i128(x.try_into().unwrap()).value
|
||||
} else {
|
||||
FloatTy::from_u128(x.try_into().unwrap()).value
|
||||
};
|
||||
|
||||
<$f_ty>::from_bits(apf.to_bits())
|
||||
},
|
||||
x
|
||||
);
|
||||
let f1: $f_ty = $fn(x);
|
||||
|
||||
#[cfg($sys_available)] {
|
||||
// This makes sure that the conversion produced the best rounding possible, and does
|
||||
// this independent of `x as $into` rounding correctly.
|
||||
// This assumes that float to integer conversion is correct.
|
||||
let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty;
|
||||
let y = f1 as $i_ty;
|
||||
let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty;
|
||||
let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x);
|
||||
let error = <$i_ty as Int>::abs_diff(y, x);
|
||||
let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x);
|
||||
|
||||
// The first two conditions check that none of the two closest float values are
|
||||
// strictly closer in representation to `x`. The second makes sure that rounding is
|
||||
// towards even significand if two float values are equally close to the integer.
|
||||
if error_minus < error
|
||||
|| error_plus < error
|
||||
|| ((error_minus == error || error_plus == error)
|
||||
&& ((f0.to_bits() & 1) != 0))
|
||||
{
|
||||
if !cfg!(any(
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)) {
|
||||
panic!(
|
||||
"incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
|
||||
stringify!($fn),
|
||||
x,
|
||||
f1.to_bits(),
|
||||
y_minus_ulp,
|
||||
y,
|
||||
y_plus_ulp,
|
||||
error_minus,
|
||||
error,
|
||||
error_plus,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test against native conversion. We disable testing on all `x86` because of
|
||||
// rounding bugs with `i686`. `powerpc` also has the same rounding bug.
|
||||
if !Float::eq_repr(f0, f1) && !cfg!(any(
|
||||
target_arch = "x86",
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)) {
|
||||
panic!(
|
||||
"{}({}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
f0,
|
||||
f1,
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
i_to_f! { f32, Single, all(),
|
||||
u32, __floatunsisf;
|
||||
i32, __floatsisf;
|
||||
u64, __floatundisf;
|
||||
i64, __floatdisf;
|
||||
u128, __floatuntisf;
|
||||
i128, __floattisf;
|
||||
}
|
||||
|
||||
i_to_f! { f64, Double, all(),
|
||||
u32, __floatunsidf;
|
||||
i32, __floatsidf;
|
||||
u64, __floatundidf;
|
||||
i64, __floatdidf;
|
||||
u128, __floatuntidf;
|
||||
i128, __floattidf;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
|
||||
u32, __floatunsitf;
|
||||
i32, __floatsitf;
|
||||
u64, __floatunditf;
|
||||
i64, __floatditf;
|
||||
u128, __floatuntitf;
|
||||
i128, __floattitf;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
|
||||
u32, __floatunsikf;
|
||||
i32, __floatsikf;
|
||||
u64, __floatundikf;
|
||||
i64, __floatdikf;
|
||||
u128, __floatuntikf;
|
||||
i128, __floattikf;
|
||||
}
|
||||
}
|
||||
|
||||
mod f_to_i {
|
||||
use super::*;
|
||||
|
||||
macro_rules! f_to_i {
|
||||
($x:ident, $f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
|
||||
$(
|
||||
// it is undefined behavior in the first place to do conversions with NaNs
|
||||
if !apfloat_fallback!(
|
||||
$f_ty, $apfloat_ty, $sys_available, |x: FloatTy| x.is_nan() => no_convert, $x
|
||||
) {
|
||||
let conv0 = apfloat_fallback!(
|
||||
$f_ty, $apfloat_ty, $sys_available,
|
||||
// Use an `as` cast when the builtin is available on the system.
|
||||
|x| x as $i_ty;
|
||||
// When the builtin is not available, we need to use a different conversion
|
||||
// method (since apfloat doesn't support `as` casting).
|
||||
|x: $f_ty| {
|
||||
use compiler_builtins::int::MinInt;
|
||||
|
||||
let apf = FloatTy::from_bits(x.to_bits().into());
|
||||
let bits: usize = <$i_ty>::BITS.try_into().unwrap();
|
||||
|
||||
let err_fn = || panic!(
|
||||
"Unable to convert value {x:?} to type {}:", stringify!($i_ty)
|
||||
);
|
||||
|
||||
if <$i_ty>::SIGNED {
|
||||
<$i_ty>::try_from(apf.to_i128(bits).value).ok().unwrap_or_else(err_fn)
|
||||
} else {
|
||||
<$i_ty>::try_from(apf.to_u128(bits).value).ok().unwrap_or_else(err_fn)
|
||||
}
|
||||
},
|
||||
$x
|
||||
);
|
||||
let conv1: $i_ty = $fn($x);
|
||||
if conv0 != conv1 {
|
||||
panic!("{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), $x, conv0, conv1);
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn f32_to_int() {
|
||||
use compiler_builtins::float::conv::{
|
||||
__fixsfdi, __fixsfsi, __fixsfti, __fixunssfdi, __fixunssfsi, __fixunssfti,
|
||||
};
|
||||
|
||||
fuzz_float(N, |x: f32| {
|
||||
f_to_i!(x, f32, Single, all(),
|
||||
u32, __fixunssfsi;
|
||||
u64, __fixunssfdi;
|
||||
u128, __fixunssfti;
|
||||
i32, __fixsfsi;
|
||||
i64, __fixsfdi;
|
||||
i128, __fixsfti;
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn f64_to_int() {
|
||||
use compiler_builtins::float::conv::{
|
||||
__fixdfdi, __fixdfsi, __fixdfti, __fixunsdfdi, __fixunsdfsi, __fixunsdfti,
|
||||
};
|
||||
|
||||
fuzz_float(N, |x: f64| {
|
||||
f_to_i!(x, f64, Double, all(),
|
||||
u32, __fixunsdfsi;
|
||||
u64, __fixunsdfdi;
|
||||
u128, __fixunsdfti;
|
||||
i32, __fixdfsi;
|
||||
i64, __fixdfdi;
|
||||
i128, __fixdfti;
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(f128_enabled)]
|
||||
fn f128_to_int() {
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::conv::{
|
||||
__fixkfdi as __fixtfdi, __fixkfsi as __fixtfsi, __fixkfti as __fixtfti,
|
||||
__fixunskfdi as __fixunstfdi, __fixunskfsi as __fixunstfsi,
|
||||
__fixunskfti as __fixunstfti,
|
||||
};
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::conv::{
|
||||
__fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti,
|
||||
};
|
||||
|
||||
fuzz_float(N, |x: f128| {
|
||||
f_to_i!(
|
||||
x,
|
||||
f128,
|
||||
Quad,
|
||||
not(feature = "no-sys-f128-int-convert"),
|
||||
u32, __fixunstfsi;
|
||||
u64, __fixunstfdi;
|
||||
u128, __fixunstfti;
|
||||
i32, __fixtfsi;
|
||||
i64, __fixtfdi;
|
||||
i128, __fixtfti;
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! f_to_f {
|
||||
(
|
||||
$mod:ident,
|
||||
$(
|
||||
$from_ty:ty => $to_ty:ty,
|
||||
$from_ap_ty:ident => $to_ap_ty:ident,
|
||||
$fn:ident, $sys_available:meta
|
||||
);+;
|
||||
) => {$(
|
||||
#[test]
|
||||
fn $fn() {
|
||||
use compiler_builtins::float::{$mod::$fn, Float};
|
||||
use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty};
|
||||
|
||||
fuzz_float(N, |x: $from_ty| {
|
||||
let tmp0: $to_ty = apfloat_fallback!(
|
||||
$from_ty,
|
||||
$from_ap_ty,
|
||||
$sys_available,
|
||||
|x: $from_ty| x as $to_ty;
|
||||
|x: $from_ty| {
|
||||
let from_apf = FloatTy::from_bits(x.to_bits().into());
|
||||
// Get `value` directly to ignore INVALID_OP
|
||||
let to_apf: $to_ap_ty = from_apf.convert(&mut false).value;
|
||||
<$to_ty>::from_bits(to_apf.to_bits().try_into().unwrap())
|
||||
},
|
||||
x
|
||||
);
|
||||
let tmp1: $to_ty = $fn(x);
|
||||
|
||||
if !Float::eq_repr(tmp0, tmp1) {
|
||||
panic!(
|
||||
"{}({:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
tmp0,
|
||||
tmp1
|
||||
);
|
||||
}
|
||||
})
|
||||
}
|
||||
)+};
|
||||
}
|
||||
|
||||
mod extend {
|
||||
use super::*;
|
||||
|
||||
f_to_f! {
|
||||
extend,
|
||||
f32 => f64, Single => Double, __extendsfdf2, all();
|
||||
}
|
||||
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
#[cfg(not(any(
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64",
|
||||
target_arch = "loongarch64"
|
||||
)))]
|
||||
f_to_f! {
|
||||
extend,
|
||||
f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
|
||||
f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
|
||||
f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
|
||||
f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
|
||||
f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
|
||||
f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
f_to_f! {
|
||||
extend,
|
||||
// FIXME(#655): `f16` tests disabled until we can bootstrap symbols
|
||||
f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128");
|
||||
f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
}
|
||||
|
||||
mod trunc {
|
||||
use super::*;
|
||||
|
||||
f_to_f! {
|
||||
trunc,
|
||||
f64 => f32, Double => Single, __truncdfsf2, all();
|
||||
}
|
||||
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
#[cfg(not(any(
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64",
|
||||
target_arch = "loongarch64"
|
||||
)))]
|
||||
f_to_f! {
|
||||
trunc,
|
||||
f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
|
||||
f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
|
||||
f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
|
||||
f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
|
||||
f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
|
||||
f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
f_to_f! {
|
||||
trunc,
|
||||
// FIXME(#655): `f16` tests disabled until we can bootstrap symbols
|
||||
f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128");
|
||||
f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
}
|
||||
164
library/compiler-builtins/builtins-test/tests/div_rem.rs
Normal file
164
library/compiler-builtins/builtins-test/tests/div_rem.rs
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
#![feature(f128)]
|
||||
#![allow(unused_macros)]
|
||||
|
||||
use builtins_test::*;
|
||||
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
|
||||
use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc};
|
||||
|
||||
// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows
|
||||
// that sometimes 100_000 iterations of the random fuzzer is needed.
|
||||
|
||||
/// Creates intensive test functions for division functions of a certain size
|
||||
macro_rules! test {
|
||||
(
|
||||
$n:expr, // the number of bits in a $iX or $uX
|
||||
$uX:ident, // unsigned integer that will be shifted
|
||||
$iX:ident, // signed version of $uX
|
||||
$test_name:ident, // name of the test function
|
||||
$unsigned_name:ident, // unsigned division function
|
||||
$signed_name:ident // signed division function
|
||||
) => {
|
||||
#[test]
|
||||
fn $test_name() {
|
||||
fuzz_2(N, |lhs, rhs| {
|
||||
if rhs == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut rem: $uX = 0;
|
||||
let quo: $uX = $unsigned_name(lhs, rhs, Some(&mut rem));
|
||||
if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) {
|
||||
panic!(
|
||||
"unsigned division function failed with lhs:{} rhs:{} \
|
||||
std:({}, {}) builtins:({}, {})",
|
||||
lhs,
|
||||
rhs,
|
||||
lhs.wrapping_div(rhs),
|
||||
lhs.wrapping_rem(rhs),
|
||||
quo,
|
||||
rem
|
||||
);
|
||||
}
|
||||
|
||||
// test the signed division function also
|
||||
let lhs = lhs as $iX;
|
||||
let rhs = rhs as $iX;
|
||||
let mut rem: $iX = 0;
|
||||
let quo: $iX = $signed_name(lhs, rhs, &mut rem);
|
||||
// We cannot just test that
|
||||
// `lhs == rhs.wrapping_mul(quo).wrapping_add(rem)`, but also
|
||||
// need to make sure the remainder isn't larger than the divisor
|
||||
// and has the correct sign.
|
||||
let incorrect_rem = if rem == 0 {
|
||||
false
|
||||
} else if rhs == $iX::MIN {
|
||||
// `rhs.wrapping_abs()` would overflow, so handle this case
|
||||
// separately.
|
||||
(lhs.is_negative() != rem.is_negative()) || (rem == $iX::MIN)
|
||||
} else {
|
||||
(lhs.is_negative() != rem.is_negative())
|
||||
|| (rhs.wrapping_abs() <= rem.wrapping_abs())
|
||||
};
|
||||
if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) {
|
||||
panic!(
|
||||
"signed division function failed with lhs:{} rhs:{} \
|
||||
std:({}, {}) builtins:({}, {})",
|
||||
lhs,
|
||||
rhs,
|
||||
lhs.wrapping_div(rhs),
|
||||
lhs.wrapping_rem(rhs),
|
||||
quo,
|
||||
rem
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4);
|
||||
test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4);
|
||||
test!(128, u128, i128, div_rem_ti4, __udivmodti4, __divmodti4);
|
||||
|
||||
#[test]
|
||||
fn divide_sparc() {
|
||||
fuzz_2(N, |lhs, rhs| {
|
||||
if rhs == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut rem: u128 = 0;
|
||||
let quo: u128 = u128_divide_sparc(lhs, rhs, &mut rem);
|
||||
if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) {
|
||||
panic!(
|
||||
"u128_divide_sparc({}, {}): \
|
||||
std:({}, {}), builtins:({}, {})",
|
||||
lhs,
|
||||
rhs,
|
||||
lhs.wrapping_div(rhs),
|
||||
lhs.wrapping_rem(rhs),
|
||||
quo,
|
||||
rem
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
macro_rules! float {
|
||||
($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
|
||||
$(
|
||||
#[test]
|
||||
fn $fn() {
|
||||
use compiler_builtins::float::{div::$fn, Float};
|
||||
use core::ops::Div;
|
||||
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
|
||||
let quo1: $f = $fn(x, y);
|
||||
|
||||
// ARM SIMD instructions always flush subnormals to zero
|
||||
if cfg!(target_arch = "arm") &&
|
||||
((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if !Float::eq_repr(quo0, quo1) {
|
||||
panic!(
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
y,
|
||||
quo0,
|
||||
quo1
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
|
||||
mod float_div {
|
||||
use super::*;
|
||||
|
||||
float! {
|
||||
f32, __divsf3, Single, all();
|
||||
f64, __divdf3, Double, all();
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
float! {
|
||||
f128, __divtf3, Quad,
|
||||
// FIXME(llvm): there is a bug in LLVM rt.
|
||||
// See <https://github.com/llvm/llvm-project/issues/91840>.
|
||||
not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
float! {
|
||||
f128, __divkf3, Quad, not(feature = "no-sys-f128");
|
||||
}
|
||||
}
|
||||
72
library/compiler-builtins/builtins-test/tests/float_pow.rs
Normal file
72
library/compiler-builtins/builtins-test/tests/float_pow.rs
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#![allow(unused_macros)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
|
||||
|
||||
use builtins_test::*;
|
||||
|
||||
// This is approximate because of issues related to
|
||||
// https://github.com/rust-lang/rust/issues/73920.
|
||||
// TODO how do we resolve this indeterminacy?
|
||||
macro_rules! pow {
|
||||
($($f:ty, $tolerance:expr, $fn:ident, $sys_available:meta);*;) => {
|
||||
$(
|
||||
#[test]
|
||||
// FIXME(apfloat): We skip tests if system symbols aren't available rather
|
||||
// than providing a fallback, since `rustc_apfloat` does not provide `pow`.
|
||||
#[cfg($sys_available)]
|
||||
fn $fn() {
|
||||
use compiler_builtins::float::pow::$fn;
|
||||
use compiler_builtins::float::Float;
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) {
|
||||
let n = y.to_bits() & !<$f as Float>::SIG_MASK;
|
||||
let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS;
|
||||
let n = n as i32;
|
||||
let tmp0: $f = x.powi(n);
|
||||
let tmp1: $f = $fn(x, n);
|
||||
let (a, b) = if tmp0 < tmp1 {
|
||||
(tmp0, tmp1)
|
||||
} else {
|
||||
(tmp1, tmp0)
|
||||
};
|
||||
|
||||
let good = if a == b {
|
||||
// handles infinity equality
|
||||
true
|
||||
} else if a < $tolerance {
|
||||
b < $tolerance
|
||||
} else {
|
||||
let quo = b / a;
|
||||
(quo < (1. + $tolerance)) && (quo > (1. - $tolerance))
|
||||
};
|
||||
|
||||
assert!(
|
||||
good,
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn), x, n, tmp0, tmp1
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
pow! {
|
||||
f32, 1e-4, __powisf2, all();
|
||||
f64, 1e-12, __powidf2, all();
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
|
||||
#[cfg(not(target_env = "msvc"))]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pow! {
|
||||
f128, 1e-36, __powitf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pow! {
|
||||
f128, 1e-36, __powikf2, not(feature = "no-sys-f128");
|
||||
}
|
||||
97
library/compiler-builtins/builtins-test/tests/lse.rs
Normal file
97
library/compiler-builtins/builtins-test/tests/lse.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
#![feature(decl_macro)] // so we can use pub(super)
|
||||
#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))]
|
||||
|
||||
/// Translate a byte size to a Rust type.
|
||||
macro int_ty {
|
||||
(1) => { i8 },
|
||||
(2) => { i16 },
|
||||
(4) => { i32 },
|
||||
(8) => { i64 },
|
||||
(16) => { i128 }
|
||||
}
|
||||
|
||||
mod cas {
|
||||
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
|
||||
#[test]
|
||||
fn $name() {
|
||||
builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| {
|
||||
let mut target = expected.wrapping_add(10);
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target)
|
||||
},
|
||||
expected.wrapping_add(10),
|
||||
"return value should always be the previous value",
|
||||
);
|
||||
assert_eq!(
|
||||
target,
|
||||
expected.wrapping_add(10),
|
||||
"shouldn't have changed target"
|
||||
);
|
||||
|
||||
target = expected;
|
||||
assert_eq!(
|
||||
unsafe {
|
||||
compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target)
|
||||
},
|
||||
expected
|
||||
);
|
||||
assert_eq!(target, new, "should have updated target");
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro test_cas16($_ordering:ident, $name:ident) {
|
||||
cas::test!($_ordering, 16, $name);
|
||||
}
|
||||
|
||||
mod swap {
|
||||
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
|
||||
#[test]
|
||||
fn $name() {
|
||||
builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| {
|
||||
let orig_right = right;
|
||||
assert_eq!(
|
||||
unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) },
|
||||
orig_right
|
||||
);
|
||||
assert_eq!(left, right);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! test_op {
|
||||
($mod:ident, $( $op:tt )* ) => {
|
||||
mod $mod {
|
||||
pub(super) macro test {
|
||||
($_ordering:ident, $bytes:tt, $name:ident) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
builtins_test::fuzz_2(10000, |old, val| {
|
||||
let mut target = old;
|
||||
let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*;
|
||||
let expected = op(old, val);
|
||||
assert_eq!(old, unsafe { compiler_builtins::aarch64_linux::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name));
|
||||
assert_eq!(expected, target, "{} should store to target", stringify!($name));
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_op!(add, |left, right| left.wrapping_add(right));
|
||||
test_op!(clr, |left, right| left & !right);
|
||||
test_op!(xor, std::ops::BitXor::bitxor);
|
||||
test_op!(or, std::ops::BitOr::bitor);
|
||||
|
||||
compiler_builtins::foreach_cas!(cas::test);
|
||||
compiler_builtins::foreach_cas16!(test_cas16);
|
||||
compiler_builtins::foreach_swp!(swap::test);
|
||||
compiler_builtins::foreach_ldadd!(add::test);
|
||||
compiler_builtins::foreach_ldclr!(clr::test);
|
||||
compiler_builtins::foreach_ldeor!(xor::test);
|
||||
compiler_builtins::foreach_ldset!(or::test);
|
||||
286
library/compiler-builtins/builtins-test/tests/mem.rs
Normal file
286
library/compiler-builtins/builtins-test/tests/mem.rs
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
extern crate compiler_builtins;
|
||||
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
|
||||
|
||||
const WORD_SIZE: usize = core::mem::size_of::<usize>();
|
||||
|
||||
#[test]
|
||||
fn memcpy_3() {
|
||||
let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(9);
|
||||
let dst = arr.as_mut_ptr().offset(1);
|
||||
assert_eq!(memcpy(dst, src, 3), dst);
|
||||
assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]);
|
||||
}
|
||||
arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(1);
|
||||
let dst = arr.as_mut_ptr().offset(9);
|
||||
assert_eq!(memcpy(dst, src, 3), dst);
|
||||
assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memcpy_10() {
|
||||
let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
|
||||
let mut dst: [u8; 12] = [0; 12];
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(1);
|
||||
assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr());
|
||||
assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]);
|
||||
}
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(8);
|
||||
assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr());
|
||||
assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memcpy_big() {
|
||||
// Make the arrays cross 3 pages
|
||||
const SIZE: usize = 8193;
|
||||
let src: [u8; SIZE] = [22; SIZE];
|
||||
struct Dst {
|
||||
start: usize,
|
||||
buf: [u8; SIZE],
|
||||
end: usize,
|
||||
}
|
||||
|
||||
let mut dst = Dst {
|
||||
start: 0,
|
||||
buf: [0; SIZE],
|
||||
end: 0,
|
||||
};
|
||||
unsafe {
|
||||
assert_eq!(
|
||||
memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE),
|
||||
dst.buf.as_mut_ptr()
|
||||
);
|
||||
assert_eq!(dst.start, 0);
|
||||
assert_eq!(dst.buf, [22; SIZE]);
|
||||
assert_eq!(dst.end, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_forward() {
|
||||
let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(6);
|
||||
let dst = arr.as_mut_ptr().offset(3);
|
||||
assert_eq!(memmove(dst, src, 5), dst);
|
||||
assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_backward() {
|
||||
let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
|
||||
unsafe {
|
||||
let src = arr.as_ptr().offset(3);
|
||||
let dst = arr.as_mut_ptr().offset(6);
|
||||
assert_eq!(memmove(dst, src, 5), dst);
|
||||
assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memset_zero() {
|
||||
let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
|
||||
unsafe {
|
||||
let ptr = arr.as_mut_ptr().offset(5);
|
||||
assert_eq!(memset(ptr, 0, 2), ptr);
|
||||
assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]);
|
||||
|
||||
// Only the LSB matters for a memset
|
||||
assert_eq!(memset(arr.as_mut_ptr(), 0x2000, 8), arr.as_mut_ptr());
|
||||
assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memset_nonzero() {
|
||||
let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
|
||||
unsafe {
|
||||
let ptr = arr.as_mut_ptr().offset(2);
|
||||
assert_eq!(memset(ptr, 22, 3), ptr);
|
||||
assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]);
|
||||
|
||||
// Only the LSB matters for a memset
|
||||
assert_eq!(memset(arr.as_mut_ptr(), 0x2009, 8), arr.as_mut_ptr());
|
||||
assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memcmp_eq() {
|
||||
let arr1 @ arr2 = gen_arr::<256>();
|
||||
for i in 0..256 {
|
||||
unsafe {
|
||||
assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0);
|
||||
assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memcmp_ne() {
|
||||
let arr1 @ arr2 = gen_arr::<256>();
|
||||
// Reduce iteration count in Miri as it is too slow otherwise.
|
||||
let limit = if cfg!(miri) { 64 } else { 256 };
|
||||
for i in 0..limit {
|
||||
let mut diff_arr = arr1;
|
||||
diff_arr.0[i] = 127;
|
||||
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
|
||||
for k in i + 1..limit {
|
||||
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
|
||||
assert_eq!(expect, result.cmp(&0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct AlignedStorage<const N: usize>([u8; N], [usize; 0]);
|
||||
|
||||
fn gen_arr<const N: usize>() -> AlignedStorage<N> {
|
||||
let mut ret = AlignedStorage::<N>([0; N], []);
|
||||
for i in 0..N {
|
||||
ret.0[i] = i as u8;
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_forward_misaligned_nonaligned_start() {
|
||||
let mut arr = gen_arr::<32>();
|
||||
let mut reference = arr;
|
||||
unsafe {
|
||||
let src = arr.0.as_ptr().offset(6);
|
||||
let dst = arr.0.as_mut_ptr().offset(3);
|
||||
assert_eq!(memmove(dst, src, 17), dst);
|
||||
reference.0.copy_within(6..6 + 17, 3);
|
||||
assert_eq!(arr.0, reference.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_forward_misaligned_aligned_start() {
|
||||
let mut arr = gen_arr::<32>();
|
||||
let mut reference = arr;
|
||||
unsafe {
|
||||
let src = arr.0.as_ptr().offset(6);
|
||||
let dst = arr.0.as_mut_ptr().add(0);
|
||||
assert_eq!(memmove(dst, src, 17), dst);
|
||||
reference.0.copy_within(6..6 + 17, 0);
|
||||
assert_eq!(arr.0, reference.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_forward_aligned() {
|
||||
let mut arr = gen_arr::<32>();
|
||||
let mut reference = arr;
|
||||
unsafe {
|
||||
let src = arr.0.as_ptr().add(3 + WORD_SIZE);
|
||||
let dst = arr.0.as_mut_ptr().add(3);
|
||||
assert_eq!(memmove(dst, src, 17), dst);
|
||||
reference
|
||||
.0
|
||||
.copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3);
|
||||
assert_eq!(arr.0, reference.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_backward_misaligned_nonaligned_start() {
|
||||
let mut arr = gen_arr::<32>();
|
||||
let mut reference = arr;
|
||||
unsafe {
|
||||
let src = arr.0.as_ptr().offset(3);
|
||||
let dst = arr.0.as_mut_ptr().offset(6);
|
||||
assert_eq!(memmove(dst, src, 17), dst);
|
||||
reference.0.copy_within(3..3 + 17, 6);
|
||||
assert_eq!(arr.0, reference.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memmove_backward_misaligned_aligned_start() {
|
||||
let mut arr = gen_arr::<32>();
|
||||
let mut reference = arr;
|
||||
unsafe {
|
||||
let src = arr.0.as_ptr().offset(3);
|
||||
let dst = arr.0.as_mut_ptr().add(WORD_SIZE);
|
||||
assert_eq!(memmove(dst, src, 17), dst);
|
||||
reference.0.copy_within(3..3 + 17, WORD_SIZE);
|
||||
assert_eq!(arr.0, reference.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn memmove_backward_aligned() {
    // Backward overlapping copy with equally-aligned pointers one word apart.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let src = buf.0.as_ptr().add(3);
        let dst = buf.0.as_mut_ptr().add(3 + WORD_SIZE);
        assert_eq!(memmove(dst, src, 17), dst);
        expected.0.copy_within(3..3 + 17, 3 + WORD_SIZE);
        assert_eq!(buf.0, expected.0);
    }
}
|
||||
|
||||
#[test]
fn memmove_misaligned_bounds() {
    // The tests above keep the addresses surrounding the copied range
    // in-bounds, so Miri would not flag an out-of-bounds access there. Here
    // the arrays are exactly the copied length; repeat a few times so the
    // allocations are unlikely to all happen to be aligned.
    for _ in 0..8 {
        let mut first = [0u8; 17];
        let mut second = [0u8; 17];
        unsafe {
            // Copy in both directions to hit the forward and backward paths.
            memmove(first.as_mut_ptr(), second.as_mut_ptr(), 17);
            memmove(second.as_mut_ptr(), first.as_mut_ptr(), 17);
        }
    }
}
|
||||
|
||||
#[test]
fn memset_backward_misaligned_nonaligned_start() {
    // Fill 17 bytes starting at a misaligned, non-word offset.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let ptr = buf.0.as_mut_ptr().add(6);
        // `memset` must return the pointer it was given.
        assert_eq!(memset(ptr, 0xCC, 17), ptr);
        core::ptr::write_bytes(expected.0.as_mut_ptr().add(6), 0xCC, 17);
        assert_eq!(buf.0, expected.0);
    }
}
|
||||
|
||||
#[test]
fn memset_backward_misaligned_aligned_start() {
    // Fill 17 bytes starting exactly at a word boundary.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let ptr = buf.0.as_mut_ptr().add(WORD_SIZE);
        assert_eq!(memset(ptr, 0xCC, 17), ptr);
        core::ptr::write_bytes(expected.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17);
        assert_eq!(buf.0, expected.0);
    }
}
|
||||
|
||||
#[test]
fn memset_backward_aligned() {
    // Fill 17 bytes starting a fixed offset past a word boundary.
    let mut buf = gen_arr::<32>();
    let mut expected = buf;
    unsafe {
        let ptr = buf.0.as_mut_ptr().add(3 + WORD_SIZE);
        assert_eq!(memset(ptr, 0xCC, 17), ptr);
        core::ptr::write_bytes(expected.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17);
        assert_eq!(buf.0, expected.0);
    }
}
|
||||
202
library/compiler-builtins/builtins-test/tests/misc.rs
Normal file
202
library/compiler-builtins/builtins-test/tests/misc.rs
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
// makes configuration easier
|
||||
#![allow(unused_macros)]
|
||||
|
||||
use builtins_test::*;
|
||||
|
||||
/// Make sure that the edge case tester and randomized tester don't break, and list examples of
/// fuzz values for documentation purposes.
#[test]
fn fuzz_values() {
    // The exact sequence `fuzz` yields for `u16`: an edge-case sweep followed
    // by what the inline comment marks as random values. If the fuzzer's
    // algorithm or seeding changes, this table must be regenerated.
    const VALS: [u16; 47] = [
        0b0, // edge cases
        0b1111111111111111,
        0b1111111111111110,
        0b1111111111111100,
        0b1111111110000000,
        0b1111111100000000,
        0b1110000000000000,
        0b1100000000000000,
        0b1000000000000000,
        0b111111111111111,
        0b111111111111110,
        0b111111111111100,
        0b111111110000000,
        0b111111100000000,
        0b110000000000000,
        0b100000000000000,
        0b11111111111111,
        0b11111111111110,
        0b11111111111100,
        0b11111110000000,
        0b11111100000000,
        0b10000000000000,
        0b111111111,
        0b111111110,
        0b111111100,
        0b110000000,
        0b100000000,
        0b11111111,
        0b11111110,
        0b11111100,
        0b10000000,
        0b111,
        0b110,
        0b100,
        0b11,
        0b10,
        0b1,
        0b1010110100000, // beginning of random fuzzing
        0b1100011001011010,
        0b1001100101001111,
        0b1101010100011010,
        0b100010001,
        0b1000000000000000,
        0b1100000000000101,
        0b1100111101010101,
        0b1100010111111111,
        0b1111110101111111,
    ];
    let mut i = 0;
    fuzz(10, |x: u16| {
        // Every yielded value must match the recorded sequence, in order.
        assert_eq!(x, VALS[i]);
        i += 1;
    });
}
|
||||
|
||||
// Compare the count-leading-zeros intrinsics and generic implementations
// against the standard library's `leading_zeros` for fuzzed inputs.
#[test]
fn leading_zeros() {
    use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
    // `u32`: check the `__clzsi2` intrinsic plus both generic implementations.
    {
        use compiler_builtins::int::leading_zeros::__clzsi2;
        fuzz(N, |x: u32| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzsi2(x);
            let lz1 = leading_zeros_default(x);
            let lz2 = leading_zeros_riscv(x);
            if lz0 != lz {
                panic!("__clzsi2({x}): std: {lz}, builtins: {lz0}");
            }
            if lz1 != lz {
                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
            }
            if lz2 != lz {
                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
            }
        });
    }

    // `u64`: same checks at double width.
    {
        use compiler_builtins::int::leading_zeros::__clzdi2;
        fuzz(N, |x: u64| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzdi2(x);
            let lz1 = leading_zeros_default(x);
            let lz2 = leading_zeros_riscv(x);
            if lz0 != lz {
                panic!("__clzdi2({x}): std: {lz}, builtins: {lz0}");
            }
            if lz1 != lz {
                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
            }
            if lz2 != lz {
                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
            }
        });
    }

    // `u128`: only the intrinsic is exercised at this width.
    {
        use compiler_builtins::int::leading_zeros::__clzti2;
        fuzz(N, |x: u128| {
            if x == 0 {
                return; // undefined value for an intrinsic
            }
            let lz = x.leading_zeros() as usize;
            let lz0 = __clzti2(x);
            if lz0 != lz {
                panic!("__clzti2({x}): std: {lz}, builtins: {lz0}");
            }
        });
    }
}
|
||||
|
||||
// Compare the count-trailing-zeros intrinsics and generic implementation
// against the standard library's `trailing_zeros` for fuzzed inputs.
#[test]
fn trailing_zeros() {
    use compiler_builtins::int::trailing_zeros::{__ctzdi2, __ctzsi2, __ctzti2, trailing_zeros};
    // `u32`: intrinsic plus generic implementation.
    fuzz(N, |x: u32| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzsi2(x);
        let tz1 = trailing_zeros(x);
        if tz0 != tz {
            panic!("__ctzsi2({x}): std: {tz}, builtins: {tz0}");
        }
        if tz1 != tz {
            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
        }
    });
    // `u64`: same checks at double width.
    fuzz(N, |x: u64| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzdi2(x);
        let tz1 = trailing_zeros(x);
        if tz0 != tz {
            panic!("__ctzdi2({x}): std: {tz}, builtins: {tz0}");
        }
        if tz1 != tz {
            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
        }
    });
    // `u128`: only the intrinsic is exercised at this width.
    fuzz(N, |x: u128| {
        if x == 0 {
            return; // undefined value for an intrinsic
        }
        let tz = x.trailing_zeros() as usize;
        let tz0 = __ctzti2(x);
        if tz0 != tz {
            panic!("__ctzti2({x}): std: {tz}, builtins: {tz0}");
        }
    });
}
|
||||
|
||||
// Check the byte-swap intrinsics against `swap_bytes`, plus a few fixed
// vectors with known expected values.
#[test]
fn bswap() {
    use compiler_builtins::int::bswap::{__bswapdi2, __bswapsi2};
    fuzz(N, |x: u32| {
        assert_eq!(x.swap_bytes(), __bswapsi2(x));
    });
    fuzz(N, |x: u64| {
        assert_eq!(x.swap_bytes(), __bswapdi2(x));
    });

    // Fixed known-answer vectors.
    assert_eq!(__bswapsi2(0x12345678u32), 0x78563412u32);
    assert_eq!(__bswapsi2(0x00000001u32), 0x01000000u32);
    assert_eq!(__bswapdi2(0x123456789ABCDEF0u64), 0xF0DEBC9A78563412u64);
    assert_eq!(__bswapdi2(0x0200000001000000u64), 0x0000000100000002u64);

    // 128-bit swap is only built on these pointer widths.
    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
    {
        use compiler_builtins::int::bswap::__bswapti2;
        fuzz(N, |x: u128| {
            assert_eq!(x.swap_bytes(), __bswapti2(x));
        });

        assert_eq!(
            __bswapti2(0x123456789ABCDEF013579BDF02468ACEu128),
            0xCE8A4602DF9B5713F0DEBC9A78563412u128
        );
        assert_eq!(
            __bswapti2(0x04000000030000000200000001000000u128),
            0x00000001000000020000000300000004u128
        );
    }
}
|
||||
150
library/compiler-builtins/builtins-test/tests/mul.rs
Normal file
150
library/compiler-builtins/builtins-test/tests/mul.rs
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#![allow(unused_macros)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use builtins_test::*;
|
||||
|
||||
// Fuzz tests for the plain (wrapping) integer multiplication intrinsics.
mod int_mul {
    use super::*;

    // Generates one `#[test]` per `(type, intrinsic)` pair, comparing the
    // builtin against the standard library's `wrapping_mul`.
    macro_rules! mul {
        ($($i:ty, $fn:ident);*;) => {
            $(
                #[test]
                fn $fn() {
                    use compiler_builtins::int::mul::$fn;

                    fuzz_2(N, |x: $i, y: $i| {
                        let mul0 = x.wrapping_mul(y);
                        let mul1: $i = $fn(x, y);
                        if mul0 != mul1 {
                            panic!(
                                "{func}({x}, {y}): std: {mul0}, builtins: {mul1}",
                                func = stringify!($fn),
                            );
                        }
                    });
                }
            )*
        };
    }

    mul! {
        u64, __muldi3;
        i128, __multi3;
    }
}
|
||||
|
||||
// Fuzz tests for the overflow-reporting multiplication intrinsics.
mod int_overflowing_mul {
    use super::*;

    // Generates one `#[test]` per `(type, intrinsic)` pair. The builtin
    // reports overflow through an out-parameter `i32` (nonzero = overflow),
    // which is compared against `overflowing_mul`'s bool.
    macro_rules! overflowing_mul {
        ($($i:ty, $fn:ident);*;) => {
            $(
                #[test]
                fn $fn() {
                    use compiler_builtins::int::mul::$fn;

                    fuzz_2(N, |x: $i, y: $i| {
                        let (mul0, o0) = x.overflowing_mul(y);
                        let mut o1 = 0i32;
                        let mul1: $i = $fn(x, y, &mut o1);
                        // Normalize the out-parameter to a bool for comparison.
                        let o1 = o1 != 0;
                        if mul0 != mul1 || o0 != o1 {
                            panic!(
                                "{func}({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",
                                func = stringify!($fn),
                            );
                        }
                    });
                }
            )*
        };
    }

    overflowing_mul! {
        i32, __mulosi4;
        i64, __mulodi4;
        i128, __muloti4;
    }

    // The 128-bit Rust-specific entry points take the overflow flag as an
    // `&mut i32` and cover both the unsigned and signed variants.
    #[test]
    fn overflowing_mul_u128() {
        use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo};

        fuzz_2(N, |x: u128, y: u128| {
            let mut o1 = 0;
            let (mul0, o0) = x.overflowing_mul(y);
            let mul1 = __rust_u128_mulo(x, y, &mut o1);
            if mul0 != mul1 || i32::from(o0) != o1 {
                panic!("__rust_u128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
            }
            // Reinterpret the same bit patterns as signed and check again.
            let x = x as i128;
            let y = y as i128;
            let (mul0, o0) = x.overflowing_mul(y);
            let mul1 = __rust_i128_mulo(x, y, &mut o1);
            if mul0 != mul1 || i32::from(o0) != o1 {
                panic!("__rust_i128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
            }
        });
    }
}
|
||||
|
||||
// Generates one `#[test]` per `(float type, intrinsic, apfloat type,
// system-availability cfg)` tuple. When the host has no native support for
// the type (per `$sys_available`), `apfloat_fallback!` computes the expected
// product in software; results are compared bitwise via `eq_repr`.
macro_rules! float_mul {
    ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
        $(
            #[test]
            fn $fn() {
                use compiler_builtins::float::{mul::$fn, Float};
                use core::ops::Mul;

                fuzz_float_2(N, |x: $f, y: $f| {
                    let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y);
                    let mul1: $f = $fn(x, y);
                    // Bitwise comparison (`eq_repr`) so NaN payloads and signed
                    // zeros are checked too, not just numeric equality.
                    if !Float::eq_repr(mul0, mul1) {
                        panic!(
                            "{func}({x:?}, {y:?}): std: {mul0:?}, builtins: {mul1:?}",
                            func = stringify!($fn),
                        );
                    }
                });
            }
        )*
    };
}
|
||||
|
||||
// x86 without SSE uses x87 excess precision, so skip these there.
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_mul {
    use super::*;

    // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
    // nightly.
    float_mul! {
        f32, __mulsf3, Single, not(target_arch = "arm");
        f64, __muldf3, Double, not(target_arch = "arm");
    }
}
|
||||
|
||||
// `f128` multiplication for non-PowerPC targets (PowerPC uses `__mulkf3`,
// covered by the module below).
#[cfg(f128_enabled)]
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod float_mul_f128 {
    use super::*;

    float_mul! {
        f128, __multf3, Quad,
        // FIXME(llvm): there is a bug in LLVM rt.
        // See <https://github.com/llvm/llvm-project/issues/91840>.
        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
    }
}
|
||||
|
||||
// PowerPC spells the `f128` multiplication intrinsic `__mulkf3`.
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod float_mul_f128_ppc {
    use super::*;

    float_mul! {
        f128, __mulkf3, Quad, not(feature = "no-sys-f128");
    }
}
|
||||
35
library/compiler-builtins/builtins-test/tests/shift.rs
Normal file
35
library/compiler-builtins/builtins-test/tests/shift.rs
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
use builtins_test::*;
|
||||
|
||||
// Generates one `#[test]` per `(type, std method, builtin)` triple, comparing
// the builtin shift intrinsic against the standard library's wrapping shift
// for fuzzed values and shift amounts.
macro_rules! shift {
    ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => {
        $(
            #[test]
            fn $fn_builtins() {
                use compiler_builtins::int::shift::$fn_builtins;

                fuzz_shift(|x: $i, s: u32| {
                    let tmp0: $i = x.$fn_std(s);
                    let tmp1: $i = $fn_builtins(x, s);
                    if tmp0 != tmp1 {
                        panic!(
                            "{}({}, {}): std: {}, builtins: {}",
                            stringify!($fn_builtins), x, s, tmp0, tmp1
                        );
                    }
                });
            }
        )*
    };
}

// Left shifts are unsigned-only; right shifts come in arithmetic (`__ashr*`,
// signed) and logical (`__lshr*`, unsigned) flavors.
shift! {
    u32, wrapping_shl, __ashlsi3;
    u64, wrapping_shl, __ashldi3;
    u128, wrapping_shl, __ashlti3;
    i32, wrapping_shr, __ashrsi3;
    i64, wrapping_shr, __ashrdi3;
    i128, wrapping_shr, __ashrti3;
    u32, wrapping_shr, __lshrsi3;
    u64, wrapping_shr, __lshrdi3;
    u128, wrapping_shr, __lshrti3;
}
|
||||
58
library/compiler-builtins/ci/bench-icount.sh
Executable file
58
library/compiler-builtins/ci/bench-icount.sh
Executable file
|
|
@ -0,0 +1,58 @@
|
|||
#!/bin/bash
# Run iai-callgrind instruction-count benchmarks against the most recent
# baseline from the default branch, then package the results as the next
# baseline artifact (name exported via GITHUB_ENV).

set -eux

iai_home="iai-home"

# Download the baseline from master
./ci/ci-util.py locate-baseline --download --extract

# Run benchmarks once. Arguments before `--` are passed to cargo, arguments
# after `--` to iai-callgrind.
function run_icount_benchmarks() {
    cargo_args=(
        "--bench" "icount"
        "--no-default-features"
        "--features" "unstable,unstable-float,icount"
    )

    iai_args=(
        "--home" "$(pwd)/$iai_home"
        "--regression=ir=5.0"
        "--save-summary"
    )

    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
    parsing_iai_args=0
    while [ "$#" -gt 0 ]; do
        if [ "$parsing_iai_args" == "1" ]; then
            iai_args+=("$1")
        elif [ "$1" == "--" ]; then
            parsing_iai_args=1
        else
            cargo_args+=("$1")
        fi

        shift
    done

    # Run iai-callgrind benchmarks
    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"

    # NB: iai-callgrind should exit on error but does not, so we inspect the
    # summary for errors. See
    # https://github.com/iai-callgrind/iai-callgrind/issues/337
    if [ -n "${PR_NUMBER:-}" ]; then
        # If this is for a pull request, ignore regressions if specified.
        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
    else
        # Non-PR runs report regressions but never fail the job.
        ./ci/ci-util.py check-regressions --home "$iai_home" || true
    fi
}

# Run once with softfloats, once with arch instructions enabled
run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
run_icount_benchmarks -- --save-baseline=hardfloat

# Name and tar the new baseline
name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
tar cJf "$name.tar.xz" "$iai_home"
|
||||
438
library/compiler-builtins/ci/ci-util.py
Executable file
438
library/compiler-builtins/ci/ci-util.py
Executable file
|
|
@ -0,0 +1,438 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Utilities for CI.
|
||||
|
||||
This dynamically prepares a list of routines that had a source file change based on
|
||||
git history.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess as sp
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from glob import glob, iglob
|
||||
from inspect import cleandoc
|
||||
from os import getenv
|
||||
from pathlib import Path
|
||||
from typing import TypedDict, Self
|
||||
|
||||
USAGE = cleandoc(
|
||||
"""
|
||||
usage:
|
||||
|
||||
./ci/ci-util.py <COMMAND> [flags]
|
||||
|
||||
COMMAND:
|
||||
generate-matrix
|
||||
Calculate a matrix of which functions had source change, print that as
|
||||
a JSON object.
|
||||
|
||||
locate-baseline [--download] [--extract]
|
||||
Locate the most recent benchmark baseline available in CI and, if flags
|
||||
specify, download and extract it. Never exits with nonzero status if
|
||||
downloading fails.
|
||||
|
||||
Note that `--extract` will overwrite files in `iai-home`.
|
||||
|
||||
check-regressions [--home iai-home] [--allow-pr-override pr_number]
|
||||
Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
|
||||
files and see if there are any regressions. This is used as a workaround
|
||||
for `iai-callgrind` not exiting with error status; see
|
||||
<https://github.com/iai-callgrind/iai-callgrind/issues/337>.
|
||||
|
||||
If `--allow-pr-override` is specified, the regression check will not exit
|
||||
with failure if any line in the PR starts with `allow-regressions`.
|
||||
"""
|
||||
)
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
GIT = ["git", "-C", REPO_ROOT]
|
||||
DEFAULT_BRANCH = "master"
|
||||
WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts
|
||||
ARTIFACT_GLOB = "baseline-icount*"
|
||||
# Place this in a PR body to skip regression checks (must be at the start of a line).
|
||||
REGRESSION_DIRECTIVE = "ci: allow-regressions"
|
||||
# Place this in a PR body to skip extensive tests
|
||||
SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
|
||||
# Place this in a PR body to allow running a large number of extensive tests. If not
|
||||
# set, this script will error out if a threshold is exceeded in order to avoid
|
||||
# accidentally spending huge amounts of CI time.
|
||||
ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
|
||||
MANY_EXTENSIVE_THRESHOLD = 20
|
||||
|
||||
# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
|
||||
IGNORE_FILES = [
|
||||
"libm/src/math/support/",
|
||||
"libm/src/libm_helper.rs",
|
||||
"libm/src/math/arch/intrinsics.rs",
|
||||
]
|
||||
|
||||
# libm PR CI takes a long time and doesn't need to run unless relevant files have been
|
||||
# changed. Anything matching this regex pattern will trigger a run.
|
||||
TRIGGER_LIBM_PR_CI = ".*(libm|musl).*"
|
||||
|
||||
TYPES = ["f16", "f32", "f64", "f128"]
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print to stderr.

    stdout is reserved for values the CI workflow consumes (see
    `emit_workflow_output`), so diagnostics go through this helper instead.
    Accepts the same arguments as `print`.
    """
    print(*args, file=sys.stderr, **kwargs)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PrInfo:
|
||||
"""GitHub response for PR query"""
|
||||
|
||||
body: str
|
||||
commits: list[str]
|
||||
created_at: str
|
||||
number: int
|
||||
|
||||
@classmethod
|
||||
def load(cls, pr_number: int | str) -> Self:
|
||||
"""For a given PR number, query the body and commit list"""
|
||||
pr_info = sp.check_output(
|
||||
[
|
||||
"gh",
|
||||
"pr",
|
||||
"view",
|
||||
str(pr_number),
|
||||
"--json=number,commits,body,createdAt",
|
||||
# Flatten the commit list to only hashes, change a key to snake naming
|
||||
"--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)",
|
||||
],
|
||||
text=True,
|
||||
)
|
||||
eprint("PR info:", json.dumps(pr_info, indent=4))
|
||||
return cls(**json.loads(pr_info))
|
||||
|
||||
def contains_directive(self, directive: str) -> bool:
|
||||
"""Return true if the provided directive is on a line in the PR body"""
|
||||
lines = self.body.splitlines()
|
||||
return any(line.startswith(directive) for line in lines)
|
||||
|
||||
|
||||
class FunctionDef(TypedDict):
    """Type for an entry in `function-definitions.json`"""

    # Relative paths of the source files that implement this routine.
    sources: list[str]
    # Routine's type key; grouped against the TYPES list (e.g. "f32") when
    # building the test matrix.
    type: str
|
||||
|
||||
class Context:
    """Collects the changed-file list and function definitions, and derives
    which extensive tests CI needs to run.
    """

    # `GITHUB_REF` value, or None outside of GitHub Actions.
    gh_ref: str | None
    # Files changed relative to the merge base (empty outside PR CI).
    changed: list[Path]
    # Routine name -> FunctionDef, loaded from etc/function-definitions.json.
    defs: dict[str, FunctionDef]

    def __init__(self) -> None:
        self.gh_ref = getenv("GITHUB_REF")
        self.changed = []
        self._init_change_list()

        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
            defs = json.load(f)

        # The JSON file carries a human-oriented "__comment" key; drop it.
        defs.pop("__comment", None)
        self.defs = defs

    def _init_change_list(self):
        """Create a list of files that have been changed. This uses GITHUB_REF if
        available, otherwise a diff between `HEAD` and `master`.
        """

        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
        # the PR number), and sets this as `GITHUB_REF`.
        ref = self.gh_ref
        eprint(f"using ref `{ref}`")
        if not self.is_pr():
            # If the ref is not for `merge` then we are not in PR CI
            eprint("No diff available for ref")
            return

        # The ref is for a dummy merge commit. We can extract the merge base by
        # inspecting all parents (`^@`).
        merge_sha = sp.check_output(
            GIT + ["show-ref", "--hash", ref], text=True
        ).strip()
        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
        eprint(f"Merge:\n{merge_log}\n")

        parents = (
            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
            .strip()
            .splitlines()
        )
        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
        # NOTE(review): assumes parent order is (base branch, PR head) — the
        # usual layout of GitHub's merge commits; verify if diffs look inverted.
        base = parents[0].strip()
        incoming = parents[1].strip()

        eprint(f"base: {base}, incoming: {incoming}")
        textlist = sp.check_output(
            GIT + ["diff", base, incoming, "--name-only"], text=True
        )
        self.changed = [Path(p) for p in textlist.splitlines()]

    def is_pr(self) -> bool:
        """Check if we are looking at a PR rather than a push."""
        return self.gh_ref is not None and "merge" in self.gh_ref

    @staticmethod
    def _ignore_file(fname: str) -> bool:
        # True if the path falls under any IGNORE_FILES prefix.
        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)

    def changed_routines(self) -> dict[str, list[str]]:
        """Create a list of routines for which one or more files have been updated,
        separated by type.
        """
        routines = set()
        for name, meta in self.defs.items():
            # Don't update if changes to the file should be ignored
            sources = (f for f in meta["sources"] if not self._ignore_file(f))

            # Select changed files
            changed = [f for f in sources if Path(f) in self.changed]

            if len(changed) > 0:
                eprint(f"changed files for {name}: {changed}")
                routines.add(name)

        # Group the affected routine names by their float type.
        ret: dict[str, list[str]] = {}
        for r in sorted(routines):
            ret.setdefault(self.defs[r]["type"], []).append(r)

        return ret

    def may_skip_libm_ci(self) -> bool:
        """If this is a PR and no libm files were changed, allow skipping libm
        jobs."""

        if self.is_pr():
            return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed)

        return False

    def emit_workflow_output(self):
        """Print workflow outputs to stdout: a JSON matrix entry per type with
        its changed and to-test routines, plus the libm-skip flag. Exits
        nonzero if too many extensive tests would run without an override.
        """

        pr_number = os.environ.get("PR_NUMBER")
        skip_tests = False
        error_on_many_tests = False

        # PR-body directives can skip extensive tests or lift the test-count cap.
        if pr_number is not None and len(pr_number) > 0:
            pr = PrInfo.load(pr_number)
            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
            error_on_many_tests = not pr.contains_directive(
                ALLOW_MANY_EXTENSIVE_DIRECTIVE
            )

        if skip_tests:
            eprint("Skipping all extensive tests")

        changed = self.changed_routines()
        matrix = []
        total_to_test = 0

        # Figure out which extensive tests need to run
        for ty in TYPES:
            ty_changed = changed.get(ty, [])
            ty_to_test = [] if skip_tests else ty_changed
            total_to_test += len(ty_to_test)

            item = {
                "ty": ty,
                "changed": ",".join(ty_changed),
                "to_test": ",".join(ty_to_test),
            }

            matrix.append(item)

        ext_matrix = json.dumps({"extensive_matrix": matrix}, separators=(",", ":"))
        may_skip = str(self.may_skip_libm_ci()).lower()
        # stdout lines are the machine-readable outputs; mirror them on stderr
        # for the job log.
        print(f"extensive_matrix={ext_matrix}")
        print(f"may_skip_libm_ci={may_skip}")
        eprint(f"extensive_matrix={ext_matrix}")
        eprint(f"may_skip_libm_ci={may_skip}")
        eprint(f"total extensive tests: {total_to_test}")

        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
            eprint(
                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is"
                " intentional. If this is refactoring that happens to touch a lot of"
                f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead."
            )
            exit(1)
||||
|
||||
|
||||
def locate_baseline(flags: list[str]) -> None:
    """Find the most recent baseline from CI, download it if specified.

    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
    run on the branch, etc).

    `flags` may contain `--download` and `--extract` (extract implies download
    must also be given).
    """

    download = False
    extract = False

    # Manual flag parsing; any unknown flag prints usage and exits.
    while len(flags) > 0:
        match flags[0]:
            case "--download":
                download = True
            case "--extract":
                extract = True
            case _:
                eprint(USAGE)
                exit(1)
        flags = flags[1:]

    if extract and not download:
        eprint("cannot extract without downloading")
        exit(1)

    try:
        # Locate the most recent job to complete with success on our branch
        latest_job = sp.check_output(
            [
                "gh",
                "run",
                "list",
                "--status=success",
                f"--branch={DEFAULT_BRANCH}",
                "--json=databaseId,url,headSha,conclusion,createdAt,"
                "status,workflowDatabaseId,workflowName",
                # Return the first array element matching our workflow name. NB: cannot
                # just use `--limit=1`, jq filtering happens after limiting. We also
                # cannot just use `--workflow` because GH gets confused from
                # different file names in history.
                f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]',
            ],
            text=True,
        )
    except sp.CalledProcessError as e:
        # Deliberately best-effort: missing baselines must not fail CI.
        eprint(f"failed to run github command: {e}")
        return

    try:
        latest = json.loads(latest_job)
        eprint("latest job: ", json.dumps(latest, indent=4))
    except json.JSONDecodeError as e:
        eprint(f"failed to decode json '{latest_job}', {e}")
        return

    if not download:
        eprint("--download not specified, returning")
        return

    job_id = latest.get("databaseId")
    if job_id is None:
        eprint("skipping download step")
        return

    # `check=False`: a failed download is tolerated (see docstring).
    sp.run(
        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
        check=False,
    )

    if not extract:
        eprint("skipping extraction step")
        return

    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
    if len(candidate_baselines) == 0:
        eprint("no possible baseline directories found")
        return

    # Names embed a UTC timestamp, so lexicographic sort puts newest first
    # when reversed.
    candidate_baselines.sort(reverse=True)
    baseline_archive = candidate_baselines[0]
    eprint(f"extracting {baseline_archive}")
    sp.run(["tar", "xJvf", baseline_archive], check=True)
    eprint("baseline extracted successfully")
||||
|
||||
|
||||
def check_iai_regressions(args: list[str]):
    """Find regressions in iai summary.json files, exit with failure if any are
    found.

    `args` may contain `--home <dir>` (defaults to "iai-home") and
    `--allow-pr-override <pr_number>` (lets a PR-body directive suppress the
    failure).
    """

    iai_home_str = "iai-home"
    pr_number = None

    # Manual flag parsing via structural match; unknown flags print usage.
    while len(args) > 0:
        match args:
            case ["--home", home, *rest]:
                iai_home_str = home
                args = rest
            case ["--allow-pr-override", pr_num, *rest]:
                pr_number = pr_num
                args = rest
            case _:
                eprint(USAGE)
                exit(1)

    iai_home = Path(iai_home_str)

    found_summaries = False
    regressions: list[dict] = []
    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
        found_summaries = True
        with open(iai_home / summary_path, "r") as f:
            summary = json.load(f)

        # Collect regressions from every segment plus the run total.
        summary_regs = []
        run = summary["callgrind_summary"]["callgrind_run"]
        fname = summary["function_name"]
        id = summary["id"]  # NOTE: shadows the `id` builtin within this loop
        name_entry = {"name": f"{fname}.{id}"}

        for segment in run["segments"]:
            summary_regs.extend(segment["regressions"])

        summary_regs.extend(run["total"]["regressions"])

        # Tag each regression record with the benchmark it came from.
        regressions.extend(name_entry | reg for reg in summary_regs)

    if not found_summaries:
        # No summaries at all means the benchmarks did not run as expected.
        eprint(f"did not find any summary.json files within {iai_home}")
        exit(1)

    if len(regressions) == 0:
        eprint("No regressions found")
        return

    eprint("Found regressions:", json.dumps(regressions, indent=4))

    # A PR can opt out of failing on regressions via a body directive.
    if pr_number is not None:
        pr = PrInfo.load(pr_number)
        if pr.contains_directive(REGRESSION_DIRECTIVE):
            eprint("PR allows regressions, returning")
            return

    exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
match sys.argv[1:]:
|
||||
case ["generate-matrix"]:
|
||||
ctx = Context()
|
||||
ctx.emit_workflow_output()
|
||||
case ["locate-baseline", *flags]:
|
||||
locate_baseline(flags)
|
||||
case ["check-regressions", *args]:
|
||||
check_iai_regressions(args)
|
||||
case ["--help" | "-h"]:
|
||||
print(USAGE)
|
||||
exit()
|
||||
case _:
|
||||
eprint(USAGE)
|
||||
exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Cross-compilation image for aarch64-unknown-linux-gnu.
ARG IMAGE=ubuntu:24.04
FROM $IMAGE

# Host compiler plus the aarch64 cross toolchain, its sysroot, and QEMU
# user-mode emulation for running test binaries.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \
    qemu-user-static

ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu-
# Cargo linker/runner for the target, AR/CC overrides for build scripts,
# QEMU's sysroot, and single-threaded test execution.
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \
    AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
    CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \
    RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# Cross-compilation image for arm-unknown-linux-gnueabi (soft-float ABI).
ARG IMAGE=ubuntu:24.04
FROM $IMAGE

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc libc6-dev ca-certificates \
    gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static

ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi-
# Cargo linker/runner for the target, AR/CC overrides for build scripts,
# QEMU's sysroot, and single-threaded test execution.
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \
    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \
    AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \
    CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \
    QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \
    RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf-
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \
|
||||
AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf-
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \
|
||||
AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc-multilib m4 make libc6-dev ca-certificates
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc-multilib m4 make libc6-dev ca-certificates
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
|
||||
|
||||
ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
|
||||
CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \
|
||||
AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \
|
||||
CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \
|
||||
QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-mips-linux-gnu libc6-dev-mips-cross \
|
||||
binfmt-support qemu-user-static qemu-system-mips
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=mips-linux-gnu-
|
||||
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \
|
||||
AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/mips-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
gcc-mips64-linux-gnuabi64 \
|
||||
libc6-dev \
|
||||
libc6-dev-mips64-cross \
|
||||
qemu-user-static \
|
||||
qemu-system-mips
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64-
|
||||
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \
|
||||
AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
gcc-mips64el-linux-gnuabi64 \
|
||||
libc6-dev \
|
||||
libc6-dev-mips64el-cross \
|
||||
qemu-user-static
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64-
|
||||
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \
|
||||
AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-mipsel-linux-gnu libc6-dev-mipsel-cross \
|
||||
binfmt-support qemu-user-static
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu-
|
||||
ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \
|
||||
AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
|
||||
qemu-system-ppc
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu-
|
||||
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \
|
||||
AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
|
||||
binfmt-support qemu-user-static qemu-system-ppc
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu-
|
||||
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \
|
||||
AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
|
||||
qemu-system-ppc
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu-
|
||||
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \
|
||||
AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_CPU=POWER8 \
|
||||
QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user-static ca-certificates \
|
||||
gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
|
||||
qemu-system-riscv64
|
||||
|
||||
ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu-
|
||||
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
|
||||
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \
|
||||
AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
|
||||
CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
|
||||
QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \
|
||||
RUST_TEST_THREADS=1
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-none-eabi \
|
||||
libnewlib-arm-none-eabi
|
||||
ENV BUILD_ONLY=1
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-none-eabi \
|
||||
libnewlib-arm-none-eabi
|
||||
ENV BUILD_ONLY=1
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-none-eabi \
|
||||
libnewlib-arm-none-eabi
|
||||
ENV BUILD_ONLY=1
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev ca-certificates \
|
||||
gcc-arm-none-eabi \
|
||||
libnewlib-arm-none-eabi
|
||||
ENV BUILD_ONLY=1
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
ARG IMAGE=ubuntu:20.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc clang libc6-dev ca-certificates
|
||||
|
||||
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=true
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
ARG IMAGE=ubuntu:24.04
|
||||
FROM $IMAGE
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
gcc m4 make libc6-dev ca-certificates
|
||||
10
library/compiler-builtins/ci/download-compiler-rt.sh
Executable file
10
library/compiler-builtins/ci/download-compiler-rt.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/sh
|
||||
# Download sources to build C versions of intrinsics. Once being run,
|
||||
# `RUST_COMPILER_RT_ROOT` must be set.
|
||||
|
||||
set -eux
|
||||
|
||||
rust_llvm_version=20.1-2025-02-13
|
||||
|
||||
curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${rust_llvm_version}.tar.gz"
|
||||
tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${rust_llvm_version}/compiler-rt
|
||||
18
library/compiler-builtins/ci/miri.sh
Executable file
18
library/compiler-builtins/ci/miri.sh
Executable file
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
# We need Tree Borrows as some of our raw pointer patterns are not
|
||||
# compatible with Stacked Borrows.
|
||||
export MIRIFLAGS="-Zmiri-tree-borrows"
|
||||
|
||||
# One target that sets `mem-unaligned` and one that does not,
|
||||
# and a big-endian target.
|
||||
targets=(
|
||||
x86_64-unknown-linux-gnu
|
||||
armv7-unknown-linux-gnueabihf
|
||||
s390x-unknown-linux-gnu
|
||||
)
|
||||
for target in "${targets[@]}"; do
|
||||
# Only run the `mem` tests to avoid this taking too long.
|
||||
cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target "$target" -- mem
|
||||
done
|
||||
111
library/compiler-builtins/ci/run-docker.sh
Executable file
111
library/compiler-builtins/ci/run-docker.sh
Executable file
|
|
@ -0,0 +1,111 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Small script to run tests for a target (or all targets) inside all the
|
||||
# respective docker images.
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
host_arch="$(uname -m | sed 's/arm64/aarch64/')"
|
||||
|
||||
# Directories and files that do not yet exist need to be created before
|
||||
# calling docker, otherwise docker will create them but they will be owned
|
||||
# by root.
|
||||
mkdir -p target
|
||||
cargo generate-lockfile
|
||||
cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml
|
||||
|
||||
run() {
|
||||
local target="$1"
|
||||
|
||||
echo "testing target: $target"
|
||||
|
||||
emulated=""
|
||||
target_arch="$(echo "$target" | cut -d'-' -f1)"
|
||||
if [ "$target_arch" != "$host_arch" ]; then
|
||||
emulated=1
|
||||
echo "target is emulated"
|
||||
fi
|
||||
|
||||
run_cmd="HOME=/tmp"
|
||||
|
||||
if [ "${GITHUB_ACTIONS:-}" = "true" ]; then
|
||||
# Enable Docker image caching on GHA
|
||||
build_cmd=("buildx" "build")
|
||||
build_args=(
|
||||
"--cache-from" "type=local,src=/tmp/.buildx-cache"
|
||||
"--cache-to" "type=local,dest=/tmp/.buildx-cache-new"
|
||||
# This is the beautiful bash syntax for expanding an array but neither
|
||||
# raising an error nor returning an empty string if the array is empty.
|
||||
"${build_args[@]:+"${build_args[@]}"}"
|
||||
"--load"
|
||||
)
|
||||
fi
|
||||
|
||||
if [ "$(uname -s)" = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then
|
||||
# Share the host rustc and target. Do this only on Linux and if the image
|
||||
# isn't overridden
|
||||
run_args=(
|
||||
--user "$(id -u):$(id -g)"
|
||||
-e "CARGO_HOME=/cargo"
|
||||
-v "${HOME}/.cargo:/cargo"
|
||||
-v "$(pwd)/target:/builtins-target"
|
||||
-v "$(rustc --print sysroot):/rust:ro"
|
||||
)
|
||||
run_cmd="$run_cmd PATH=\$PATH:/rust/bin:/cargo/bin"
|
||||
else
|
||||
# Use rustc provided by a docker image
|
||||
docker volume create compiler-builtins-cache
|
||||
build_args=(
|
||||
"--build-arg"
|
||||
"IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}"
|
||||
)
|
||||
run_args=(-v "compiler-builtins-cache:/builtins-target")
|
||||
run_cmd="$run_cmd HOME=/tmp" "USING_CONTAINER_RUSTC=1"
|
||||
fi
|
||||
|
||||
if [ -d compiler-rt ]; then
|
||||
export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt"
|
||||
fi
|
||||
|
||||
run_cmd="$run_cmd ci/run.sh $target"
|
||||
|
||||
docker "${build_cmd[@]:-build}" \
|
||||
-t "builtins-$target" \
|
||||
"${build_args[@]:-}" \
|
||||
"ci/docker/$target"
|
||||
docker run \
|
||||
--rm \
|
||||
-e CI \
|
||||
-e CARGO_TARGET_DIR=/builtins-target \
|
||||
-e CARGO_TERM_COLOR \
|
||||
-e MAY_SKIP_LIBM_CI \
|
||||
-e RUSTFLAGS \
|
||||
-e RUST_BACKTRACE \
|
||||
-e RUST_COMPILER_RT_ROOT \
|
||||
-e "EMULATED=$emulated" \
|
||||
-v "$(pwd):/checkout:ro" \
|
||||
-w /checkout \
|
||||
"${run_args[@]:-}" \
|
||||
--init \
|
||||
"builtins-$target" \
|
||||
sh -c "$run_cmd"
|
||||
}
|
||||
|
||||
if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then
|
||||
set +x
|
||||
echo "\
|
||||
usage: ./ci/run-docker.sh [target]
|
||||
|
||||
you can also set DOCKER_BASE_IMAGE to use something other than the default
|
||||
ubuntu:24.04 (or rustlang/rust:nightly).
|
||||
"
|
||||
exit
|
||||
fi
|
||||
|
||||
if [ -z "${1:-}" ]; then
|
||||
for d in ci/docker/*; do
|
||||
run $(basename "$d")
|
||||
done
|
||||
else
|
||||
run "$1"
|
||||
fi
|
||||
24
library/compiler-builtins/ci/run-extensive.sh
Executable file
24
library/compiler-builtins/ci/run-extensive.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
echo "Tests to run: '$TO_TEST'"
|
||||
|
||||
if [ -z "$TO_TEST" ]; then
|
||||
echo "No tests to run, exiting."
|
||||
exit
|
||||
fi
|
||||
|
||||
set -x
|
||||
|
||||
test_cmd=(
|
||||
cargo test
|
||||
--package libm-test
|
||||
--features "build-mpfr,libm/unstable,libm/force-soft-floats"
|
||||
--profile release-checked
|
||||
)
|
||||
|
||||
# Run the non-extensive tests first to catch any easy failures
|
||||
"${test_cmd[@]}" -- "$TO_TEST"
|
||||
|
||||
LIBM_EXTENSIVE_TESTS="$TO_TEST" "${test_cmd[@]}" -- extensive
|
||||
302
library/compiler-builtins/ci/run.sh
Executable file
302
library/compiler-builtins/ci/run.sh
Executable file
|
|
@ -0,0 +1,302 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -eux
|
||||
|
||||
export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
|
||||
export NEXTEST_STATUS_LEVEL=all
|
||||
|
||||
target="${1:-}"
|
||||
|
||||
if [ -z "$target" ]; then
|
||||
host_target=$(rustc -vV | awk '/^host/ { print $2 }')
|
||||
echo "Defaulted to host target $host_target"
|
||||
target="$host_target"
|
||||
fi
|
||||
|
||||
if [[ "$target" = *"wasm"* ]]; then
|
||||
# Enable the random backend
|
||||
export RUSTFLAGS="${RUSTFLAGS:-} --cfg getrandom_backend=\"wasm_js\""
|
||||
fi
|
||||
|
||||
if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then
|
||||
# Install nonstandard components if we have control of the environment
|
||||
rustup target list --installed |
|
||||
grep -E "^$target\$" ||
|
||||
rustup target add "$target"
|
||||
fi
|
||||
|
||||
# Test our implementation
|
||||
if [ "${BUILD_ONLY:-}" = "1" ]; then
|
||||
echo "no tests to run for build-only targets"
|
||||
else
|
||||
test_builtins=(cargo test --package builtins-test --no-fail-fast --target "$target")
|
||||
"${test_builtins[@]}"
|
||||
"${test_builtins[@]}" --release
|
||||
"${test_builtins[@]}" --features c
|
||||
"${test_builtins[@]}" --features c --release
|
||||
"${test_builtins[@]}" --features no-asm
|
||||
"${test_builtins[@]}" --features no-asm --release
|
||||
"${test_builtins[@]}" --features no-f16-f128
|
||||
"${test_builtins[@]}" --features no-f16-f128 --release
|
||||
"${test_builtins[@]}" --benches
|
||||
"${test_builtins[@]}" --benches --release
|
||||
|
||||
if [ "${TEST_VERBATIM:-}" = "1" ]; then
|
||||
verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2)
|
||||
"${test_builtins[@]}" --target-dir "$verb_path" --features c
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
declare -a rlib_paths
|
||||
|
||||
# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
|
||||
update_rlib_paths() {
|
||||
if [ -d /builtins-target ]; then
|
||||
rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
|
||||
else
|
||||
rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
|
||||
fi
|
||||
}
|
||||
|
||||
# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
|
||||
update_rlib_paths
|
||||
rm -f "${rlib_paths[@]}"
|
||||
|
||||
cargo build -p compiler_builtins --target "$target"
|
||||
cargo build -p compiler_builtins --target "$target" --release
|
||||
cargo build -p compiler_builtins --target "$target" --features c
|
||||
cargo build -p compiler_builtins --target "$target" --features c --release
|
||||
cargo build -p compiler_builtins --target "$target" --features no-asm
|
||||
cargo build -p compiler_builtins --target "$target" --features no-asm --release
|
||||
cargo build -p compiler_builtins --target "$target" --features no-f16-f128
|
||||
cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
|
||||
|
||||
PREFIX=${target//unknown-/}-
|
||||
case "$target" in
|
||||
armv7-*)
|
||||
PREFIX=arm-linux-gnueabihf-
|
||||
;;
|
||||
thumb*)
|
||||
PREFIX=arm-none-eabi-
|
||||
;;
|
||||
*86*-*)
|
||||
PREFIX=
|
||||
;;
|
||||
esac
|
||||
|
||||
NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
|
||||
if [ "$NM" = "" ]; then
|
||||
NM="${PREFIX}nm"
|
||||
fi
|
||||
|
||||
# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
|
||||
# rustup run to ensure that those are in PATH.
|
||||
TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
|
||||
if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
|
||||
NM="rustup run $TOOLCHAIN $NM"
|
||||
fi
|
||||
|
||||
# Look out for duplicated symbols when we include the compiler-rt (C) implementation
|
||||
update_rlib_paths
|
||||
for rlib in "${rlib_paths[@]}"; do
|
||||
set +x
|
||||
echo "================================================================"
|
||||
echo "checking $rlib for duplicate symbols"
|
||||
echo "================================================================"
|
||||
set -x
|
||||
|
||||
duplicates_found=0
|
||||
|
||||
# NOTE On i586, It's normal that the get_pc_thunk symbol appears several
|
||||
# times so ignore it
|
||||
$NM -g --defined-only "$rlib" 2>&1 |
|
||||
sort |
|
||||
uniq -d |
|
||||
grep -v __x86.get_pc_thunk --quiet |
|
||||
grep 'T __' && duplicates_found=1
|
||||
|
||||
if [ "$duplicates_found" != 0 ]; then
|
||||
echo "error: found duplicate symbols"
|
||||
exit 1
|
||||
else
|
||||
echo "success; no duplicate symbols found"
|
||||
fi
|
||||
done
|
||||
|
||||
rm -f "${rlib_paths[@]}"
|
||||
|
||||
build_intrinsics_test() {
|
||||
cargo build \
|
||||
--target "$target" --verbose \
|
||||
--manifest-path builtins-test-intrinsics/Cargo.toml "$@"
|
||||
}
|
||||
|
||||
# Verify that we haven't dropped any intrinsics/symbols
|
||||
build_intrinsics_test
|
||||
build_intrinsics_test --release
|
||||
build_intrinsics_test --features c
|
||||
build_intrinsics_test --features c --release
|
||||
|
||||
# Verify that there are no undefined symbols to `panic` within our
|
||||
# implementations
|
||||
CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
|
||||
CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
|
||||
|
||||
# Ensure no references to any symbols from core
|
||||
update_rlib_paths
|
||||
for rlib in "${rlib_paths[@]}"; do
|
||||
set +x
|
||||
echo "================================================================"
|
||||
echo "checking $rlib for references to core"
|
||||
echo "================================================================"
|
||||
set -x
|
||||
|
||||
tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
|
||||
test -d "$tmpdir" || mkdir "$tmpdir"
|
||||
defined="$tmpdir/defined_symbols.txt"
|
||||
undefined="$tmpdir/defined_symbols.txt"
|
||||
|
||||
$NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
|
||||
$NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
|
||||
grep_has_results=0
|
||||
grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1
|
||||
|
||||
if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
|
||||
echo "FIXME: powerpc64 fails these tests"
|
||||
elif [ "$grep_has_results" != 0 ]; then
|
||||
echo "error: found unexpected references to core"
|
||||
exit 1
|
||||
else
|
||||
echo "success; no references to core found"
|
||||
fi
|
||||
done
|
||||
|
||||
# Test libm
|
||||
|
||||
# Make sure a simple build works
|
||||
cargo check -p libm --no-default-features --target "$target"
|
||||
|
||||
if [ "${MAY_SKIP_LIBM_CI:-}" = "true" ]; then
|
||||
echo "skipping libm PR CI"
|
||||
exit
|
||||
fi
|
||||
|
||||
mflags=()
|
||||
|
||||
# We enumerate features manually.
|
||||
mflags+=(--no-default-features)
|
||||
|
||||
# Enable arch-specific routines when available.
|
||||
mflags+=(--features arch)
|
||||
|
||||
# Always enable `unstable-float` since it expands available API but does not
|
||||
# change any implementations.
|
||||
mflags+=(--features unstable-float)
|
||||
|
||||
# We need to specifically skip tests for musl-math-sys on systems that can't
|
||||
# build musl since otherwise `--all` will activate it.
|
||||
case "$target" in
|
||||
# Can't build at all on MSVC, WASM, or thumb
|
||||
*windows-msvc*) mflags+=(--exclude musl-math-sys) ;;
|
||||
*wasm*) mflags+=(--exclude musl-math-sys) ;;
|
||||
*thumb*) mflags+=(--exclude musl-math-sys) ;;
|
||||
|
||||
# We can build musl on MinGW but running tests gets a stack overflow
|
||||
*windows-gnu*) ;;
|
||||
# FIXME(#309): LE PPC crashes calling the musl version of some functions. It
|
||||
# seems like a qemu bug but should be investigated further at some point.
|
||||
# See <https://github.com/rust-lang/libm/issues/309>.
|
||||
*powerpc64le*) ;;
|
||||
|
||||
# Everything else gets musl enabled
|
||||
*) mflags+=(--features libm-test/build-musl) ;;
|
||||
esac
|
||||
|
||||
|
||||
# Configure which targets test against MPFR
|
||||
case "$target" in
|
||||
# MSVC cannot link MPFR
|
||||
*windows-msvc*) ;;
|
||||
# FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial.
|
||||
*windows-gnu*) ;;
|
||||
# Targets that aren't cross compiled in CI work fine
|
||||
aarch64*apple*) mflags+=(--features libm-test/build-mpfr) ;;
|
||||
aarch64*linux*) mflags+=(--features libm-test/build-mpfr) ;;
|
||||
i586*) mflags+=(--features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross) ;;
|
||||
i686*) mflags+=(--features libm-test/build-mpfr) ;;
|
||||
x86_64*) mflags+=(--features libm-test/build-mpfr) ;;
|
||||
esac
|
||||
|
||||
# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
|
||||
# <https://github.com/rust-lang/rust/issues/128944>
|
||||
case "$target" in
|
||||
*windows-gnu) mflags+=(--exclude libm-macros) ;;
|
||||
esac
|
||||
|
||||
if [ "${BUILD_ONLY:-}" = "1" ]; then
|
||||
# If we are on targets that can't run tests, verify that we can build.
|
||||
cmd=(cargo build --target "$target" --package libm)
|
||||
"${cmd[@]}"
|
||||
"${cmd[@]}" --features unstable-intrinsics
|
||||
|
||||
echo "can't run tests on $target; skipping"
|
||||
else
|
||||
mflags+=(--workspace --target "$target")
|
||||
cmd=(cargo test "${mflags[@]}")
|
||||
profile_flag="--profile"
|
||||
|
||||
# If nextest is available, use that
|
||||
command -v cargo-nextest && nextest=1 || nextest=0
|
||||
if [ "$nextest" = "1" ]; then
|
||||
cmd=(cargo nextest run --max-fail=10)
|
||||
|
||||
# Workaround for https://github.com/nextest-rs/nextest/issues/2066
|
||||
if [ -f /.dockerenv ]; then
|
||||
cfg_file="/tmp/nextest-config.toml"
|
||||
echo "[store]" >> "$cfg_file"
|
||||
echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
|
||||
cmd+=(--config-file "$cfg_file")
|
||||
fi
|
||||
|
||||
# Not all configurations have tests to run on wasm
|
||||
[[ "$target" = *"wasm"* ]] && cmd+=(--no-tests=warn)
|
||||
|
||||
cmd+=("${mflags[@]}")
|
||||
profile_flag="--cargo-profile"
|
||||
fi
|
||||
|
||||
# Test once without intrinsics
|
||||
"${cmd[@]}"
|
||||
|
||||
# Run doctests if they were excluded by nextest
|
||||
[ "$nextest" = "1" ] && cargo test --doc --exclude compiler_builtins "${mflags[@]}"
|
||||
|
||||
# Exclude the macros and utile crates from the rest of the tests to save CI
|
||||
# runtime, they shouldn't have anything feature- or opt-level-dependent.
|
||||
cmd+=(--exclude util --exclude libm-macros)
|
||||
|
||||
# Test once with intrinsics enabled
|
||||
"${cmd[@]}" --features unstable-intrinsics
|
||||
"${cmd[@]}" --features unstable-intrinsics --benches
|
||||
|
||||
# Test the same in release mode, which also increases coverage. Also ensure
|
||||
# the soft float routines are checked.
|
||||
"${cmd[@]}" "$profile_flag" release-checked
|
||||
"${cmd[@]}" "$profile_flag" release-checked --features force-soft-floats
|
||||
"${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics
|
||||
"${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches
|
||||
|
||||
# Ensure that the routines do not panic.
|
||||
#
|
||||
# `--tests` must be passed because no-panic is only enabled as a dev
|
||||
# dependency. The `release-opt` profile must be used to enable LTO and a
|
||||
# single CGU.
|
||||
ENSURE_NO_PANIC=1 cargo build \
|
||||
-p libm \
|
||||
--target "$target" \
|
||||
--no-default-features \
|
||||
--features unstable-float \
|
||||
--tests \
|
||||
--profile release-opt
|
||||
fi
|
||||
168
library/compiler-builtins/compiler-builtins/CHANGELOG.md
Normal file
168
library/compiler-builtins/compiler-builtins/CHANGELOG.md
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12
|
||||
|
||||
### Other
|
||||
|
||||
- Remove cfg(bootstrap)
|
||||
|
||||
## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06
|
||||
|
||||
### Other
|
||||
|
||||
- Require `target_has_atomic = "ptr"` for runtime feature detection
|
||||
|
||||
## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03
|
||||
|
||||
### Other
|
||||
|
||||
- Use runtime feature detection for fma routines on x86
|
||||
|
||||
## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21
|
||||
|
||||
### Other
|
||||
|
||||
- avr: Provide `abort()`
|
||||
- Remove `unsafe` from `naked_asm!` blocks
|
||||
- Enable icount benchmarks in CI
|
||||
- Move builtins-test-intrinsics out of the workspace
|
||||
- Run `cargo fmt` on all projects
|
||||
- Flatten the `libm/libm` directory
|
||||
- Update path to libm after the merge
|
||||
|
||||
## [0.1.155](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.154...compiler_builtins-v0.1.155) - 2025-04-17
|
||||
|
||||
### Other
|
||||
|
||||
- use `#[cfg(bootstrap)]` for rustc sync
|
||||
- Replace the `bl!` macro with `asm_sym`
|
||||
- __udivmod(h|q)i4
|
||||
|
||||
## [0.1.154](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.153...compiler_builtins-v0.1.154) - 2025-04-16
|
||||
|
||||
### Other
|
||||
|
||||
- turn #[naked] into an unsafe attribute
|
||||
|
||||
## [0.1.153](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.152...compiler_builtins-v0.1.153) - 2025-04-09
|
||||
|
||||
### Other
|
||||
|
||||
- Remove a mention of `force-soft-float` in `build.rs`
|
||||
- Revert "Disable `f16` on AArch64 without the `neon` feature"
|
||||
- Skip No More!
|
||||
- avoid out-of-bounds accesses ([#799](https://github.com/rust-lang/compiler-builtins/pull/799))
|
||||
|
||||
## [0.1.152](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.151...compiler_builtins-v0.1.152) - 2025-03-20
|
||||
|
||||
### Other
|
||||
|
||||
- Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs`
|
||||
- Switch repository layout to use a virtual manifest
|
||||
|
||||
## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05
|
||||
|
||||
### Other
|
||||
|
||||
- Add cygwin support
|
||||
- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770))
|
||||
- Add __extendhfdf2 and add __truncdfhf2 test
|
||||
- Remove outdated information from the readme
|
||||
|
||||
## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01
|
||||
|
||||
### Other
|
||||
|
||||
- Disable `f16` on AArch64 without the `neon` feature
|
||||
- Update LLVM downloads to 20.1-2025-02-13
|
||||
|
||||
## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25
|
||||
|
||||
### Other
|
||||
|
||||
- Make a subset of `libm` symbols weakly available on all platforms
|
||||
|
||||
## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24
|
||||
|
||||
### Other
|
||||
|
||||
- Update the `libm` submodule
|
||||
- Enable `f16` for MIPS
|
||||
- Eliminate the use of `public_test_dep!` for a third time
|
||||
|
||||
## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19
|
||||
|
||||
### Other
|
||||
|
||||
- remove win64_128bit_abi_hack
|
||||
|
||||
## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06
|
||||
|
||||
### Other
|
||||
|
||||
- Expose erf{,c}{,f} from libm
|
||||
|
||||
## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04
|
||||
|
||||
### Other
|
||||
|
||||
- Revert "Eliminate the use of `public_test_dep!`"
|
||||
- Indentation fix to please clippy
|
||||
- Don't build out of line atomics support code for uefi
|
||||
- Add a version to some FIXMEs that will be resolved in LLVM 20
|
||||
- Remove use of the `start` feature
|
||||
|
||||
## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15
|
||||
|
||||
### Other
|
||||
|
||||
- Eliminate the use of `public_test_dep!`
|
||||
|
||||
## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15
|
||||
|
||||
### Other
|
||||
|
||||
- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics
|
||||
|
||||
## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07
|
||||
|
||||
### Other
|
||||
|
||||
- Account for optimization levels other than numbers
|
||||
|
||||
## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07
|
||||
|
||||
### Other
|
||||
|
||||
- Update the `libm` submodule
|
||||
- Fix new `clippy::precedence` errors
|
||||
- Rename `EXP_MAX` to `EXP_SAT`
|
||||
- Shorten prefixes for float constants
|
||||
|
||||
## [0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26
|
||||
|
||||
### Other
|
||||
|
||||
- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737))
|
||||
- Fix a bug in `abs_diff`
|
||||
- Disable `f16` on platforms that have recursion problems
|
||||
|
||||
## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03
|
||||
|
||||
### Other
|
||||
|
||||
- Remove incorrect `sparcv9` match pattern from `configure_f16_f128`
|
||||
|
||||
## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01
|
||||
|
||||
### Other
|
||||
|
||||
- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724))
|
||||
- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722))
|
||||
64
library/compiler-builtins/compiler-builtins/Cargo.toml
Normal file
64
library/compiler-builtins/compiler-builtins/Cargo.toml
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
[package]
|
||||
authors = ["Jorge Aparicio <japaricious@gmail.com>"]
|
||||
name = "compiler_builtins"
|
||||
version = "0.1.159"
|
||||
license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/rust-lang/compiler-builtins"
|
||||
homepage = "https://github.com/rust-lang/compiler-builtins"
|
||||
documentation = "https://docs.rs/compiler_builtins"
|
||||
edition = "2021"
|
||||
description = "Compiler intrinsics used by the Rust compiler."
|
||||
links = "compiler-rt"
|
||||
|
||||
[lib]
|
||||
bench = false
|
||||
doctest = false
|
||||
test = false
|
||||
|
||||
[dependencies]
|
||||
# For more information on this dependency see
|
||||
# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core
|
||||
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
|
||||
|
||||
[build-dependencies]
|
||||
cc = { optional = true, version = "1.0" }
|
||||
|
||||
[dev-dependencies]
|
||||
panic-handler = { path = "../crates/panic-handler" }
|
||||
|
||||
[features]
|
||||
default = ["compiler-builtins"]
|
||||
|
||||
# Enable compilation of C code in compiler-rt, filling in some more optimized
|
||||
# implementations and also filling in unimplemented intrinsics
|
||||
c = ["dep:cc"]
|
||||
|
||||
# Workaround for the Cranelift codegen backend. Disables any implementations
|
||||
# which use inline assembly and fall back to pure Rust versions (if available).
|
||||
no-asm = []
|
||||
|
||||
# Workaround for codegen backends which haven't yet implemented `f16` and
|
||||
# `f128` support. Disables any intrinsics which use those types.
|
||||
no-f16-f128 = []
|
||||
|
||||
# Flag this library as the unstable compiler-builtins lib
|
||||
compiler-builtins = []
|
||||
|
||||
# Generate memory-related intrinsics like memcpy
|
||||
mem = []
|
||||
|
||||
# Mangle all names so this can be linked in with other versions or other
|
||||
# compiler-rt implementations. Also used for testing
|
||||
mangled-names = []
|
||||
|
||||
# Only used in the compiler's build system
|
||||
rustc-dep-of-std = ["compiler-builtins", "dep:core"]
|
||||
|
||||
# This makes certain traits and function specializations public that
|
||||
# are not normally public but are required by the `builtins-test`
|
||||
unstable-public-internals = []
|
||||
|
||||
[lints.rust]
|
||||
# The cygwin config can be dropped after our benchmark toolchain is bumped
|
||||
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] }
|
||||
1
library/compiler-builtins/compiler-builtins/LICENSE.txt
Symbolic link
1
library/compiler-builtins/compiler-builtins/LICENSE.txt
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../LICENSE.txt
|
||||
436
library/compiler-builtins/compiler-builtins/README.md
Normal file
436
library/compiler-builtins/compiler-builtins/README.md
Normal file
|
|
@ -0,0 +1,436 @@
|
|||
# `compiler-builtins`
|
||||
|
||||
This crate provides external symbols that the compiler expects to be available
|
||||
when building Rust projects, typically software routines for basic operations
|
||||
that do not have hardware support. It is largely a port of LLVM's
|
||||
[`compiler-rt`].
|
||||
|
||||
It is distributed as part of Rust's sysroot. `compiler-builtins` does not need
|
||||
to be added as an explicit dependency in `Cargo.toml`.
|
||||
|
||||
[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/1b1dc505057322f4fa1110ef4f53c44347f52986/compiler-rt
|
||||
|
||||
## Contributing
|
||||
|
||||
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
## Progress
|
||||
|
||||
- [x] aarch64/chkstk.S
|
||||
- [x] adddf3.c
|
||||
- [x] addsf3.c
|
||||
- [x] arm/addsf3.S
|
||||
- [x] arm/aeabi_dcmp.S
|
||||
- [x] arm/aeabi_fcmp.S
|
||||
- [x] arm/aeabi_idivmod.S
|
||||
- [x] arm/aeabi_ldivmod.S
|
||||
- [x] arm/aeabi_memcpy.S
|
||||
- [x] arm/aeabi_memmove.S
|
||||
- [x] arm/aeabi_memset.S
|
||||
- [x] arm/aeabi_uidivmod.S
|
||||
- [x] arm/aeabi_uldivmod.S
|
||||
- [ ] arm/chkstk.S
|
||||
- [ ] arm/divmodsi4.S (generic version is done)
|
||||
- [ ] arm/divsi3.S (generic version is done)
|
||||
- [ ] arm/modsi3.S (generic version is done)
|
||||
- [x] arm/softfloat-alias.list
|
||||
- [ ] arm/udivmodsi4.S (generic version is done)
|
||||
- [ ] arm/udivsi3.S (generic version is done)
|
||||
- [ ] arm/umodsi3.S (generic version is done)
|
||||
- [x] ashldi3.c
|
||||
- [x] ashrdi3.c
|
||||
- [ ] avr/divmodhi4.S
|
||||
- [ ] avr/divmodqi4.S
|
||||
- [ ] avr/mulhi3.S
|
||||
- [ ] avr/mulqi3.S
|
||||
- [ ] avr/udivmodhi4.S
|
||||
- [ ] avr/udivmodqi4.S
|
||||
- [x] bswapdi2.c
|
||||
- [x] bswapsi2.c
|
||||
- [x] bswapti2.c
|
||||
- [x] clzdi2.c
|
||||
- [x] clzsi2.c
|
||||
- [x] clzti2.c
|
||||
- [x] comparedf2.c
|
||||
- [x] comparesf2.c
|
||||
- [x] ctzdi2.c
|
||||
- [x] ctzsi2.c
|
||||
- [x] ctzti2.c
|
||||
- [x] divdf3.c
|
||||
- [x] divdi3.c
|
||||
- [x] divmoddi4.c
|
||||
- [x] divmodsi4.c
|
||||
- [x] divmodti4.c
|
||||
- [x] divsf3.c
|
||||
- [x] divsi3.c
|
||||
- [x] extendsfdf2.c
|
||||
- [x] fixdfdi.c
|
||||
- [x] fixdfsi.c
|
||||
- [x] fixsfdi.c
|
||||
- [x] fixsfsi.c
|
||||
- [x] fixunsdfdi.c
|
||||
- [x] fixunsdfsi.c
|
||||
- [x] fixunssfdi.c
|
||||
- [x] fixunssfsi.c
|
||||
- [x] floatdidf.c
|
||||
- [x] floatdisf.c
|
||||
- [x] floatsidf.c
|
||||
- [x] floatsisf.c
|
||||
- [x] floatundidf.c
|
||||
- [x] floatundisf.c
|
||||
- [x] floatunsidf.c
|
||||
- [x] floatunsisf.c
|
||||
- [ ] i386/ashldi3.S
|
||||
- [ ] i386/ashrdi3.S
|
||||
- [x] i386/chkstk.S
|
||||
- [ ] i386/divdi3.S
|
||||
- [ ] i386/lshrdi3.S
|
||||
- [ ] i386/moddi3.S
|
||||
- [ ] i386/muldi3.S
|
||||
- [ ] i386/udivdi3.S
|
||||
- [ ] i386/umoddi3.S
|
||||
- [x] lshrdi3.c
|
||||
- [x] moddi3.c
|
||||
- [x] modsi3.c
|
||||
- [x] muldf3.c
|
||||
- [x] muldi3.c
|
||||
- [x] mulodi4.c
|
||||
- [x] mulosi4.c
|
||||
- [x] mulsf3.c
|
||||
- [x] powidf2.c
|
||||
- [x] powisf2.c
|
||||
- [ ] riscv/muldi3.S
|
||||
- [ ] riscv/mulsi3.S
|
||||
- [x] subdf3.c
|
||||
- [x] subsf3.c
|
||||
- [x] truncdfsf2.c
|
||||
- [x] udivdi3.c
|
||||
- [x] udivmoddi4.c
|
||||
- [x] udivmodsi4.c
|
||||
- [x] udivsi3.c
|
||||
- [x] umoddi3.c
|
||||
- [x] umodsi3.c
|
||||
- [x] x86_64/chkstk.S
|
||||
|
||||
These builtins are needed to support 128-bit integers.
|
||||
|
||||
- [x] ashlti3.c
|
||||
- [x] ashrti3.c
|
||||
- [x] divti3.c
|
||||
- [x] fixdfti.c
|
||||
- [x] fixsfti.c
|
||||
- [x] fixunsdfti.c
|
||||
- [x] fixunssfti.c
|
||||
- [x] floattidf.c
|
||||
- [x] floattisf.c
|
||||
- [x] floatuntidf.c
|
||||
- [x] floatuntisf.c
|
||||
- [x] lshrti3.c
|
||||
- [x] modti3.c
|
||||
- [x] muloti4.c
|
||||
- [x] multi3.c
|
||||
- [x] udivmodti4.c
|
||||
- [x] udivti3.c
|
||||
- [x] umodti3.c
|
||||
|
||||
These builtins are needed to support `f16` and `f128`, which are in the process
|
||||
of being added to Rust.
|
||||
|
||||
- [x] addtf3.c
|
||||
- [x] comparetf2.c
|
||||
- [x] divtf3.c
|
||||
- [x] extenddftf2.c
|
||||
- [x] extendhfsf2.c
|
||||
- [x] extendhftf2.c
|
||||
- [x] extendsftf2.c
|
||||
- [x] fixtfdi.c
|
||||
- [x] fixtfsi.c
|
||||
- [x] fixtfti.c
|
||||
- [x] fixunstfdi.c
|
||||
- [x] fixunstfsi.c
|
||||
- [x] fixunstfti.c
|
||||
- [x] floatditf.c
|
||||
- [x] floatsitf.c
|
||||
- [x] floattitf.c
|
||||
- [x] floatunditf.c
|
||||
- [x] floatunsitf.c
|
||||
- [x] floatuntitf.c
|
||||
- [x] multf3.c
|
||||
- [x] powitf2.c
|
||||
- [x] subtf3.c
|
||||
- [x] truncdfhf2.c
|
||||
- [x] truncsfhf2.c
|
||||
- [x] trunctfdf2.c
|
||||
- [x] trunctfhf2.c
|
||||
- [x] trunctfsf2.c
|
||||
|
||||
|
||||
These builtins are used by the Hexagon DSP
|
||||
|
||||
- [ ] hexagon/common_entry_exit_abi1.S
|
||||
- [ ] hexagon/common_entry_exit_abi2.S
|
||||
- [ ] hexagon/common_entry_exit_legacy.S
|
||||
- [x] hexagon/dfaddsub.S~~
|
||||
- [x] hexagon/dfdiv.S~~
|
||||
- [x] hexagon/dffma.S~~
|
||||
- [x] hexagon/dfminmax.S~~
|
||||
- [x] hexagon/dfmul.S~~
|
||||
- [x] hexagon/dfsqrt.S~~
|
||||
- [x] hexagon/divdi3.S~~
|
||||
- [x] hexagon/divsi3.S~~
|
||||
- [x] hexagon/fastmath2_dlib_asm.S~~
|
||||
- [x] hexagon/fastmath2_ldlib_asm.S~~
|
||||
- [x] hexagon/fastmath_dlib_asm.S~~
|
||||
- [x] hexagon/memcpy_forward_vp4cp4n2.S~~
|
||||
- [x] hexagon/memcpy_likely_aligned.S~~
|
||||
- [x] hexagon/moddi3.S~~
|
||||
- [x] hexagon/modsi3.S~~
|
||||
- [x] hexagon/sfdiv_opt.S~~
|
||||
- [x] hexagon/sfsqrt_opt.S~~
|
||||
- [x] hexagon/udivdi3.S~~
|
||||
- [x] hexagon/udivmoddi4.S~~
|
||||
- [x] hexagon/udivmodsi4.S~~
|
||||
- [x] hexagon/udivsi3.S~~
|
||||
- [x] hexagon/umoddi3.S~~
|
||||
- [x] hexagon/umodsi3.S~~
|
||||
|
||||
## Unimplemented functions
|
||||
|
||||
These builtins are for x87 `f80` floating-point numbers that are not supported
|
||||
by Rust.
|
||||
|
||||
- ~~extendxftf2.c~~
|
||||
- ~~fixunsxfdi.c~~
|
||||
- ~~fixunsxfsi.c~~
|
||||
- ~~fixunsxfti.c~~
|
||||
- ~~fixxfdi.c~~
|
||||
- ~~fixxfti.c~~
|
||||
- ~~floatdixf.c~~
|
||||
- ~~floattixf.c~~
|
||||
- ~~floatundixf.c~~
|
||||
- ~~floatuntixf.c~~
|
||||
- ~~i386/floatdixf.S~~
|
||||
- ~~i386/floatundixf.S~~
|
||||
- ~~x86_64/floatdixf.c~~
|
||||
- ~~x86_64/floatundixf.S~~
|
||||
|
||||
These builtins are for IBM "extended double" non-IEEE 128-bit floating-point
|
||||
numbers.
|
||||
|
||||
- ~~ppc/divtc3.c~~
|
||||
- ~~ppc/fixtfdi.c~~
|
||||
- ~~ppc/fixtfti.c~~
|
||||
- ~~ppc/fixunstfdi.c~~
|
||||
- ~~ppc/fixunstfti.c~~
|
||||
- ~~ppc/floatditf.c~~
|
||||
- ~~ppc/floattitf.c~~
|
||||
- ~~ppc/floatunditf.c~~
|
||||
- ~~ppc/gcc_qadd.c~~
|
||||
- ~~ppc/gcc_qdiv.c~~
|
||||
- ~~ppc/gcc_qmul.c~~
|
||||
- ~~ppc/gcc_qsub.c~~
|
||||
- ~~ppc/multc3.c~~
|
||||
|
||||
These builtins are for 16-bit brain floating-point numbers that are not
|
||||
supported by Rust.
|
||||
|
||||
- ~~truncdfbf2.c~~
|
||||
- ~~truncsfbf2.c~~
|
||||
- ~~trunctfxf2.c~~
|
||||
|
||||
These builtins involve complex floating-point types that are not supported by
|
||||
Rust.
|
||||
|
||||
- ~~divdc3.c~~
|
||||
- ~~divsc3.c~~
|
||||
- ~~divtc3.c~~
|
||||
- ~~divxc3.c~~
|
||||
- ~~muldc3.c~~
|
||||
- ~~mulsc3.c~~
|
||||
- ~~multc3.c~~
|
||||
- ~~mulxc3.c~~
|
||||
- ~~powixf2.c~~
|
||||
|
||||
These builtins are never called by LLVM.
|
||||
|
||||
- ~~absvdi2.c~~
|
||||
- ~~absvsi2.c~~
|
||||
- ~~absvti2.c~~
|
||||
- ~~addvdi3.c~~
|
||||
- ~~addvsi3.c~~
|
||||
- ~~addvti3.c~~
|
||||
- ~~arm/aeabi_cdcmp.S~~
|
||||
- ~~arm/aeabi_cdcmpeq_check_nan.c~~
|
||||
- ~~arm/aeabi_cfcmp.S~~
|
||||
- ~~arm/aeabi_cfcmpeq_check_nan.c~~
|
||||
- ~~arm/aeabi_div0.c~~
|
||||
- ~~arm/aeabi_drsub.c~~
|
||||
- ~~arm/aeabi_frsub.c~~
|
||||
- ~~arm/aeabi_memcmp.S~~
|
||||
- ~~arm/bswapdi2.S~~
|
||||
- ~~arm/bswapsi2.S~~
|
||||
- ~~arm/clzdi2.S~~
|
||||
- ~~arm/clzsi2.S~~
|
||||
- ~~arm/comparesf2.S~~
|
||||
- ~~arm/restore_vfp_d8_d15_regs.S~~
|
||||
- ~~arm/save_vfp_d8_d15_regs.S~~
|
||||
- ~~arm/switch16.S~~
|
||||
- ~~arm/switch32.S~~
|
||||
- ~~arm/switch8.S~~
|
||||
- ~~arm/switchu8.S~~
|
||||
- ~~cmpdi2.c~~
|
||||
- ~~cmpti2.c~~
|
||||
- ~~ffssi2.c~~
|
||||
- ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though!
|
||||
- ~~ffsti2.c~~
|
||||
- ~~mulvdi3.c~~
|
||||
- ~~mulvsi3.c~~
|
||||
- ~~mulvti3.c~~
|
||||
- ~~negdf2.c~~
|
||||
- ~~negdi2.c~~
|
||||
- ~~negsf2.c~~
|
||||
- ~~negti2.c~~
|
||||
- ~~negvdi2.c~~
|
||||
- ~~negvsi2.c~~
|
||||
- ~~negvti2.c~~
|
||||
- ~~paritydi2.c~~
|
||||
- ~~paritysi2.c~~
|
||||
- ~~parityti2.c~~
|
||||
- ~~popcountdi2.c~~
|
||||
- ~~popcountsi2.c~~
|
||||
- ~~popcountti2.c~~
|
||||
- ~~ppc/restFP.S~~
|
||||
- ~~ppc/saveFP.S~~
|
||||
- ~~subvdi3.c~~
|
||||
- ~~subvsi3.c~~
|
||||
- ~~subvti3.c~~
|
||||
- ~~ucmpdi2.c~~
|
||||
- ~~ucmpti2.c~~
|
||||
- ~~udivmodti4.c~~
|
||||
|
||||
[jemalloc-fail]: https://travis-ci.org/rust-lang/rust/jobs/249772758
|
||||
|
||||
Rust only exposes atomic types on platforms that support them, and therefore does not need to fall back to software implementations.
|
||||
|
||||
- ~~arm/sync_fetch_and_add_4.S~~
|
||||
- ~~arm/sync_fetch_and_add_8.S~~
|
||||
- ~~arm/sync_fetch_and_and_4.S~~
|
||||
- ~~arm/sync_fetch_and_and_8.S~~
|
||||
- ~~arm/sync_fetch_and_max_4.S~~
|
||||
- ~~arm/sync_fetch_and_max_8.S~~
|
||||
- ~~arm/sync_fetch_and_min_4.S~~
|
||||
- ~~arm/sync_fetch_and_min_8.S~~
|
||||
- ~~arm/sync_fetch_and_nand_4.S~~
|
||||
- ~~arm/sync_fetch_and_nand_8.S~~
|
||||
- ~~arm/sync_fetch_and_or_4.S~~
|
||||
- ~~arm/sync_fetch_and_or_8.S~~
|
||||
- ~~arm/sync_fetch_and_sub_4.S~~
|
||||
- ~~arm/sync_fetch_and_sub_8.S~~
|
||||
- ~~arm/sync_fetch_and_umax_4.S~~
|
||||
- ~~arm/sync_fetch_and_umax_8.S~~
|
||||
- ~~arm/sync_fetch_and_umin_4.S~~
|
||||
- ~~arm/sync_fetch_and_umin_8.S~~
|
||||
- ~~arm/sync_fetch_and_xor_4.S~~
|
||||
- ~~arm/sync_fetch_and_xor_8.S~~
|
||||
- ~~arm/sync_synchronize.S~~
|
||||
- ~~atomic.c~~
|
||||
- ~~atomic_flag_clear.c~~
|
||||
- ~~atomic_flag_clear_explicit.c~~
|
||||
- ~~atomic_flag_test_and_set.c~~
|
||||
- ~~atomic_flag_test_and_set_explicit.c~~
|
||||
- ~~atomic_signal_fence.c~~
|
||||
- ~~atomic_thread_fence.c~~
|
||||
|
||||
Miscellaneous functionality that is not used by Rust.
|
||||
|
||||
- ~~aarch64/fp_mode.c~~
|
||||
- ~~aarch64/lse.S~~ (LSE atomics)
|
||||
- ~~aarch64/sme-abi-init.c~~ (matrix extension)
|
||||
- ~~aarch64/sme-abi.S~~ (matrix extension)
|
||||
- ~~aarch64/sme-libc-routines.c~~ (matrix extension)
|
||||
- ~~apple_versioning.c~~
|
||||
- ~~arm/fp_mode.c~~
|
||||
- ~~avr/exit.S~~
|
||||
- ~~clear_cache.c~~
|
||||
- ~~cpu_model/aarch64.c~~
|
||||
- ~~cpu_model/x86.c~~
|
||||
- ~~crtbegin.c~~
|
||||
- ~~crtend.c~~
|
||||
- ~~emutls.c~~
|
||||
- ~~enable_execute_stack.c~~
|
||||
- ~~eprintf.c~~
|
||||
- ~~fp_mode.c~~ (float exception handling)
|
||||
- ~~gcc_personality_v0.c~~
|
||||
- ~~i386/fp_mode.c~~
|
||||
- ~~int_util.c~~
|
||||
- ~~loongarch/fp_mode.c~~
|
||||
- ~~os_version_check.c~~
|
||||
- ~~riscv/fp_mode.c~~
|
||||
- ~~riscv/restore.S~~ (callee-saved registers)
|
||||
- ~~riscv/save.S~~ (callee-saved registers)
|
||||
- ~~trampoline_setup.c~~
|
||||
- ~~ve/grow_stack.S~~
|
||||
- ~~ve/grow_stack_align.S~~
|
||||
|
||||
Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case.
|
||||
|
||||
- ~~i386/floatdidf.S~~
|
||||
- ~~i386/floatdisf.S~~
|
||||
- ~~i386/floatundidf.S~~
|
||||
- ~~i386/floatundisf.S~~
|
||||
- ~~x86_64/floatundidf.S~~
|
||||
- ~~x86_64/floatundisf.S~~
|
||||
- ~~x86_64/floatdidf.c~~
|
||||
- ~~x86_64/floatdisf.c~~
|
||||
|
||||
Unsupported in any current target: used on old versions of 32-bit iOS with ARMv5.
|
||||
|
||||
- ~~arm/adddf3vfp.S~~
|
||||
- ~~arm/addsf3vfp.S~~
|
||||
- ~~arm/divdf3vfp.S~~
|
||||
- ~~arm/divsf3vfp.S~~
|
||||
- ~~arm/eqdf2vfp.S~~
|
||||
- ~~arm/eqsf2vfp.S~~
|
||||
- ~~arm/extendsfdf2vfp.S~~
|
||||
- ~~arm/fixdfsivfp.S~~
|
||||
- ~~arm/fixsfsivfp.S~~
|
||||
- ~~arm/fixunsdfsivfp.S~~
|
||||
- ~~arm/fixunssfsivfp.S~~
|
||||
- ~~arm/floatsidfvfp.S~~
|
||||
- ~~arm/floatsisfvfp.S~~
|
||||
- ~~arm/floatunssidfvfp.S~~
|
||||
- ~~arm/floatunssisfvfp.S~~
|
||||
- ~~arm/gedf2vfp.S~~
|
||||
- ~~arm/gesf2vfp.S~~
|
||||
- ~~arm/gtdf2vfp.S~~
|
||||
- ~~arm/gtsf2vfp.S~~
|
||||
- ~~arm/ledf2vfp.S~~
|
||||
- ~~arm/lesf2vfp.S~~
|
||||
- ~~arm/ltdf2vfp.S~~
|
||||
- ~~arm/ltsf2vfp.S~~
|
||||
- ~~arm/muldf3vfp.S~~
|
||||
- ~~arm/mulsf3vfp.S~~
|
||||
- ~~arm/nedf2vfp.S~~
|
||||
- ~~arm/negdf2vfp.S~~
|
||||
- ~~arm/negsf2vfp.S~~
|
||||
- ~~arm/nesf2vfp.S~~
|
||||
- ~~arm/subdf3vfp.S~~
|
||||
- ~~arm/subsf3vfp.S~~
|
||||
- ~~arm/truncdfsf2vfp.S~~
|
||||
- ~~arm/unorddf2vfp.S~~
|
||||
- ~~arm/unordsf2vfp.S~~
|
||||
|
||||
## License
|
||||
|
||||
Usage is allowed under the [MIT License] and the [Apache License, Version 2.0]
|
||||
with the LLVM exception.
|
||||
|
||||
[MIT License]: https://opensource.org/license/mit
|
||||
[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
### Contribution
|
||||
|
||||
Contributions are licensed under the MIT License, the Apache License,
|
||||
Version 2.0, and the Apache-2.0 license with the LLVM exception.
|
||||
|
||||
See [LICENSE.txt](../LICENSE.txt) for full details.
|
||||
712
library/compiler-builtins/compiler-builtins/build.rs
Normal file
712
library/compiler-builtins/compiler-builtins/build.rs
Normal file
|
|
@ -0,0 +1,712 @@
|
|||
mod configure;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use configure::{Target, configure_aliases, configure_f16_f128};
|
||||
|
||||
fn main() {
|
||||
println!("cargo::rerun-if-changed=build.rs");
|
||||
println!("cargo::rerun-if-changed=configure.rs");
|
||||
|
||||
let target = Target::from_env();
|
||||
let cwd = env::current_dir().unwrap();
|
||||
|
||||
configure_check_cfg();
|
||||
configure_f16_f128(&target);
|
||||
configure_aliases(&target);
|
||||
|
||||
configure_libm(&target);
|
||||
|
||||
println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display());
|
||||
|
||||
// Emscripten's runtime includes all the builtins
|
||||
if target.os == "emscripten" {
|
||||
return;
|
||||
}
|
||||
|
||||
// OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source
|
||||
if target.os == "openbsd" {
|
||||
println!("cargo:rustc-link-search=native=/usr/lib");
|
||||
println!("cargo:rustc-link-lib=compiler_rt");
|
||||
return;
|
||||
}
|
||||
|
||||
// Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to
|
||||
// provide them.
|
||||
if (target.triple.contains("wasm") && !target.triple.contains("wasi"))
|
||||
|| (target.triple.contains("sgx") && target.triple.contains("fortanix"))
|
||||
|| target.triple.contains("-none")
|
||||
|| target.triple.contains("nvptx")
|
||||
|| target.triple.contains("uefi")
|
||||
|| target.triple.contains("xous")
|
||||
{
|
||||
println!("cargo:rustc-cfg=feature=\"mem\"");
|
||||
}
|
||||
|
||||
// These targets have hardware unaligned access support.
|
||||
println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
|
||||
if target.arch.contains("x86_64")
|
||||
|| target.arch.contains("x86")
|
||||
|| target.arch.contains("aarch64")
|
||||
|| target.arch.contains("bpf")
|
||||
{
|
||||
println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
|
||||
}
|
||||
|
||||
// NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
|
||||
// target triple. This is usually correct for our built-in targets but can break in presence of
|
||||
// custom targets, which can have arbitrary names.
|
||||
let llvm_target = target.triple.split('-').collect::<Vec<_>>();
|
||||
|
||||
// Build missing intrinsics from compiler-rt C source code. If we're
|
||||
// mangling names though we assume that we're also in test mode so we don't
|
||||
// build anything and we rely on the upstream implementation of compiler-rt
|
||||
// functions
|
||||
if !cfg!(feature = "mangled-names") && cfg!(feature = "c") {
|
||||
// Don't use a C compiler for these targets:
|
||||
//
|
||||
// * nvptx - everything is bitcode, not compatible with mixed C/Rust
|
||||
if !target.arch.contains("nvptx") {
|
||||
#[cfg(feature = "c")]
|
||||
c::compile(&llvm_target, &target);
|
||||
}
|
||||
}
|
||||
|
||||
// Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
|
||||
// includes the old androideabi. It is deprecated but it is available as a
|
||||
// rustc target (arm-linux-androideabi).
|
||||
println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
|
||||
if llvm_target[0] == "armv4t"
|
||||
|| llvm_target[0] == "armv5te"
|
||||
|| target.triple == "arm-linux-androideabi"
|
||||
{
|
||||
println!("cargo:rustc-cfg=kernel_user_helpers")
|
||||
}
|
||||
|
||||
if llvm_target[0].starts_with("aarch64") {
|
||||
generate_aarch64_outlined_atomics();
|
||||
}
|
||||
}
|
||||
|
||||
/// Run configuration for `libm` since it is included directly.
|
||||
///
|
||||
/// Much of this is copied from `libm/configure.rs`.
|
||||
fn configure_libm(target: &Target) {
|
||||
println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
|
||||
println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
|
||||
println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
|
||||
println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable-public-internals\"))");
|
||||
|
||||
// Always use intrinsics
|
||||
println!("cargo:rustc-cfg=intrinsics_enabled");
|
||||
|
||||
// The arch module may contain assembly.
|
||||
if !cfg!(feature = "no-asm") {
|
||||
println!("cargo:rustc-cfg=arch_enabled");
|
||||
}
|
||||
|
||||
println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
|
||||
if !matches!(target.opt_level.as_str(), "0" | "1") {
|
||||
println!("cargo:rustc-cfg=optimizations_enabled");
|
||||
}
|
||||
|
||||
// Config shorthands
|
||||
println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
|
||||
if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
|
||||
// Shorthand to detect i586 targets
|
||||
println!("cargo:rustc-cfg=x86_no_sse");
|
||||
}
|
||||
|
||||
println!(
|
||||
"cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
|
||||
target.cargo_features
|
||||
);
|
||||
println!("cargo:rustc-env=CFG_OPT_LEVEL={}", target.opt_level);
|
||||
println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", target.features);
|
||||
|
||||
// Activate libm's unstable features to make full use of Nightly.
|
||||
println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\"");
|
||||
}
|
||||
|
||||
fn aarch64_symbol(ordering: Ordering) -> &'static str {
|
||||
match ordering {
|
||||
Ordering::Relaxed => "relax",
|
||||
Ordering::Acquire => "acq",
|
||||
Ordering::Release => "rel",
|
||||
Ordering::AcqRel => "acq_rel",
|
||||
_ => panic!("unknown symbol for {ordering:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
|
||||
/// Define them from the build script instead.
|
||||
/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
|
||||
fn generate_aarch64_outlined_atomics() {
|
||||
use std::fmt::Write;
|
||||
// #[macro_export] so that we can use this in tests
|
||||
let gen_macro =
|
||||
|name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");
|
||||
|
||||
// Generate different macros for add/clr/eor/set so that we can test them separately.
|
||||
let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"];
|
||||
let mut macros = BTreeMap::new();
|
||||
for sym in sym_names {
|
||||
macros.insert(sym, gen_macro(sym));
|
||||
}
|
||||
|
||||
// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
|
||||
let mut cas16 = gen_macro("cas16");
|
||||
|
||||
for ordering in [
|
||||
Ordering::Relaxed,
|
||||
Ordering::Acquire,
|
||||
Ordering::Release,
|
||||
Ordering::AcqRel,
|
||||
] {
|
||||
let sym_ordering = aarch64_symbol(ordering);
|
||||
for size in [1, 2, 4, 8] {
|
||||
for (sym, macro_) in &mut macros {
|
||||
let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
|
||||
writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
|
||||
}
|
||||
}
|
||||
let name = format!("__aarch64_cas16_{sym_ordering}");
|
||||
writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
|
||||
}
|
||||
|
||||
let mut buf = String::new();
|
||||
for macro_def in macros.values().chain(std::iter::once(&cas16)) {
|
||||
buf += macro_def;
|
||||
buf += "}; }\n";
|
||||
}
|
||||
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
|
||||
std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap();
|
||||
}
|
||||
|
||||
/// Emit directives for features we expect to support that aren't in `Cargo.toml`.
|
||||
///
|
||||
/// These are mostly cfg elements emitted by this `build.rs`.
|
||||
fn configure_check_cfg() {
|
||||
// Functions where we can set the "optimized-c" flag
|
||||
const HAS_OPTIMIZED_C: &[&str] = &[
|
||||
"__ashldi3",
|
||||
"__ashlsi3",
|
||||
"__ashrdi3",
|
||||
"__ashrsi3",
|
||||
"__bswapsi2",
|
||||
"__bswapdi2",
|
||||
"__bswapti2",
|
||||
"__divdi3",
|
||||
"__divsi3",
|
||||
"__divmoddi4",
|
||||
"__divmodsi4",
|
||||
"__divmodsi4",
|
||||
"__divmodti4",
|
||||
"__lshrdi3",
|
||||
"__lshrsi3",
|
||||
"__moddi3",
|
||||
"__modsi3",
|
||||
"__muldi3",
|
||||
"__udivdi3",
|
||||
"__udivmoddi4",
|
||||
"__udivmodsi4",
|
||||
"__udivsi3",
|
||||
"__umoddi3",
|
||||
"__umodsi3",
|
||||
];
|
||||
|
||||
// Build a list of all aarch64 atomic operation functions
|
||||
let mut aarch_atomic = Vec::new();
|
||||
for aarch_op in ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"] {
|
||||
let op_sizes = if aarch_op == "cas" {
|
||||
[1, 2, 4, 8, 16].as_slice()
|
||||
} else {
|
||||
[1, 2, 4, 8].as_slice()
|
||||
};
|
||||
|
||||
for op_size in op_sizes {
|
||||
for ordering in ["relax", "acq", "rel", "acq_rel"] {
|
||||
aarch_atomic.push(format!("__aarch64_{aarch_op}{op_size}_{ordering}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for fn_name in HAS_OPTIMIZED_C
|
||||
.iter()
|
||||
.copied()
|
||||
.chain(aarch_atomic.iter().map(|s| s.as_str()))
|
||||
{
|
||||
println!("cargo::rustc-check-cfg=cfg({fn_name}, values(\"optimized-c\"))",);
|
||||
}
|
||||
|
||||
// Rustc is unaware of sparc target features, but this does show up from
|
||||
// `rustc --print target-features --target sparc64-unknown-linux-gnu`.
|
||||
println!("cargo::rustc-check-cfg=cfg(target_feature, values(\"vis3\"))");
|
||||
|
||||
// FIXME: these come from libm and should be changed there
|
||||
println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))");
|
||||
println!("cargo::rustc-check-cfg=cfg(assert_no_panic)");
|
||||
}
|
||||
|
||||
#[cfg(feature = "c")]
|
||||
mod c {
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::env;
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use super::Target;
|
||||
|
||||
struct Sources {
|
||||
// SYMBOL -> PATH TO SOURCE
|
||||
map: BTreeMap<&'static str, &'static str>,
|
||||
}
|
||||
|
||||
impl Sources {
|
||||
fn new() -> Sources {
|
||||
Sources {
|
||||
map: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn extend(&mut self, sources: &[(&'static str, &'static str)]) {
|
||||
// NOTE Some intrinsics have both a generic implementation (e.g.
|
||||
// `floatdidf.c`) and an arch optimized implementation
|
||||
// (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized
|
||||
// implementation and discard the generic implementation. If we don't
|
||||
// and keep both implementations, the linker will yell at us about
|
||||
// duplicate symbols!
|
||||
for (symbol, src) in sources {
|
||||
if src.contains("/") {
|
||||
// Arch-optimized implementation (preferred)
|
||||
self.map.insert(symbol, src);
|
||||
} else {
|
||||
// Generic implementation
|
||||
if !self.map.contains_key(symbol) {
|
||||
self.map.insert(symbol, src);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove(&mut self, symbols: &[&str]) {
|
||||
for symbol in symbols {
|
||||
self.map.remove(*symbol).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compile intrinsics from the compiler-rt C source code
|
||||
pub fn compile(llvm_target: &[&str], target: &Target) {
|
||||
let mut consider_float_intrinsics = true;
|
||||
let cfg = &mut cc::Build::new();
|
||||
|
||||
// AArch64 GCCs exit with an error condition when they encounter any kind of floating point
|
||||
// code if the `nofp` and/or `nosimd` compiler flags have been set.
|
||||
//
|
||||
// Therefore, evaluate if those flags are present and set a boolean that causes any
|
||||
// compiler-rt intrinsics that contain floating point source to be excluded for this target.
|
||||
if target.arch == "aarch64" {
|
||||
let cflags_key = String::from("CFLAGS_") + &(target.triple.replace("-", "_"));
|
||||
if let Ok(cflags_value) = env::var(cflags_key) {
|
||||
if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") {
|
||||
consider_float_intrinsics = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `compiler-rt` requires `COMPILER_RT_HAS_FLOAT16` to be defined to make it use the
|
||||
// `_Float16` type for `f16` intrinsics. This shouldn't matter as all existing `f16`
|
||||
// intrinsics have been ported to Rust in `compiler-builtins` as C compilers don't
|
||||
// support `_Float16` on all targets (whereas Rust does). However, define the macro
|
||||
// anyway to prevent issues like rust#118813 and rust#123885 silently recurring if more
|
||||
// `f16` intrinsics get accidentally added here in the future.
|
||||
cfg.define("COMPILER_RT_HAS_FLOAT16", None);
|
||||
|
||||
cfg.warnings(false);
|
||||
|
||||
if target.env == "msvc" {
|
||||
// Don't pull in extra libraries on MSVC
|
||||
cfg.flag("/Zl");
|
||||
|
||||
// Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP
|
||||
cfg.define("__func__", Some("__FUNCTION__"));
|
||||
} else {
|
||||
// Turn off various features of gcc and such, mostly copying
|
||||
// compiler-rt's build system already
|
||||
cfg.flag("-fno-builtin");
|
||||
cfg.flag("-fvisibility=hidden");
|
||||
cfg.flag("-ffreestanding");
|
||||
// Avoid the following warning appearing once **per file**:
|
||||
// clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument]
|
||||
//
|
||||
// Note that compiler-rt's build system also checks
|
||||
//
|
||||
// `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)`
|
||||
//
|
||||
// in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19.
|
||||
cfg.flag_if_supported("-fomit-frame-pointer");
|
||||
cfg.define("VISIBILITY_HIDDEN", None);
|
||||
|
||||
if let "aarch64" | "arm64ec" = target.arch.as_str() {
|
||||
// FIXME(llvm20): Older GCCs on A64 fail to build with
|
||||
// -Werror=implicit-function-declaration due to a compiler-rt bug.
|
||||
// With a newer LLVM we should be able to enable the flag everywhere.
|
||||
// https://github.com/llvm/llvm-project/commit/8aa9d6206ce55bdaaf422839c351fbd63f033b89
|
||||
} else {
|
||||
// Avoid implicitly creating references to undefined functions
|
||||
cfg.flag("-Werror=implicit-function-declaration");
|
||||
}
|
||||
}
|
||||
|
||||
// int_util.c tries to include stdlib.h if `_WIN32` is defined,
|
||||
// which it is when compiling UEFI targets with clang. This is
|
||||
// at odds with compiling with `-ffreestanding`, as the header
|
||||
// may be incompatible or not present. Create a minimal stub
|
||||
// header to use instead.
|
||||
if target.os == "uefi" {
|
||||
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||
let include_dir = out_dir.join("include");
|
||||
if !include_dir.exists() {
|
||||
fs::create_dir(&include_dir).unwrap();
|
||||
}
|
||||
fs::write(include_dir.join("stdlib.h"), "#include <stddef.h>").unwrap();
|
||||
cfg.flag(&format!("-I{}", include_dir.to_str().unwrap()));
|
||||
}
|
||||
|
||||
let mut sources = Sources::new();
|
||||
sources.extend(&[
|
||||
("__absvdi2", "absvdi2.c"),
|
||||
("__absvsi2", "absvsi2.c"),
|
||||
("__addvdi3", "addvdi3.c"),
|
||||
("__addvsi3", "addvsi3.c"),
|
||||
("__cmpdi2", "cmpdi2.c"),
|
||||
("__int_util", "int_util.c"),
|
||||
("__mulvdi3", "mulvdi3.c"),
|
||||
("__mulvsi3", "mulvsi3.c"),
|
||||
("__negdi2", "negdi2.c"),
|
||||
("__negvdi2", "negvdi2.c"),
|
||||
("__negvsi2", "negvsi2.c"),
|
||||
("__paritydi2", "paritydi2.c"),
|
||||
("__paritysi2", "paritysi2.c"),
|
||||
("__popcountdi2", "popcountdi2.c"),
|
||||
("__popcountsi2", "popcountsi2.c"),
|
||||
("__subvdi3", "subvdi3.c"),
|
||||
("__subvsi3", "subvsi3.c"),
|
||||
("__ucmpdi2", "ucmpdi2.c"),
|
||||
]);
|
||||
|
||||
if consider_float_intrinsics {
|
||||
sources.extend(&[
|
||||
("__divdc3", "divdc3.c"),
|
||||
("__divsc3", "divsc3.c"),
|
||||
("__muldc3", "muldc3.c"),
|
||||
("__mulsc3", "mulsc3.c"),
|
||||
("__negdf2", "negdf2.c"),
|
||||
("__negsf2", "negsf2.c"),
|
||||
]);
|
||||
}
|
||||
|
||||
// On iOS and 32-bit OSX these are all just empty intrinsics, no need to
|
||||
// include them.
|
||||
if target.vendor != "apple" || target.arch != "x86" {
|
||||
sources.extend(&[
|
||||
("__absvti2", "absvti2.c"),
|
||||
("__addvti3", "addvti3.c"),
|
||||
("__cmpti2", "cmpti2.c"),
|
||||
("__ffsti2", "ffsti2.c"),
|
||||
("__mulvti3", "mulvti3.c"),
|
||||
("__negti2", "negti2.c"),
|
||||
("__parityti2", "parityti2.c"),
|
||||
("__popcountti2", "popcountti2.c"),
|
||||
("__subvti3", "subvti3.c"),
|
||||
("__ucmpti2", "ucmpti2.c"),
|
||||
]);
|
||||
|
||||
if consider_float_intrinsics {
|
||||
sources.extend(&[("__negvti2", "negvti2.c")]);
|
||||
}
|
||||
}
|
||||
|
||||
if target.vendor == "apple" {
|
||||
sources.extend(&[
|
||||
("atomic_flag_clear", "atomic_flag_clear.c"),
|
||||
("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"),
|
||||
("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"),
|
||||
(
|
||||
"atomic_flag_test_and_set_explicit",
|
||||
"atomic_flag_test_and_set_explicit.c",
|
||||
),
|
||||
("atomic_signal_fence", "atomic_signal_fence.c"),
|
||||
("atomic_thread_fence", "atomic_thread_fence.c"),
|
||||
]);
|
||||
}
|
||||
|
||||
if target.env != "msvc" {
|
||||
if target.arch == "x86" {
|
||||
sources.extend(&[
|
||||
("__ashldi3", "i386/ashldi3.S"),
|
||||
("__ashrdi3", "i386/ashrdi3.S"),
|
||||
("__divdi3", "i386/divdi3.S"),
|
||||
("__lshrdi3", "i386/lshrdi3.S"),
|
||||
("__moddi3", "i386/moddi3.S"),
|
||||
("__muldi3", "i386/muldi3.S"),
|
||||
("__udivdi3", "i386/udivdi3.S"),
|
||||
("__umoddi3", "i386/umoddi3.S"),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
if target.arch == "arm" && target.vendor != "apple" && target.env != "msvc" {
|
||||
sources.extend(&[
|
||||
("__aeabi_div0", "arm/aeabi_div0.c"),
|
||||
("__aeabi_drsub", "arm/aeabi_drsub.c"),
|
||||
("__aeabi_frsub", "arm/aeabi_frsub.c"),
|
||||
("__bswapdi2", "arm/bswapdi2.S"),
|
||||
("__bswapsi2", "arm/bswapsi2.S"),
|
||||
("__divmodsi4", "arm/divmodsi4.S"),
|
||||
("__divsi3", "arm/divsi3.S"),
|
||||
("__modsi3", "arm/modsi3.S"),
|
||||
("__switch16", "arm/switch16.S"),
|
||||
("__switch32", "arm/switch32.S"),
|
||||
("__switch8", "arm/switch8.S"),
|
||||
("__switchu8", "arm/switchu8.S"),
|
||||
("__sync_synchronize", "arm/sync_synchronize.S"),
|
||||
("__udivmodsi4", "arm/udivmodsi4.S"),
|
||||
("__udivsi3", "arm/udivsi3.S"),
|
||||
("__umodsi3", "arm/umodsi3.S"),
|
||||
]);
|
||||
|
||||
if target.os == "freebsd" {
|
||||
sources.extend(&[("__clear_cache", "clear_cache.c")]);
|
||||
}
|
||||
|
||||
// First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM.
|
||||
// Second are little-endian only, so build fail on big-endian targets.
|
||||
// Temporally workaround: exclude these files for big-endian targets.
|
||||
if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") {
|
||||
sources.extend(&[
|
||||
("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"),
|
||||
("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"),
|
||||
("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"),
|
||||
("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
if llvm_target[0] == "armv7" {
|
||||
sources.extend(&[
|
||||
("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"),
|
||||
("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"),
|
||||
("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"),
|
||||
("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"),
|
||||
("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"),
|
||||
("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"),
|
||||
("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"),
|
||||
("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"),
|
||||
("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"),
|
||||
("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"),
|
||||
("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"),
|
||||
("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"),
|
||||
("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"),
|
||||
("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"),
|
||||
("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"),
|
||||
("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"),
|
||||
("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"),
|
||||
("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"),
|
||||
("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"),
|
||||
("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"),
|
||||
]);
|
||||
}
|
||||
|
||||
if llvm_target.last().unwrap().ends_with("eabihf") {
|
||||
if !llvm_target[0].starts_with("thumbv7em")
|
||||
&& !llvm_target[0].starts_with("thumbv8m.main")
|
||||
{
|
||||
// The FPU option chosen for these architectures in cc-rs, ie:
|
||||
// -mfpu=fpv4-sp-d16 for thumbv7em
|
||||
// -mfpu=fpv5-sp-d16 for thumbv8m.main
|
||||
// do not support double precision floating points conversions so the files
|
||||
// that include such instructions are not included for these targets.
|
||||
sources.extend(&[
|
||||
("__fixdfsivfp", "arm/fixdfsivfp.S"),
|
||||
("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"),
|
||||
("__floatsidfvfp", "arm/floatsidfvfp.S"),
|
||||
("__floatunssidfvfp", "arm/floatunssidfvfp.S"),
|
||||
]);
|
||||
}
|
||||
|
||||
sources.extend(&[
|
||||
("__fixsfsivfp", "arm/fixsfsivfp.S"),
|
||||
("__fixunssfsivfp", "arm/fixunssfsivfp.S"),
|
||||
("__floatsisfvfp", "arm/floatsisfvfp.S"),
|
||||
("__floatunssisfvfp", "arm/floatunssisfvfp.S"),
|
||||
("__floatunssisfvfp", "arm/floatunssisfvfp.S"),
|
||||
("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"),
|
||||
("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"),
|
||||
("__negdf2vfp", "arm/negdf2vfp.S"),
|
||||
("__negsf2vfp", "arm/negsf2vfp.S"),
|
||||
]);
|
||||
}
|
||||
|
||||
if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
|
||||
sources.extend(&[
|
||||
("__comparetf2", "comparetf2.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
("__fe_raise_inexact", "fp_mode.c"),
|
||||
]);
|
||||
|
||||
if target.os != "windows" && target.os != "cygwin" {
|
||||
sources.extend(&[("__multc3", "multc3.c")]);
|
||||
}
|
||||
}
|
||||
|
||||
if target.arch == "mips" || target.arch == "riscv32" || target.arch == "riscv64" {
|
||||
sources.extend(&[("__bswapsi2", "bswapsi2.c")]);
|
||||
}
|
||||
|
||||
if target.arch == "mips64" {
|
||||
sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
|
||||
}
|
||||
|
||||
if target.arch == "loongarch64" {
|
||||
sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
|
||||
}
|
||||
|
||||
// Remove the assembly implementations that won't compile for the target
|
||||
if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target.os == "uefi"
|
||||
{
|
||||
let mut to_remove = Vec::new();
|
||||
for (k, v) in sources.map.iter() {
|
||||
if v.ends_with(".S") {
|
||||
to_remove.push(*k);
|
||||
}
|
||||
}
|
||||
sources.remove(&to_remove);
|
||||
}
|
||||
|
||||
if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" {
|
||||
sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]);
|
||||
}
|
||||
|
||||
// Android and Cygwin uses emulated TLS so we need a runtime support function.
|
||||
if target.os == "android" || target.os == "cygwin" {
|
||||
sources.extend(&[("__emutls_get_address", "emutls.c")]);
|
||||
}
|
||||
|
||||
// Work around a bug in the NDK headers (fixed in
|
||||
// https://r.android.com/2038949 which will be released in a future
|
||||
// NDK version) by providing a definition of LONG_BIT.
|
||||
if target.os == "android" {
|
||||
cfg.define("LONG_BIT", "(8 * sizeof(long))");
|
||||
}
|
||||
|
||||
// OpenHarmony also uses emulated TLS.
|
||||
if target.env == "ohos" {
|
||||
sources.extend(&[("__emutls_get_address", "emutls.c")]);
|
||||
}
|
||||
|
||||
// When compiling the C code we require the user to tell us where the
|
||||
// source code is, and this is largely done so when we're compiling as
|
||||
// part of rust-lang/rust we can use the same llvm-project repository as
|
||||
// rust-lang/rust.
|
||||
let root = match env::var_os("RUST_COMPILER_RT_ROOT") {
|
||||
Some(s) => PathBuf::from(s),
|
||||
None => {
|
||||
panic!(
|
||||
"RUST_COMPILER_RT_ROOT is not set. You may need to run \
|
||||
`ci/download-compiler-rt.sh`."
|
||||
);
|
||||
}
|
||||
};
|
||||
if !root.exists() {
|
||||
panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display());
|
||||
}
|
||||
|
||||
// Support deterministic builds by remapping the __FILE__ prefix if the
|
||||
// compiler supports it. This fixes the nondeterminism caused by the
|
||||
// use of that macro in lib/builtins/int_util.h in compiler-rt.
|
||||
cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display()));
|
||||
|
||||
// Include out-of-line atomics for aarch64, which are all generated by supplying different
|
||||
// sets of flags to the same source file.
|
||||
// Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430) and
|
||||
// on uefi.
|
||||
let src_dir = root.join("lib/builtins");
|
||||
if target.arch == "aarch64" && target.env != "msvc" && target.os != "uefi" {
|
||||
// See below for why we're building these as separate libraries.
|
||||
build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg);
|
||||
|
||||
// Some run-time CPU feature detection is necessary, as well.
|
||||
let cpu_model_src = if src_dir.join("cpu_model.c").exists() {
|
||||
"cpu_model.c"
|
||||
} else {
|
||||
"cpu_model/aarch64.c"
|
||||
};
|
||||
sources.extend(&[("__aarch64_have_lse_atomics", cpu_model_src)]);
|
||||
}
|
||||
|
||||
let mut added_sources = HashSet::new();
|
||||
for (sym, src) in sources.map.iter() {
|
||||
let src = src_dir.join(src);
|
||||
if added_sources.insert(src.clone()) {
|
||||
cfg.file(&src);
|
||||
println!("cargo:rerun-if-changed={}", src.display());
|
||||
}
|
||||
println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
|
||||
}
|
||||
|
||||
cfg.compile("libcompiler-rt.a");
|
||||
}
|
||||
|
||||
/// Compile LLVM's out-of-line aarch64 atomic helpers (`aarch64/lse.S`) once per
/// (operation, size, memory-model) combination.
///
/// Each combination becomes a tiny generated `.S` wrapper in `OUT_DIR` that
/// `#define`s `L_<op>`, `SIZE`, and `MODEL` and then `#include`s the original
/// `lse.S`, so a single assembly source yields many distinctly named objects.
fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) {
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let outlined_atomics_file = builtins_dir.join("aarch64").join("lse.S");
    println!("cargo:rerun-if-changed={}", outlined_atomics_file.display());

    cfg.include(&builtins_dir);

    for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] {
        for size in &[1, 2, 4, 8, 16] {
            // Only compare-and-swap has a 16-byte variant.
            if *size == 16 && *instruction_type != "cas" {
                continue;
            }

            for (model_number, model_name) in
                &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")]
            {
                // The original compiler-rt build system compiles the same
                // source file multiple times with different compiler
                // options. Here we do something slightly different: we
                // create multiple .S files with the proper #defines and
                // then include the original file.
                //
                // This is needed because the cc crate doesn't allow us to
                // override the name of object files and libtool requires
                // all objects in an archive to have unique names.
                let path =
                    out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name));
                let mut file = File::create(&path).unwrap();
                writeln!(file, "#define L_{}", instruction_type).unwrap();
                writeln!(file, "#define SIZE {}", size).unwrap();
                writeln!(file, "#define MODEL {}", model_number).unwrap();
                writeln!(
                    file,
                    "#include \"{}\"",
                    outlined_atomics_file.canonicalize().unwrap().display()
                )
                .unwrap();
                // Flush and close the wrapper before handing it to `cc`.
                drop(file);
                cfg.file(path);

                // Tell the Rust side an optimized C version of this symbol exists.
                let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name);
                println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
            }
        }
    }
}
|
||||
}
|
||||
136
library/compiler-builtins/compiler-builtins/configure.rs
Normal file
136
library/compiler-builtins/compiler-builtins/configure.rs
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
// Configuration that is shared between `compiler_builtins` and `builtins_test`.
|
||||
|
||||
use std::env;
|
||||
|
||||
/// Target description assembled from the environment Cargo provides to build
/// scripts (see `Target::from_env`). Shared between `compiler_builtins` and
/// `builtins_test`.
#[derive(Debug)]
#[allow(dead_code)]
pub struct Target {
    // Full target triple from `TARGET`, e.g. "aarch64-unknown-linux-gnu".
    pub triple: String,
    // `triple` split on '-', so index 0 is the architecture component.
    pub triple_split: Vec<String>,
    // From `OPT_LEVEL`.
    pub opt_level: String,
    // Enabled cargo features, lowercased with '_' mapped back to '-'.
    pub cargo_features: Vec<String>,
    // From `CARGO_CFG_TARGET_OS`.
    pub os: String,
    // From `CARGO_CFG_TARGET_ARCH`.
    pub arch: String,
    // From `CARGO_CFG_TARGET_VENDOR`.
    pub vendor: String,
    // From `CARGO_CFG_TARGET_ENV` (e.g. "msvc", "gnu", "ohos"; may be empty).
    pub env: String,
    // From `CARGO_CFG_TARGET_POINTER_WIDTH`, parsed to an integer.
    pub pointer_width: u8,
    // Derived from `CARGO_CFG_TARGET_ENDIAN`.
    pub little_endian: bool,
    // Comma-separated `CARGO_CFG_TARGET_FEATURE` entries.
    pub features: Vec<String>,
}
|
||||
|
||||
impl Target {
    /// Build a `Target` from the environment variables Cargo sets for build
    /// scripts.
    ///
    /// Panics if a required `TARGET` / `OPT_LEVEL` / `CARGO_CFG_TARGET_*`
    /// variable is missing or malformed, so this must only be called from a
    /// build-script context.
    pub fn from_env() -> Self {
        let triple = env::var("TARGET").unwrap();
        let triple_split = triple.split('-').map(ToOwned::to_owned).collect();
        let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() {
            "little" => true,
            "big" => false,
            x => panic!("unknown endian {x}"),
        };
        // Cargo exposes each enabled feature as a `CARGO_FEATURE_<NAME>` env
        // var; map those back to lowercase, dash-separated feature names.
        let cargo_features = env::vars()
            .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
            .map(|s| s.to_lowercase().replace("_", "-"))
            .collect();

        Self {
            triple,
            triple_split,
            os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
            opt_level: env::var("OPT_LEVEL").unwrap(),
            cargo_features,
            arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
            vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
            env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
            pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH")
                .unwrap()
                .parse()
                .unwrap(),
            little_endian,
            // NOTE(review): when `CARGO_CFG_TARGET_FEATURE` is unset,
            // `"".split(",")` yields a single empty string, so `features`
            // contains one "" entry rather than being empty — `has_feature`
            // is unaffected, but confirm before iterating `features` directly.
            features: env::var("CARGO_CFG_TARGET_FEATURE")
                .unwrap_or_default()
                .split(",")
                .map(ToOwned::to_owned)
                .collect(),
        }
    }

    /// Returns `true` if `feature` appears in `CARGO_CFG_TARGET_FEATURE`.
    #[allow(dead_code)]
    pub fn has_feature(&self, feature: &str) -> bool {
        self.features.iter().any(|f| f == feature)
    }
}
|
||||
|
||||
pub fn configure_aliases(target: &Target) {
|
||||
// To compile builtins-test-intrinsics for thumb targets, where there is no libc
|
||||
println!("cargo::rustc-check-cfg=cfg(thumb)");
|
||||
if target.triple_split[0].starts_with("thumb") {
|
||||
println!("cargo:rustc-cfg=thumb")
|
||||
}
|
||||
|
||||
// compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because
|
||||
// these targets do not have full Thumb-2 support but only original Thumb-1.
|
||||
// We have to cfg our code accordingly.
|
||||
println!("cargo::rustc-check-cfg=cfg(thumb_1)");
|
||||
if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" {
|
||||
println!("cargo:rustc-cfg=thumb_1")
|
||||
}
|
||||
}
|
||||
|
||||
/// Configure whether or not `f16` and `f128` support should be enabled.
|
||||
pub fn configure_f16_f128(target: &Target) {
|
||||
// Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
|
||||
// that the backend will not crash when using these types and generates code that can be called
|
||||
// without crashing (no infinite recursion). This does not mean that the platform doesn't have
|
||||
// ABI or other bugs.
|
||||
//
|
||||
// We do this here rather than in `rust-lang/rust` because configuring via cargo features is
|
||||
// not straightforward.
|
||||
//
|
||||
// Original source of this list:
|
||||
// <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
|
||||
let f16_enabled = match target.arch.as_str() {
|
||||
// Unsupported <https://github.com/llvm/llvm-project/issues/94434>
|
||||
"arm64ec" => false,
|
||||
// Selection failure <https://github.com/llvm/llvm-project/issues/50374>
|
||||
"s390x" => false,
|
||||
// Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
|
||||
"csky" => false,
|
||||
"hexagon" => false,
|
||||
"powerpc" | "powerpc64" => false,
|
||||
"sparc" | "sparc64" => false,
|
||||
"wasm32" | "wasm64" => false,
|
||||
// Most everything else works as of LLVM 19
|
||||
_ => true,
|
||||
};
|
||||
|
||||
let f128_enabled = match target.arch.as_str() {
|
||||
// Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
|
||||
"amdgpu" => false,
|
||||
// Unsupported <https://github.com/llvm/llvm-project/issues/94434>
|
||||
"arm64ec" => false,
|
||||
// FIXME(llvm20): fixed by <https://github.com/llvm/llvm-project/pull/117525>
|
||||
"mips64" | "mips64r6" => false,
|
||||
// Selection failure <https://github.com/llvm/llvm-project/issues/95471>
|
||||
"nvptx64" => false,
|
||||
// Selection failure <https://github.com/llvm/llvm-project/issues/101545>
|
||||
"powerpc64" if &target.os == "aix" => false,
|
||||
// Selection failure <https://github.com/llvm/llvm-project/issues/41838>
|
||||
"sparc" => false,
|
||||
// Most everything else works as of LLVM 19
|
||||
_ => true,
|
||||
};
|
||||
|
||||
// If the feature is set, disable these types.
|
||||
let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
|
||||
|
||||
println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
|
||||
println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
|
||||
|
||||
if f16_enabled && !disable_both {
|
||||
println!("cargo::rustc-cfg=f16_enabled");
|
||||
}
|
||||
|
||||
if f128_enabled && !disable_both {
|
||||
println!("cargo::rustc-cfg=f128_enabled");
|
||||
}
|
||||
}
|
||||
21
library/compiler-builtins/compiler-builtins/src/aarch64.rs
Normal file
21
library/compiler-builtins/compiler-builtins/src/aarch64.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#![allow(unused_imports)]

use core::intrinsics;

intrinsics! {
    // Stack-probe routine for aarch64 UEFI targets.
    //
    // NOTE(review): judging from the asm, the allocation size appears to be
    // passed in x15 in units of 16 bytes (`lsl x16, x15, #4` converts it to a
    // byte count) in the usual Windows-style `__chkstk` fashion — confirm
    // against the platform ABI. The loop walks down from `sp` one 4096-byte
    // page at a time, touching each page with `ldr xzr, [x17]`, and returns
    // without modifying `sp` itself.
    #[unsafe(naked)]
    #[cfg(all(target_os = "uefi", not(feature = "no-asm")))]
    pub unsafe extern "C" fn __chkstk() {
        core::arch::naked_asm!(
            ".p2align 2",
            // x16 = size in bytes; x17 = current probe address.
            "lsl x16, x15, #4",
            "mov x17, sp",
            "1:",
            // Step down one page, decrement the remaining byte count, probe.
            "sub x17, x17, 4096",
            "subs x16, x16, 4096",
            "ldr xzr, [x17]",
            "b.gt 1b",
            "ret",
        );
    }
}
|
||||
273
library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
Normal file
273
library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs
Normal file
|
|
@ -0,0 +1,273 @@
|
|||
//! Aarch64 targets have two possible implementations for atomics:
|
||||
//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower.
|
||||
//! 2. Large System Extensions (LSE), newer and faster.
|
||||
//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
|
||||
//! where atomic operations call into the compiler runtime to dispatch between two depending on
|
||||
//! which is supported on the current CPU.
|
||||
//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion.
|
||||
//!
|
||||
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
|
||||
//! Use the `compiler-rt` intrinsics if you want LSE support.
|
||||
//!
|
||||
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
|
||||
//!
|
||||
//! Generate functions for each of the following symbols:
|
||||
//! __aarch64_casM_ORDER
|
||||
//! __aarch64_swpN_ORDER
|
||||
//! __aarch64_ldaddN_ORDER
|
||||
//! __aarch64_ldclrN_ORDER
|
||||
//! __aarch64_ldeorN_ORDER
|
||||
//! __aarch64_ldsetN_ORDER
|
||||
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
|
||||
//!
|
||||
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
|
||||
//! We do something similar, but with macro arguments.
|
||||
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
|
||||
|
||||
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
|
||||
|
||||
/// Translate a byte size to a Rust type.
///
/// Used by the atomic intrinsic macros below to pick the argument and return
/// type for each generated function.
#[rustfmt::skip]
macro_rules! int_ty {
    (1) => { i8 };
    (2) => { i16 };
    (4) => { i32 };
    (8) => { i64 };
    (16) => { i128 };
}
|
||||
|
||||
/// Given a byte size and a register number, return a register of the appropriate size.
///
/// Sub-word and 4-byte operands use the 32-bit `w` form; 8-byte operands use
/// the 64-bit `x` form. There is deliberately no 16-byte arm: i128 operations
/// use register *pairs* via LDXP/STXP (see `compare_and_swap_i128!`).
///
/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
#[rustfmt::skip]
macro_rules! reg {
    (1, $num:literal) => { concat!("w", $num) };
    (2, $num:literal) => { concat!("w", $num) };
    (4, $num:literal) => { concat!("w", $num) };
    (8, $num:literal) => { concat!("x", $num) };
}
|
||||
|
||||
/// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction.
|
||||
#[rustfmt::skip]
|
||||
macro_rules! acquire {
|
||||
(Relaxed) => { "" };
|
||||
(Acquire) => { "a" };
|
||||
(Release) => { "" };
|
||||
(AcqRel) => { "a" };
|
||||
}
|
||||
|
||||
/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction.
|
||||
#[rustfmt::skip]
|
||||
macro_rules! release {
|
||||
(Relaxed) => { "" };
|
||||
(Acquire) => { "" };
|
||||
(Release) => { "l" };
|
||||
(AcqRel) => { "l" };
|
||||
}
|
||||
|
||||
/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction.
|
||||
#[rustfmt::skip]
|
||||
macro_rules! size {
|
||||
(1) => { "b" };
|
||||
(2) => { "h" };
|
||||
(4) => { "" };
|
||||
(8) => { "" };
|
||||
(16) => { "" };
|
||||
}
|
||||
|
||||
/// Given a byte size, translate it to an Unsigned eXTend instruction
|
||||
/// with the correct semantics.
|
||||
///
|
||||
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
|
||||
#[rustfmt::skip]
|
||||
macro_rules! uxt {
|
||||
(1) => { "uxtb" };
|
||||
(2) => { "uxth" };
|
||||
($_:tt) => { "mov" };
|
||||
}
|
||||
|
||||
/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction
|
||||
/// with the correct semantics.
|
||||
///
|
||||
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
|
||||
macro_rules! ldxr {
|
||||
($ordering:ident, $bytes:tt) => {
|
||||
concat!("ld", acquire!($ordering), "xr", size!($bytes))
|
||||
};
|
||||
}
|
||||
|
||||
/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction
|
||||
/// with the correct semantics.
|
||||
///
|
||||
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
|
||||
macro_rules! stxr {
|
||||
($ordering:ident, $bytes:tt) => {
|
||||
concat!("st", release!($ordering), "xr", size!($bytes))
|
||||
};
|
||||
}
|
||||
|
||||
/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Pair of registers instruction
|
||||
/// with the correct semantics.
|
||||
///
|
||||
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
|
||||
macro_rules! ldxp {
|
||||
($ordering:ident) => {
|
||||
concat!("ld", acquire!($ordering), "xp")
|
||||
};
|
||||
}
|
||||
|
||||
/// Given an atomic ordering and byte size, translate it to a STore eXclusive Pair of registers instruction
|
||||
/// with the correct semantics.
|
||||
///
|
||||
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
|
||||
macro_rules! stxp {
|
||||
($ordering:ident) => {
|
||||
concat!("st", release!($ordering), "xp")
|
||||
};
|
||||
}
|
||||
|
||||
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
///
/// Generates one `__aarch64_casN_ORDER` outlined-atomic function as an LL/SC
/// (load-exclusive / store-exclusive) retry loop. The asm reads `expected`
/// from register 0, `desired` from register 1, and `ptr` from x2, and returns
/// the value observed at `ptr` in register 0.
macro_rules! compare_and_swap {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[unsafe(naked)]
            pub unsafe extern "C" fn $name (
                expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                core::arch::naked_asm! {
                    // UXT s(tmp0), s(0) -- zero-extend `expected` into the x16 scratch reg
                    concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x2] -- load-exclusive the current value (also the return value)
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"),
                    // cmp s(0), s(tmp0)
                    concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
                    "bne 1f",
                    // STXR w(tmp1), s(1), [x2] -- try to store `desired`
                    concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"),
                    // Retry if the exclusive store failed.
                    "cbnz w17, 0b",
                    "1:",
                    "ret",
                }
            }
        }
    };
}
|
||||
|
||||
// i128 uses a completely different impl, so it has its own macro.
// A 16-byte CAS must use the LDXP/STXP pair instructions; the i128 arguments
// each occupy a register pair (expected = x0:x1, desired = x2:x3), which is
// why `ptr` is read from x4 and both halves are compared separately.
macro_rules! compare_and_swap_i128 {
    ($ordering:ident, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[unsafe(naked)]
            pub unsafe extern "C" fn $name (
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                core::arch::naked_asm! {
                    // Stash `expected` so x0:x1 can hold the loaded/returned value.
                    "mov x16, x0",
                    "mov x17, x1",
                    "0:",
                    // LDXP x0, x1, [x4]
                    concat!(ldxp!($ordering), " x0, x1, [x4]"),
                    // Compare both halves against `expected`.
                    "cmp x0, x16",
                    "ccmp x1, x17, #0, eq",
                    "bne 1f",
                    // STXP w(tmp2), x2, x3, [x4] -- try to store `desired`
                    concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
                    // Retry if the exclusive store failed.
                    "cbnz w15, 0b",
                    "1:",
                    "ret",
                }
            }
        }
    };
}
|
||||
|
||||
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
///
/// Generates one `__aarch64_swpN_ORDER` outlined-atomic function: atomically
/// stores `left` to `*right_ptr` and returns the previous value, via an LL/SC
/// retry loop.
macro_rules! swap {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[unsafe(naked)]
            pub unsafe extern "C" fn $name (
                left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                core::arch::naked_asm! {
                    // mov s(tmp0), s(0) -- save the value to store
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x1] -- load the old value (also the return value)
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
                    // STXR w(tmp1), s(tmp0), [x1]
                    concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                    // Retry if the exclusive store failed.
                    "cbnz w17, 0b",
                    "ret",
                }
            }
        }
    };
}
|
||||
|
||||
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
///
/// Generic LL/SC read-modify-write loop shared by the `ldadd`/`ldclr`/`ldeor`/
/// `ldset` families: `$op` is the AArch64 instruction mnemonic (e.g. "add",
/// "bic", "eor", "orr") applied between the loaded value and `val`. Returns
/// the value observed at `ptr` *before* the operation.
macro_rules! fetch_op {
    ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[unsafe(naked)]
            pub unsafe extern "C" fn $name (
                val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                core::arch::naked_asm! {
                    // mov s(tmp0), s(0) -- save the operand
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x1] -- load the old value (also the return value)
                    concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"),
                    // OP s(tmp1), s(0), s(tmp0)
                    concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)),
                    // STXR w(tmp2), s(tmp1), [x1]
                    concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                    // Retry if the exclusive store failed.
                    "cbnz w15, 0b",
                    "ret",
                }
            }
        }
    }
}
|
||||
|
||||
// We need a single macro to pass to `foreach_ldadd`.
|
||||
macro_rules! add {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "add" }
|
||||
};
|
||||
}
|
||||
|
||||
// The `ldclr` family clears the bits that are set in the operand, so the
// underlying read-modify-write instruction is `bic` (AND NOT), not a plain
// `and`.
macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "bic" }
    };
}
|
||||
|
||||
macro_rules! xor {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "eor" }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! or {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "orr" }
|
||||
};
|
||||
}
|
||||
|
||||
// See `generate_aarch64_outlined_atomics` in build.rs.
|
||||
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
|
||||
foreach_cas!(compare_and_swap);
|
||||
foreach_cas16!(compare_and_swap_i128);
|
||||
foreach_swp!(swap);
|
||||
foreach_ldadd!(add);
|
||||
foreach_ldclr!(and);
|
||||
foreach_ldeor!(xor);
|
||||
foreach_ldset!(or);
|
||||
280
library/compiler-builtins/compiler-builtins/src/arm.rs
Normal file
280
library/compiler-builtins/compiler-builtins/src/arm.rs
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
#![cfg(not(feature = "no-asm"))]
|
||||
|
||||
// Interfaces used by naked trampolines.
//
// These declarations let the `__aeabi_*` naked wrappers below reference the
// symbols via `sym crate::arm::...`; the actual implementations are presumably
// provided elsewhere in this crate (resolved at link time) — confirm against
// the integer-division modules.
extern "C" {
    fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32;
    fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64;
    fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64;
}
|
||||
|
||||
// Declaration for the `__aeabi_idivmod` trampoline below.
extern "aapcs" {
    // AAPCS is not always the correct ABI for these intrinsics, but we only use this to
    // forward another `__aeabi_` call so it doesn't matter.
    fn __aeabi_idiv(a: i32, b: i32) -> i32;
}
|
||||
|
||||
intrinsics! {
|
||||
// NOTE This function and the ones below are implemented using assembly because they are using a
|
||||
// custom calling convention which can't be implemented using a normal Rust function.
|
||||
#[unsafe(naked)]
|
||||
#[cfg(not(target_env = "msvc"))]
|
||||
pub unsafe extern "C" fn __aeabi_uidivmod() {
|
||||
core::arch::naked_asm!(
|
||||
"push {{lr}}",
|
||||
"sub sp, sp, #4",
|
||||
"mov r2, sp",
|
||||
"bl {trampoline}",
|
||||
"ldr r1, [sp]",
|
||||
"add sp, sp, #4",
|
||||
"pop {{pc}}",
|
||||
trampoline = sym crate::arm::__udivmodsi4
|
||||
);
|
||||
}
|
||||
|
||||
#[unsafe(naked)]
|
||||
pub unsafe extern "C" fn __aeabi_uldivmod() {
|
||||
core::arch::naked_asm!(
|
||||
"push {{r4, lr}}",
|
||||
"sub sp, sp, #16",
|
||||
"add r4, sp, #8",
|
||||
"str r4, [sp]",
|
||||
"bl {trampoline}",
|
||||
"ldr r2, [sp, #8]",
|
||||
"ldr r3, [sp, #12]",
|
||||
"add sp, sp, #16",
|
||||
"pop {{r4, pc}}",
|
||||
trampoline = sym crate::arm::__udivmoddi4
|
||||
);
|
||||
}
|
||||
|
||||
#[unsafe(naked)]
|
||||
pub unsafe extern "C" fn __aeabi_idivmod() {
|
||||
core::arch::naked_asm!(
|
||||
"push {{r0, r1, r4, lr}}",
|
||||
"bl {trampoline}",
|
||||
"pop {{r1, r2}}",
|
||||
"muls r2, r2, r0",
|
||||
"subs r1, r1, r2",
|
||||
"pop {{r4, pc}}",
|
||||
trampoline = sym crate::arm::__aeabi_idiv,
|
||||
);
|
||||
}
|
||||
|
||||
#[unsafe(naked)]
|
||||
pub unsafe extern "C" fn __aeabi_ldivmod() {
|
||||
core::arch::naked_asm!(
|
||||
"push {{r4, lr}}",
|
||||
"sub sp, sp, #16",
|
||||
"add r4, sp, #8",
|
||||
"str r4, [sp]",
|
||||
"bl {trampoline}",
|
||||
"ldr r2, [sp, #8]",
|
||||
"ldr r3, [sp, #12]",
|
||||
"add sp, sp, #16",
|
||||
"pop {{r4, pc}}",
|
||||
trampoline = sym crate::arm::__divmoddi4,
|
||||
);
|
||||
}
|
||||
|
||||
// FIXME(arm): The `*4` and `*8` variants should be defined as aliases.
|
||||
|
||||
/// `memcpy` provided with the `aapcs` ABI.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memcpy` requirements apply.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memcpy(dst: *mut u8, src: *const u8, n: usize) {
|
||||
// SAFETY: memcpy preconditions apply.
|
||||
unsafe { crate::mem::memcpy(dst, src, n) };
|
||||
}
|
||||
|
||||
/// `memcpy` for 4-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memcpy` requirements apply. Additionally, `dest` and `src` must be aligned to
|
||||
/// four bytes.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dst: *mut u8, src: *const u8, n: usize) {
|
||||
// We are guaranteed 4-alignment, so accessing at u32 is okay.
|
||||
let mut dst = dst.cast::<u32>();
|
||||
let mut src = src.cast::<u32>();
|
||||
debug_assert!(dst.is_aligned());
|
||||
debug_assert!(src.is_aligned());
|
||||
let mut n = n;
|
||||
|
||||
while n >= 4 {
|
||||
// SAFETY: `dst` and `src` are both valid for at least 4 bytes, from
|
||||
// `memcpy` preconditions and the loop guard.
|
||||
unsafe { *dst = *src };
|
||||
|
||||
// FIXME(addr): if we can make this end-of-address-space safe without losing
|
||||
// performance, we may want to consider that.
|
||||
// SAFETY: memcpy is not expected to work at the end of the address space
|
||||
unsafe {
|
||||
dst = dst.offset(1);
|
||||
src = src.offset(1);
|
||||
}
|
||||
|
||||
n -= 4;
|
||||
}
|
||||
|
||||
// SAFETY: `dst` and `src` will still be valid for `n` bytes
|
||||
unsafe { __aeabi_memcpy(dst.cast::<u8>(), src.cast::<u8>(), n) };
|
||||
}
|
||||
|
||||
/// `memcpy` for 8-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memcpy` requirements apply. Additionally, `dest` and `src` must be aligned to
|
||||
/// eight bytes.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) {
|
||||
debug_assert!(dst.addr() & 7 == 0);
|
||||
debug_assert!(src.addr() & 7 == 0);
|
||||
|
||||
// SAFETY: memcpy preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memcpy4(dst, src, n) };
|
||||
}
|
||||
|
||||
/// `memmove` provided with the `aapcs` ABI.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memmove` requirements apply.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memmove(dst: *mut u8, src: *const u8, n: usize) {
|
||||
// SAFETY: memmove preconditions apply.
|
||||
unsafe { crate::mem::memmove(dst, src, n) };
|
||||
}
|
||||
|
||||
/// `memmove` for 4-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memmove` requirements apply. Additionally, `dest` and `src` must be aligned to
|
||||
/// four bytes.
|
||||
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) {
|
||||
debug_assert!(dst.addr() & 3 == 0);
|
||||
debug_assert!(src.addr() & 3 == 0);
|
||||
|
||||
// SAFETY: same preconditions, less strict aligment.
|
||||
unsafe { __aeabi_memmove(dst, src, n) };
|
||||
}
|
||||
|
||||
/// `memmove` for 8-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to
|
||||
/// eight bytes.
|
||||
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) {
|
||||
debug_assert!(dst.addr() & 7 == 0);
|
||||
debug_assert!(src.addr() & 7 == 0);
|
||||
|
||||
// SAFETY: memmove preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memmove(dst, src, n) };
|
||||
}
|
||||
|
||||
/// `memset` provided with the `aapcs` ABI.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memset` requirements apply.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memset(dst: *mut u8, n: usize, c: i32) {
|
||||
// Note the different argument order
|
||||
// SAFETY: memset preconditions apply.
|
||||
unsafe { crate::mem::memset(dst, c, n) };
|
||||
}
|
||||
|
||||
/// `memset` for 4-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memset` requirements apply. Additionally, `dest` and `src` must be aligned to
|
||||
/// four bytes.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memset4(dst: *mut u8, n: usize, c: i32) {
|
||||
let mut dst = dst.cast::<u32>();
|
||||
debug_assert!(dst.is_aligned());
|
||||
let mut n = n;
|
||||
|
||||
let byte = (c as u32) & 0xff;
|
||||
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;
|
||||
|
||||
while n >= 4 {
|
||||
// SAFETY: `dst` is valid for at least 4 bytes, from `memset` preconditions and
|
||||
// the loop guard.
|
||||
unsafe { *dst = c };
|
||||
|
||||
// FIXME(addr): if we can make this end-of-address-space safe without losing
|
||||
// performance, we may want to consider that.
|
||||
// SAFETY: memcpy is not expected to work at the end of the address space
|
||||
unsafe {
|
||||
dst = dst.offset(1);
|
||||
}
|
||||
n -= 4;
|
||||
}
|
||||
|
||||
// SAFETY: `dst` will still be valid for `n` bytes
|
||||
unsafe { __aeabi_memset(dst.cast::<u8>(), n, byte as i32) };
|
||||
}
|
||||
|
||||
/// `memset` for 8-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memset` requirements apply. Additionally, `dst` and `src` must be aligned to
|
||||
/// eight bytes.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) {
|
||||
debug_assert!(dst.addr() & 7 == 0);
|
||||
|
||||
// SAFETY: memset preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memset4(dst, n, c) };
|
||||
}
|
||||
|
||||
/// `memclr` provided with the `aapcs` ABI.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memclr` requirements apply.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memclr(dst: *mut u8, n: usize) {
|
||||
// SAFETY: memclr preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memset(dst, n, 0) };
|
||||
}
|
||||
|
||||
/// `memclr` for 4-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memclr` requirements apply. Additionally, `dest` and `src` must be aligned to
|
||||
/// four bytes.
|
||||
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) {
|
||||
debug_assert!(dst.addr() & 3 == 0);
|
||||
|
||||
// SAFETY: memclr preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memset4(dst, n, 0) };
|
||||
}
|
||||
|
||||
/// `memclr` for 8-byte alignment.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Usual `memclr` requirements apply. Additionally, `dst` and `src` must be aligned to
|
||||
/// eight bytes.
|
||||
#[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
|
||||
pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) {
|
||||
debug_assert!(dst.addr() & 7 == 0);
|
||||
|
||||
// SAFETY: memclr preconditions apply, less strict alignment.
|
||||
unsafe { __aeabi_memset4(dst, n, 0) };
|
||||
}
|
||||
}
|
||||
290
library/compiler-builtins/compiler-builtins/src/arm_linux.rs
Normal file
290
library/compiler-builtins/compiler-builtins/src/arm_linux.rs
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
use core::{arch, mem};
|
||||
|
||||
// Kernel-provided user-mode helper functions:
|
||||
// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
|
||||
unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool {
|
||||
let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ());
|
||||
f(oldval, newval, ptr) == 0
|
||||
}
|
||||
|
||||
unsafe fn __kuser_memory_barrier() {
|
||||
let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ());
|
||||
f();
|
||||
}
|
||||
|
||||
// Word-align a pointer
|
||||
fn align_ptr<T>(ptr: *mut T) -> *mut u32 {
|
||||
// This gives us a mask of 0 when T == u32 since the pointer is already
|
||||
// supposed to be aligned, which avoids any masking in that case.
|
||||
let ptr_mask = 3 & (4 - mem::size_of::<T>());
|
||||
(ptr as usize & !ptr_mask) as *mut u32
|
||||
}
|
||||
|
||||
// Calculate the shift and mask of a value inside an aligned word
|
||||
fn get_shift_mask<T>(ptr: *mut T) -> (u32, u32) {
|
||||
// Mask to get the low byte/halfword/word
|
||||
let mask = match mem::size_of::<T>() {
|
||||
1 => 0xff,
|
||||
2 => 0xffff,
|
||||
4 => 0xffffffff,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// If we are on big-endian then we need to adjust the shift accordingly
|
||||
let endian_adjust = if cfg!(target_endian = "little") {
|
||||
0
|
||||
} else {
|
||||
4 - mem::size_of::<T>() as u32
|
||||
};
|
||||
|
||||
// Shift to get the desired element in the word
|
||||
let ptr_mask = 3 & (4 - mem::size_of::<T>());
|
||||
let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8;
|
||||
|
||||
(shift, mask)
|
||||
}
|
||||
|
||||
// Extract a value from an aligned word
|
||||
fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 {
|
||||
(aligned >> shift) & mask
|
||||
}
|
||||
|
||||
// Insert a value into an aligned word
|
||||
fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
|
||||
(aligned & !(mask << shift)) | ((val & mask) << shift)
|
||||
}
|
||||
|
||||
/// Performs a relaxed atomic load of 4 bytes at `ptr`. Some of the bytes are allowed to be out of
|
||||
/// bounds as long as `size_of::<T>()` bytes are in bounds.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// - `ptr` must be 4-aligned.
|
||||
/// - `size_of::<T>()` must be at most 4.
|
||||
/// - if `size_of::<T>() == 1`, `ptr` or `ptr` offset by 1, 2 or 3 bytes must be valid for a relaxed
|
||||
/// atomic read of 1 byte.
|
||||
/// - if `size_of::<T>() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic
|
||||
/// read of 2 bytes.
|
||||
/// - if `size_of::<T>() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes.
|
||||
unsafe fn atomic_load_aligned<T>(ptr: *mut u32) -> u32 {
|
||||
if mem::size_of::<T>() == 4 {
|
||||
// SAFETY: As `T` has a size of 4, the caller garantees this is sound.
|
||||
unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) }
|
||||
} else {
|
||||
// SAFETY:
|
||||
// As all 4 bytes pointed to by `ptr` might not be dereferenceable due to being out of
|
||||
// bounds when doing atomic operations on a `u8`/`i8`/`u16`/`i16`, inline ASM is used to
|
||||
// avoid causing undefined behaviour. However, as `ptr` is 4-aligned and at least 1 byte of
|
||||
// `ptr` is dereferencable, the load won't cause a segfault as the page size is always
|
||||
// larger than 4 bytes.
|
||||
// The `ldr` instruction does not touch the stack or flags, or write to memory, so
|
||||
// `nostack`, `preserves_flags` and `readonly` are sound. The caller garantees that `ptr` is
|
||||
// 4-aligned, as required by `ldr`.
|
||||
unsafe {
|
||||
let res: u32;
|
||||
arch::asm!(
|
||||
"ldr {res}, [{ptr}]",
|
||||
ptr = in(reg) ptr,
|
||||
res = lateout(reg) res,
|
||||
options(nostack, preserves_flags, readonly)
|
||||
);
|
||||
res
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generic atomic read-modify-write operation
|
||||
unsafe fn atomic_rmw<T, F: Fn(u32) -> u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 {
|
||||
let aligned_ptr = align_ptr(ptr);
|
||||
let (shift, mask) = get_shift_mask(ptr);
|
||||
|
||||
loop {
|
||||
let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
|
||||
let curval = extract_aligned(curval_aligned, shift, mask);
|
||||
let newval = f(curval);
|
||||
let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
|
||||
if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
|
||||
return g(curval, newval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generic atomic compare-exchange operation
|
||||
unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 {
|
||||
let aligned_ptr = align_ptr(ptr);
|
||||
let (shift, mask) = get_shift_mask(ptr);
|
||||
|
||||
loop {
|
||||
let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
|
||||
let curval = extract_aligned(curval_aligned, shift, mask);
|
||||
if curval != oldval {
|
||||
return curval;
|
||||
}
|
||||
let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
|
||||
if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
|
||||
return oldval;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! atomic_rmw {
|
||||
($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
|
||||
atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
(@old $name:ident, $ty:ty, $op:expr) => {
|
||||
atomic_rmw!($name, $ty, $op, |old, _| old);
|
||||
};
|
||||
|
||||
(@new $name:ident, $ty:ty, $op:expr) => {
|
||||
atomic_rmw!($name, $ty, $op, |_, new| new);
|
||||
};
|
||||
}
|
||||
macro_rules! atomic_cmpxchg {
|
||||
($name:ident, $ty:ty) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
|
||||
atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b);
|
||||
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
|
||||
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn __sync_synchronize() {
|
||||
__kuser_memory_barrier();
|
||||
}
|
||||
}
|
||||
23
library/compiler-builtins/compiler-builtins/src/avr.rs
Normal file
23
library/compiler-builtins/compiler-builtins/src/avr.rs
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
intrinsics! {
|
||||
pub unsafe extern "C" fn abort() -> ! {
|
||||
// On AVRs, an architecture that doesn't support traps, unreachable code
|
||||
// paths get lowered into calls to `abort`:
|
||||
//
|
||||
// https://github.com/llvm/llvm-project/blob/cbe8f3ad7621e402b050e768f400ff0d19c3aedd/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp#L4462
|
||||
//
|
||||
// When control gets here, it means that either core::intrinsics::abort()
|
||||
// was called or an undefined bebavior has occurred, so there's not that
|
||||
// much we can do to recover - we can't `panic!()`, because for all we
|
||||
// know the environment is gone now, so panicking might end up with us
|
||||
// getting back to this very function.
|
||||
//
|
||||
// So let's do the next best thing, loop.
|
||||
//
|
||||
// Alternatively we could (try to) restart the program, but since
|
||||
// undefined behavior is undefined, there's really no obligation for us
|
||||
// to do anything here - for all we care, we could just set the chip on
|
||||
// fire; but that'd be bad for the environment.
|
||||
|
||||
loop {}
|
||||
}
|
||||
}
|
||||
209
library/compiler-builtins/compiler-builtins/src/float/add.rs
Normal file
209
library/compiler-builtins/compiler-builtins/src/float/add.rs
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
/// Returns `a + b`
|
||||
fn add<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
u32: CastInto<F::Int>,
|
||||
F::Int: CastInto<u32>,
|
||||
i32: CastInto<F::Int>,
|
||||
F::Int: CastInto<i32>,
|
||||
{
|
||||
let one = F::Int::ONE;
|
||||
let zero = F::Int::ZERO;
|
||||
|
||||
let bits = F::BITS.cast();
|
||||
let significand_bits = F::SIG_BITS;
|
||||
let max_exponent = F::EXP_SAT;
|
||||
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
|
||||
let mut a_rep = a.to_bits();
|
||||
let mut b_rep = b.to_bits();
|
||||
let a_abs = a_rep & abs_mask;
|
||||
let b_abs = b_rep & abs_mask;
|
||||
|
||||
// Detect if a or b is zero, infinity, or NaN.
|
||||
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
|
||||
// NaN + anything = qNaN
|
||||
if a_abs > inf_rep {
|
||||
return F::from_bits(a_abs | quiet_bit);
|
||||
}
|
||||
// anything + NaN = qNaN
|
||||
if b_abs > inf_rep {
|
||||
return F::from_bits(b_abs | quiet_bit);
|
||||
}
|
||||
|
||||
if a_abs == inf_rep {
|
||||
// +/-infinity + -/+infinity = qNaN
|
||||
if (a.to_bits() ^ b.to_bits()) == sign_bit {
|
||||
return F::from_bits(qnan_rep);
|
||||
} else {
|
||||
// +/-infinity + anything remaining = +/- infinity
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
// anything remaining + +/-infinity = +/-infinity
|
||||
if b_abs == inf_rep {
|
||||
return b;
|
||||
}
|
||||
|
||||
// zero + anything = anything
|
||||
if a_abs == MinInt::ZERO {
|
||||
// but we need to get the sign right for zero + zero
|
||||
if b_abs == MinInt::ZERO {
|
||||
return F::from_bits(a.to_bits() & b.to_bits());
|
||||
} else {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
// anything + zero = anything
|
||||
if b_abs == MinInt::ZERO {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
// Swap a and b if necessary so that a has the larger absolute value.
|
||||
if b_abs > a_abs {
|
||||
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
|
||||
let tmp = a_rep;
|
||||
a_rep = b_rep;
|
||||
b_rep = tmp;
|
||||
}
|
||||
|
||||
// Extract the exponent and significand from the (possibly swapped) a and b.
|
||||
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast();
|
||||
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast();
|
||||
let mut a_significand = a_rep & significand_mask;
|
||||
let mut b_significand = b_rep & significand_mask;
|
||||
|
||||
// normalize any denormals, and adjust the exponent accordingly.
|
||||
if a_exponent == 0 {
|
||||
let (exponent, significand) = F::normalize(a_significand);
|
||||
a_exponent = exponent;
|
||||
a_significand = significand;
|
||||
}
|
||||
if b_exponent == 0 {
|
||||
let (exponent, significand) = F::normalize(b_significand);
|
||||
b_exponent = exponent;
|
||||
b_significand = significand;
|
||||
}
|
||||
|
||||
// The sign of the result is the sign of the larger operand, a. If they
|
||||
// have opposite signs, we are performing a subtraction; otherwise addition.
|
||||
let result_sign = a_rep & sign_bit;
|
||||
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
|
||||
|
||||
// Shift the significands to give us round, guard and sticky, and or in the
|
||||
// implicit significand bit. (If we fell through from the denormal path it
|
||||
// was already set by normalize(), but setting it twice won't hurt
|
||||
// anything.)
|
||||
a_significand = (a_significand | implicit_bit) << 3;
|
||||
b_significand = (b_significand | implicit_bit) << 3;
|
||||
|
||||
// Shift the significand of b by the difference in exponents, with a sticky
|
||||
// bottom bit to get rounding correct.
|
||||
let align = a_exponent.wrapping_sub(b_exponent).cast();
|
||||
if align != MinInt::ZERO {
|
||||
if align < bits {
|
||||
let sticky =
|
||||
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
|
||||
b_significand = (b_significand >> align.cast()) | sticky;
|
||||
} else {
|
||||
b_significand = one; // sticky; b is known to be non-zero.
|
||||
}
|
||||
}
|
||||
if subtraction {
|
||||
a_significand = a_significand.wrapping_sub(b_significand);
|
||||
// If a == -b, return +zero.
|
||||
if a_significand == MinInt::ZERO {
|
||||
return F::from_bits(MinInt::ZERO);
|
||||
}
|
||||
|
||||
// If partial cancellation occured, we need to left-shift the result
|
||||
// and adjust the exponent:
|
||||
if a_significand < implicit_bit << 3 {
|
||||
let shift =
|
||||
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
|
||||
a_significand <<= shift;
|
||||
a_exponent -= shift;
|
||||
}
|
||||
} else {
|
||||
// addition
|
||||
a_significand += b_significand;
|
||||
|
||||
// If the addition carried up, we need to right-shift the result and
|
||||
// adjust the exponent:
|
||||
if a_significand & (implicit_bit << 4) != MinInt::ZERO {
|
||||
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
|
||||
a_significand = (a_significand >> 1) | sticky;
|
||||
a_exponent += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// If we have overflowed the type, return +/- infinity:
|
||||
if a_exponent >= max_exponent as i32 {
|
||||
return F::from_bits(inf_rep | result_sign);
|
||||
}
|
||||
|
||||
if a_exponent <= 0 {
|
||||
// Result is denormal before rounding; the exponent is zero and we
|
||||
// need to shift the significand.
|
||||
let shift = (1 - a_exponent).cast();
|
||||
let sticky =
|
||||
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
|
||||
a_significand = (a_significand >> shift.cast()) | sticky;
|
||||
a_exponent = 0;
|
||||
}
|
||||
|
||||
// Low three bits are round, guard, and sticky.
|
||||
let a_significand_i32: i32 = a_significand.cast();
|
||||
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
|
||||
|
||||
// Shift the significand into place, and mask off the implicit bit.
|
||||
let mut result = (a_significand >> 3) & significand_mask;
|
||||
|
||||
// Insert the exponent and sign.
|
||||
result |= a_exponent.cast() << significand_bits;
|
||||
result |= result_sign;
|
||||
|
||||
// Final rounding. The result may overflow to infinity, but that is the
|
||||
// correct result in that case.
|
||||
if round_guard_sticky > 0x4 {
|
||||
result += one;
|
||||
}
|
||||
if round_guard_sticky == 0x4 {
|
||||
result += result & one;
|
||||
}
|
||||
|
||||
F::from_bits(result)
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_fadd]
|
||||
pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {
|
||||
add(a, b)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_dadd]
|
||||
pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 {
|
||||
add(a, b)
|
||||
}
|
||||
|
||||
#[ppc_alias = __addkf3]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 {
|
||||
add(a, b)
|
||||
}
|
||||
}
|
||||
248
library/compiler-builtins/compiler-builtins/src/float/cmp.rs
Normal file
248
library/compiler-builtins/compiler-builtins/src/float/cmp.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
#![allow(unreachable_code)]
|
||||
|
||||
use crate::float::Float;
|
||||
use crate::int::MinInt;
|
||||
|
||||
// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22
|
||||
#[cfg(target_arch = "avr")]
|
||||
pub type CmpResult = i8;
|
||||
|
||||
// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25
|
||||
#[cfg(not(target_arch = "avr"))]
|
||||
pub type CmpResult = i32;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum Result {
|
||||
Less,
|
||||
Equal,
|
||||
Greater,
|
||||
Unordered,
|
||||
}
|
||||
|
||||
impl Result {
|
||||
fn to_le_abi(self) -> CmpResult {
|
||||
match self {
|
||||
Result::Less => -1,
|
||||
Result::Equal => 0,
|
||||
Result::Greater => 1,
|
||||
Result::Unordered => 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_ge_abi(self) -> CmpResult {
|
||||
match self {
|
||||
Result::Less => -1,
|
||||
Result::Equal => 0,
|
||||
Result::Greater => 1,
|
||||
Result::Unordered => -1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cmp<F: Float>(a: F, b: F) -> Result {
|
||||
let one = F::Int::ONE;
|
||||
let zero = F::Int::ZERO;
|
||||
let szero = F::SignedInt::ZERO;
|
||||
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
let b_rep = b.to_bits();
|
||||
let a_abs = a_rep & abs_mask;
|
||||
let b_abs = b_rep & abs_mask;
|
||||
|
||||
// If either a or b is NaN, they are unordered.
|
||||
if a_abs > inf_rep || b_abs > inf_rep {
|
||||
return Result::Unordered;
|
||||
}
|
||||
|
||||
// If a and b are both zeros, they are equal.
|
||||
if a_abs | b_abs == zero {
|
||||
return Result::Equal;
|
||||
}
|
||||
|
||||
let a_srep = a.to_bits_signed();
|
||||
let b_srep = b.to_bits_signed();
|
||||
|
||||
// If at least one of a and b is positive, we get the same result comparing
|
||||
// a and b as signed integers as we would with a fp_ting-point compare.
|
||||
if a_srep & b_srep >= szero {
|
||||
if a_srep < b_srep {
|
||||
Result::Less
|
||||
} else if a_srep == b_srep {
|
||||
Result::Equal
|
||||
} else {
|
||||
Result::Greater
|
||||
}
|
||||
// Otherwise, both are negative, so we need to flip the sense of the
|
||||
// comparison to get the correct result. (This assumes a twos- or ones-
|
||||
// complement integer representation; if integers are represented in a
|
||||
// sign-magnitude representation, then this flip is incorrect).
|
||||
} else if a_srep > b_srep {
|
||||
Result::Less
|
||||
} else if a_srep == b_srep {
|
||||
Result::Equal
|
||||
} else {
|
||||
Result::Greater
|
||||
}
|
||||
}
|
||||
|
||||
fn unord<F: Float>(a: F, b: F) -> bool {
|
||||
let one = F::Int::ONE;
|
||||
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
let b_rep = b.to_bits();
|
||||
let a_abs = a_rep & abs_mask;
|
||||
let b_abs = b_rep & abs_mask;
|
||||
|
||||
a_abs > inf_rep || b_abs > inf_rep
|
||||
}
|
||||
|
||||
// `f32`/`f64` comparison intrinsics. Each returns a `CmpResult` that callers
// interpret as negative/zero/positive. Note that `__eq*`, `__lt*`, and
// `__ne*` use the same `to_le_abi` mapping as `__le*`, while `__ge*` and
// `__gt*` use `to_ge_abi` — this mirrors the libgcc soft-float comparison
// ABI, where each group shares a single return convention.
intrinsics! {
    pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __gesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }

    // Nonzero iff either argument is NaN.
    #[arm_aeabi_alias = __aeabi_fcmpun]
    pub extern "C" fn __unordsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        unord(a, b) as crate::float::cmp::CmpResult
    }

    pub extern "C" fn __eqsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __ltsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __nesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __gtsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }

    pub extern "C" fn __ledf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __gedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }

    // Nonzero iff either argument is NaN.
    #[arm_aeabi_alias = __aeabi_dcmpun]
    pub extern "C" fn __unorddf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        unord(a, b) as crate::float::cmp::CmpResult
    }

    pub extern "C" fn __eqdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __ltdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __nedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    pub extern "C" fn __gtdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
}
|
||||
|
||||
// `f128` comparison intrinsics, following the same ABI conventions as the
// `f32`/`f64` group above. On PowerPC the symbols use the `kf` suffix
// instead of `tf` (see the `ppc_alias` attributes).
#[cfg(f128_enabled)]
intrinsics! {
    #[ppc_alias = __lekf2]
    pub extern "C" fn __letf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    #[ppc_alias = __gekf2]
    pub extern "C" fn __getf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }

    // Nonzero iff either argument is NaN.
    #[ppc_alias = __unordkf2]
    pub extern "C" fn __unordtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        unord(a, b) as crate::float::cmp::CmpResult
    }

    #[ppc_alias = __eqkf2]
    pub extern "C" fn __eqtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    #[ppc_alias = __ltkf2]
    pub extern "C" fn __lttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    #[ppc_alias = __nekf2]
    pub extern "C" fn __netf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_le_abi()
    }

    #[ppc_alias = __gtkf2]
    pub extern "C" fn __gttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
        cmp(a, b).to_ge_abi()
    }
}
|
||||
|
||||
// ARM AEABI comparison wrappers: each converts the three-way `__*2` result
// into the 0/1 boolean that the AEABI predicates return, using the `aapcs`
// calling convention.
#[cfg(target_arch = "arm")]
intrinsics! {
    pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 {
        (__lesf2(a, b) <= 0) as i32
    }

    pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 {
        (__gesf2(a, b) >= 0) as i32
    }

    pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 {
        (__eqsf2(a, b) == 0) as i32
    }

    pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 {
        (__ltsf2(a, b) < 0) as i32
    }

    pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 {
        (__gtsf2(a, b) > 0) as i32
    }

    pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 {
        (__ledf2(a, b) <= 0) as i32
    }

    pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 {
        (__gedf2(a, b) >= 0) as i32
    }

    pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 {
        (__eqdf2(a, b) == 0) as i32
    }

    pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 {
        (__ltdf2(a, b) < 0) as i32
    }

    pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 {
        (__gtdf2(a, b) > 0) as i32
    }
}
|
||||
489
library/compiler-builtins/compiler-builtins/src/float/conv.rs
Normal file
489
library/compiler-builtins/compiler-builtins/src/float/conv.rs
Normal file
|
|
@ -0,0 +1,489 @@
|
|||
use core::ops::Neg;
|
||||
|
||||
use super::Float;
|
||||
use crate::int::{CastFrom, CastInto, Int, MinInt};
|
||||
|
||||
/// Conversions from integers to floats.
///
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
///   mantissa _with the implicit bit set_!
/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
///   patterns are used to simplify this. Adjust the mantissa with the result if needed.
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
///   from the exponent (above) accounts for the explicit bit being set in the mantissa.
///
/// # Terminology
///
/// - `i`: the original integer
/// - `i_m`: the integer, shifted fully left (no leading zeros)
/// - `n`: number of leading zeroes
/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
/// - `adj`: the bits that will be truncated, possibly compressed in some way.
/// - `m`: the resulting mantissa. Implicit bit is usually set.
mod int_to_float {
    use super::*;

    /// Calculate the exponent from the number of leading zeros.
    ///
    /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
    /// bit set can be added back later.
    fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
        F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n)
    }

    /// Adjust a mantissa with dropped bits to perform correct rounding.
    ///
    /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
    /// can be combined or compressed in some way that simplifies operations.
    fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
        // Branchlessly extract a `1` if rounding up should happen, 0 otherwise.
        // The MSB of `dropped_bits` is the rounding half-bit; subtracting
        // `!m_base & 1` when it is set breaks exact ties toward even.
        let adj = (dropped_bits - ((dropped_bits >> (F::BITS - 1)) & !m_base)) >> (F::BITS - 1);

        // Add one when we need to round up. Break ties to even.
        m_base + adj
    }

    /// Shift the exponent to its position and add the mantissa.
    ///
    /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
    /// value to cancel it out.
    fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
        // + rather than | so the mantissa can overflow into the exponent
        (e << F::SIG_BITS) + m
    }

    /// Shift distance from a left-aligned integer to a smaller float.
    fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
        (I::BITS - F::BITS) + F::EXP_BITS
    }

    /// Shift distance from an integer with `n` leading zeros to a smaller float.
    fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
        F::SIG_BITS - I::BITS + 1 + n
    }

    /// Perform a signed operation as unsigned, then add the sign back.
    pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
    where
        F: Float,
        I: Int,
        F::Int: CastFrom<I>,
        Conv: Fn(I::UnsignedInt) -> F::Int,
    {
        // Arithmetic shift replicates the sign bit; the cast and left shift
        // place it at the float's sign position.
        let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
        F::from_bits(conv(i.unsigned_abs()) | sign_bit)
    }

    pub fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set (significant bits)
        let m_base = (i << n) >> f32::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f32::SIG_BITS + 1);
        let m = m_adj::<f32>(m_base, adj);
        let e = exp::<u32, f32>(n) - 1;
        repr::<f32>(e, m)
    }

    pub fn u32_to_f64_bits(i: u32) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set. A `u32` always fits in `f64`'s
        // 52-bit significand, so no rounding adjustment is needed.
        let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
        let e = exp::<u32, f64>(n) - 1;
        repr::<f64>(e, m)
    }

    #[cfg(f128_enabled)]
    pub fn u32_to_f128_bits(i: u32) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();

        // Shift into mantissa position that is correct for the type, but shifted into the lower
        // 64 bits over so we can avoid 128-bit math.
        let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
        let e = exp::<u32, f128>(n) as u64 - 1;
        // High 64 bits of f128 representation.
        let h = (e << (f128::SIG_BITS - 64)) + m;

        // Shift back to the high bits, the rest of the mantissa will always be 0.
        (h as u128) << 64
    }

    pub fn u64_to_f32_bits(i: u64) -> u32 {
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n);
        // Mantissa with implicit bit set
        let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
        // The low `32 + EXP_BITS` bits of `i_m` are dropped. Shifting right by
        // `EXP_BITS` keeps the rounding half-bit in the `u32` MSB; OR-ing in
        // the masked low bits folds the bits lost by that shift back in as
        // sticky bits (the mask must stay below the MSB so the half-bit is
        // not corrupted).
        let adj = ((i_m >> f32::EXP_BITS) | i_m & 0xFFFF) as u32;
        let m = m_adj::<f32>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
        repr::<f32>(e, m)
    }

    pub fn u64_to_f64_bits(i: u64) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set
        let m_base = (i << n) >> f64::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f64::SIG_BITS + 1);
        let m = m_adj::<f64>(m_base, adj);
        let e = exp::<u64, f64>(n) - 1;
        repr::<f64>(e, m)
    }

    #[cfg(f128_enabled)]
    pub fn u64_to_f128_bits(i: u64) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set. A `u64` always fits in `f128`'s
        // 112-bit significand, so no rounding adjustment is needed.
        let m = (i as u128) << shift_f_gt_i::<u64, f128>(n);
        let e = exp::<u64, f128>(n) - 1;
        repr::<f128>(e, m)
    }

    pub fn u128_to_f32_bits(i: u128) -> u32 {
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
        let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;

        // Within the upper `F::BITS`, everything except for the significand
        // gets truncated
        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast();

        // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
        // check if it is nonzero.
        let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
        let adj = d1 | d2;

        // Mantissa with implicit bit set
        let m = m_adj::<f32>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
        repr::<f32>(e, m)
    }

    pub fn u128_to_f64_bits(i: u128) -> u64 {
        let n = i.leading_zeros();
        let i_m = i.wrapping_shl(n);
        // Mantissa with implicit bit set
        let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
        // The low `64 + EXP_BITS` bits of `i_m` are dropped. Shifting right by
        // `EXP_BITS` keeps the rounding half-bit in the `u64` MSB; OR-ing in
        // the masked low bits folds the bits lost by that shift back in as
        // sticky bits (the mask must stay below the MSB so the half-bit is
        // not corrupted).
        let adj = ((i_m >> f64::EXP_BITS) | i_m & 0xFFFF_FFFF) as u64;
        let m = m_adj::<f64>(m_base, adj);
        let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
        repr::<f64>(e, m)
    }

    #[cfg(f128_enabled)]
    pub fn u128_to_f128_bits(i: u128) -> u128 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
        // Mantissa with implicit bit set
        let m_base = (i << n) >> f128::EXP_BITS;
        // Bits that will be dropped (insignificant bits)
        let adj = (i << n) << (f128::SIG_BITS + 1);
        let m = m_adj::<f128>(m_base, adj);
        let e = exp::<u128, f128>(n) - 1;
        repr::<f128>(e, m)
    }
}
|
||||
|
||||
// Conversions from unsigned integers to floats.
//
// Naming follows the signatures below: `si`/`di`/`ti` are 32/64/128-bit
// integers, `sf`/`df`/`tf` are `f32`/`f64`/`f128`.
intrinsics! {
    #[arm_aeabi_alias = __aeabi_ui2f]
    pub extern "C" fn __floatunsisf(i: u32) -> f32 {
        f32::from_bits(int_to_float::u32_to_f32_bits(i))
    }

    #[arm_aeabi_alias = __aeabi_ui2d]
    pub extern "C" fn __floatunsidf(i: u32) -> f64 {
        f64::from_bits(int_to_float::u32_to_f64_bits(i))
    }

    #[arm_aeabi_alias = __aeabi_ul2f]
    pub extern "C" fn __floatundisf(i: u64) -> f32 {
        f32::from_bits(int_to_float::u64_to_f32_bits(i))
    }

    #[arm_aeabi_alias = __aeabi_ul2d]
    pub extern "C" fn __floatundidf(i: u64) -> f64 {
        f64::from_bits(int_to_float::u64_to_f64_bits(i))
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floatuntisf(i: u128) -> f32 {
        f32::from_bits(int_to_float::u128_to_f32_bits(i))
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floatuntidf(i: u128) -> f64 {
        f64::from_bits(int_to_float::u128_to_f64_bits(i))
    }

    #[ppc_alias = __floatunsikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatunsitf(i: u32) -> f128 {
        f128::from_bits(int_to_float::u32_to_f128_bits(i))
    }

    #[ppc_alias = __floatundikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatunditf(i: u64) -> f128 {
        f128::from_bits(int_to_float::u64_to_f128_bits(i))
    }

    #[ppc_alias = __floatuntikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatuntitf(i: u128) -> f128 {
        f128::from_bits(int_to_float::u128_to_f128_bits(i))
    }
}
|
||||
|
||||
// Conversions from signed integers to floats.
//
// Each routine reuses the unsigned conversion on the absolute value and
// reapplies the sign via `int_to_float::signed`.
intrinsics! {
    #[arm_aeabi_alias = __aeabi_i2f]
    pub extern "C" fn __floatsisf(i: i32) -> f32 {
        int_to_float::signed(i, int_to_float::u32_to_f32_bits)
    }

    #[arm_aeabi_alias = __aeabi_i2d]
    pub extern "C" fn __floatsidf(i: i32) -> f64 {
        int_to_float::signed(i, int_to_float::u32_to_f64_bits)
    }

    #[arm_aeabi_alias = __aeabi_l2f]
    pub extern "C" fn __floatdisf(i: i64) -> f32 {
        int_to_float::signed(i, int_to_float::u64_to_f32_bits)
    }

    #[arm_aeabi_alias = __aeabi_l2d]
    pub extern "C" fn __floatdidf(i: i64) -> f64 {
        int_to_float::signed(i, int_to_float::u64_to_f64_bits)
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattisf(i: i128) -> f32 {
        int_to_float::signed(i, int_to_float::u128_to_f32_bits)
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattidf(i: i128) -> f64 {
        int_to_float::signed(i, int_to_float::u128_to_f64_bits)
    }

    #[ppc_alias = __floatsikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatsitf(i: i32) -> f128 {
        int_to_float::signed(i, int_to_float::u32_to_f128_bits)
    }

    #[ppc_alias = __floatdikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floatditf(i: i64) -> f128 {
        int_to_float::signed(i, int_to_float::u64_to_f128_bits)
    }

    #[ppc_alias = __floattikf]
    #[cfg(f128_enabled)]
    pub extern "C" fn __floattitf(i: i128) -> f128 {
        int_to_float::signed(i, int_to_float::u128_to_f128_bits)
    }
}
|
||||
|
||||
/// Generic float to unsigned int conversions.
|
||||
fn float_to_unsigned_int<F, U>(f: F) -> U
|
||||
where
|
||||
F: Float,
|
||||
U: Int<UnsignedInt = U>,
|
||||
F::Int: CastInto<U>,
|
||||
F::Int: CastFrom<u32>,
|
||||
F::Int: CastInto<U::UnsignedInt>,
|
||||
u32: CastFrom<F::Int>,
|
||||
{
|
||||
float_to_int_inner::<F, U, _, _>(f.to_bits(), |i: U| i, || U::MAX)
|
||||
}
|
||||
|
||||
/// Generic float to signed int conversions.
|
||||
fn float_to_signed_int<F, I>(f: F) -> I
|
||||
where
|
||||
F: Float,
|
||||
I: Int + Neg<Output = I>,
|
||||
I::UnsignedInt: Int,
|
||||
F::Int: CastInto<I::UnsignedInt>,
|
||||
F::Int: CastFrom<u32>,
|
||||
u32: CastFrom<F::Int>,
|
||||
{
|
||||
float_to_int_inner::<F, I, _, _>(
|
||||
f.to_bits() & !F::SIGN_MASK,
|
||||
|i: I| if f.is_sign_negative() { -i } else { i },
|
||||
|| if f.is_sign_negative() { I::MIN } else { I::MAX },
|
||||
)
|
||||
}
|
||||
|
||||
/// Float to int conversions, generic for both signed and unsigned.
///
/// Parameters:
/// - `fbits`: `abs(f)` bitcasted to an integer.
/// - `map_inbounds`: apply this transformation to integers that are within range (add the sign back).
/// - `out_of_bounds`: return value when out of range for `I`.
fn float_to_int_inner<F, I, FnFoo, FnOob>(
    fbits: F::Int,
    map_inbounds: FnFoo,
    out_of_bounds: FnOob,
) -> I
where
    F: Float,
    I: Int,
    FnFoo: FnOnce(I) -> I,
    FnOob: FnOnce() -> I,
    I::UnsignedInt: Int,
    F::Int: CastInto<I::UnsignedInt>,
    F::Int: CastFrom<u32>,
    u32: CastFrom<F::Int>,
{
    // Biased exponent of the first value too large to represent in `I`.
    let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1;
    // Biased exponent at which `m` below (significand positioned at the top
    // of `I::UnsignedInt`) needs no right shift, i.e. `s == 0`.
    let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1;

    if fbits < F::ONE.to_bits() {
        // `|f| < 1.0` truncates toward zero (this also covers subnormals).
        I::ZERO
    } else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS {
        // >= 1, < integer max
        // Position the significand bits at the top of `I::UnsignedInt`,
        // shifting left or right depending on which type is wider.
        let m_base = if I::UnsignedInt::BITS >= F::Int::BITS {
            I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
        } else {
            I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
        };

        // Set the implicit 1-bit.
        let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base;

        // Shift based on the exponent and bias.
        let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS);

        let unsigned = m >> s;
        map_inbounds(I::from_unsigned(unsigned))
    } else if fbits <= F::EXP_MASK {
        // >= max (incl. inf)
        out_of_bounds()
    } else {
        // NaN (`fbits` is `abs(f)`, so anything above the exponent mask is a
        // NaN payload) converts to zero.
        I::ZERO
    }
}
|
||||
|
||||
// Conversions from floats to unsigned integers.
//
// These truncate toward zero; negative inputs and NaN produce 0 and
// overflow saturates at the integer maximum (see `float_to_unsigned_int`).
intrinsics! {
    #[arm_aeabi_alias = __aeabi_f2uiz]
    pub extern "C" fn __fixunssfsi(f: f32) -> u32 {
        float_to_unsigned_int(f)
    }

    #[arm_aeabi_alias = __aeabi_f2ulz]
    pub extern "C" fn __fixunssfdi(f: f32) -> u64 {
        float_to_unsigned_int(f)
    }

    pub extern "C" fn __fixunssfti(f: f32) -> u128 {
        float_to_unsigned_int(f)
    }

    #[arm_aeabi_alias = __aeabi_d2uiz]
    pub extern "C" fn __fixunsdfsi(f: f64) -> u32 {
        float_to_unsigned_int(f)
    }

    #[arm_aeabi_alias = __aeabi_d2ulz]
    pub extern "C" fn __fixunsdfdi(f: f64) -> u64 {
        float_to_unsigned_int(f)
    }

    pub extern "C" fn __fixunsdfti(f: f64) -> u128 {
        float_to_unsigned_int(f)
    }

    #[ppc_alias = __fixunskfsi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfsi(f: f128) -> u32 {
        float_to_unsigned_int(f)
    }

    #[ppc_alias = __fixunskfdi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfdi(f: f128) -> u64 {
        float_to_unsigned_int(f)
    }

    #[ppc_alias = __fixunskfti]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixunstfti(f: f128) -> u128 {
        float_to_unsigned_int(f)
    }
}
|
||||
|
||||
// Conversions from floats to signed integers.
//
// These truncate toward zero; NaN produces 0 and overflow saturates at
// `MIN`/`MAX` depending on sign (see `float_to_signed_int`).
intrinsics! {
    #[arm_aeabi_alias = __aeabi_f2iz]
    pub extern "C" fn __fixsfsi(f: f32) -> i32 {
        float_to_signed_int(f)
    }

    #[arm_aeabi_alias = __aeabi_f2lz]
    pub extern "C" fn __fixsfdi(f: f32) -> i64 {
        float_to_signed_int(f)
    }

    pub extern "C" fn __fixsfti(f: f32) -> i128 {
        float_to_signed_int(f)
    }

    #[arm_aeabi_alias = __aeabi_d2iz]
    pub extern "C" fn __fixdfsi(f: f64) -> i32 {
        float_to_signed_int(f)
    }

    #[arm_aeabi_alias = __aeabi_d2lz]
    pub extern "C" fn __fixdfdi(f: f64) -> i64 {
        float_to_signed_int(f)
    }

    pub extern "C" fn __fixdfti(f: f64) -> i128 {
        float_to_signed_int(f)
    }

    #[ppc_alias = __fixkfsi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfsi(f: f128) -> i32 {
        float_to_signed_int(f)
    }

    #[ppc_alias = __fixkfdi]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfdi(f: f128) -> i64 {
        float_to_signed_int(f)
    }

    #[ppc_alias = __fixkfti]
    #[cfg(f128_enabled)]
    pub extern "C" fn __fixtfti(f: f128) -> i128 {
        float_to_signed_int(f)
    }
}
|
||||
635
library/compiler-builtins/compiler-builtins/src/float/div.rs
Normal file
635
library/compiler-builtins/compiler-builtins/src/float/div.rs
Normal file
|
|
@ -0,0 +1,635 @@
|
|||
//! Floating point division routines.
|
||||
//!
|
||||
//! This module documentation gives an overview of the method used. More documentation is inline.
|
||||
//!
|
||||
//! # Relevant notation
|
||||
//!
|
||||
//! - `m_a`: the mantissa of `a`, in base 2
|
||||
//! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a`
|
||||
//! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned
|
||||
//! fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM`
|
||||
//! will have N bits of integer and M-N bits of fraction.
|
||||
//! - `hw`: half width, i.e. for `f64` this will be a `u32`.
|
||||
//! - `x` is the best estimate of `1/m_b`
|
||||
//!
|
||||
//! # Method Overview
|
||||
//!
|
||||
//! Division routines must solve for `a / b`, which is `res = m_a*2^p_a / m_b*2^p_b`. The basic
|
||||
//! process is as follows:
|
||||
//!
|
||||
//! - Rearrange the exponent and significand to simplify the operations:
|
||||
//! `res = (m_a / m_b) * 2^{p_a - p_b}`.
|
||||
//! - Check for early exits (infinity, zero, etc).
|
||||
//! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
|
||||
//! - Set the implicit bit so math is correct.
|
||||
//! - Shift mantissa significant digits (with implicit bit) fully left such that fixed-point UQ1
|
||||
//! or UQ0 numbers can be used for mantissa math. These will have greater precision than the
|
||||
//! actual mantissa, which is important for correct rounding.
|
||||
//! - Calculate the reciprocal of `m_b`, `x`.
|
||||
//! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`.
|
||||
//! - Reapply rounding.
|
||||
//!
|
||||
//! # Reciprocal calculation
|
||||
//!
|
||||
//! Calculating the reciprocal is the most complicated part of this process. It uses the
|
||||
//! [Newton-Raphson method], which picks an initial estimation (of the reciprocal) and performs
|
||||
//! a number of iterations to increase its precision.
|
||||
//!
|
||||
//! In general, Newton's method takes the following form:
|
||||
//!
|
||||
//! ```text
|
||||
//! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the
|
||||
//! desired result.
|
||||
//!
|
||||
//! The result approaches a zero of `f(x)` by applying a correction to the previous guess.
|
||||
//!
|
||||
//! x_{n+1} = x_n - f(x_n) / f'(x_n)
|
||||
//! ```
|
||||
//!
|
||||
//! Applying this to find the reciprocal:
|
||||
//!
|
||||
//! ```text
|
||||
//! 1 / x = b
|
||||
//!
|
||||
//! Rearrange so we can solve by finding a zero
|
||||
//! 0 = (1 / x) - b = f(x)
|
||||
//!
|
||||
//! f'(x) = -x^{-2}
|
||||
//!
|
||||
//! x_{n+1} = 2*x_n - b*x_n^2
|
||||
//! ```
|
||||
//!
|
||||
//! This is a process that can be repeated to calculate the reciprocal with enough precision to
|
||||
//! achieve a correctly rounded result for the overall division operation. The maximum required
|
||||
//! number of iterations is known since precision doubles with each iteration.
|
||||
//!
|
||||
//! # Half-width operations
|
||||
//!
|
||||
//! Calculating the reciprocal requires widening multiplication and performing arithmetic on the
|
||||
//! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`)
|
||||
//! gets used instead of native math.
|
||||
//!
|
||||
//! To make this more efficient, all but the final operation can be computed using half-width
|
||||
//! integers. For example, rather than computing four iterations using 128-bit integers for `f64`,
|
||||
//! we can instead perform three iterations using native 64-bit integers and only one final
|
||||
//! iteration using the full 128 bits.
|
||||
//!
|
||||
//! This works because of precision doubling. Some leeway is allowed here because the fixed-point
|
||||
//! number has more bits than the final mantissa will.
|
||||
//!
|
||||
//! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method
|
||||
|
||||
use core::mem::size_of;
|
||||
use core::ops;
|
||||
|
||||
use super::HalfRep;
|
||||
use crate::float::Float;
|
||||
use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
|
||||
|
||||
fn div<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
F::Int: CastInto<i32>,
|
||||
F::Int: From<HalfRep<F>>,
|
||||
F::Int: From<u8>,
|
||||
F::Int: HInt + DInt,
|
||||
<F::Int as HInt>::D: ops::Shr<u32, Output = <F::Int as HInt>::D>,
|
||||
F::Int: From<u32>,
|
||||
u16: CastInto<F::Int>,
|
||||
i32: CastInto<F::Int>,
|
||||
u32: CastInto<F::Int>,
|
||||
u128: CastInto<HalfRep<F>>,
|
||||
{
|
||||
let one = F::Int::ONE;
|
||||
let zero = F::Int::ZERO;
|
||||
let one_hw = HalfRep::<F>::ONE;
|
||||
let zero_hw = HalfRep::<F>::ZERO;
|
||||
let hw = F::BITS / 2;
|
||||
let lo_mask = F::Int::MAX >> hw;
|
||||
|
||||
let significand_bits = F::SIG_BITS;
|
||||
// Saturated exponent, representing infinity
|
||||
let exponent_sat: F::Int = F::EXP_SAT.cast();
|
||||
|
||||
let exponent_bias = F::EXP_BIAS;
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
let (mut half_iterations, full_iterations) = get_iterations::<F>();
|
||||
let recip_precision = reciprocal_precision::<F>();
|
||||
|
||||
if F::BITS == 128 {
|
||||
// FIXME(tgross35): f128 seems to require one more half iteration than expected
|
||||
half_iterations += 1;
|
||||
}
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
let b_rep = b.to_bits();
|
||||
|
||||
// Exponent numeric representation, not accounting for bias
|
||||
let a_exponent = (a_rep >> significand_bits) & exponent_sat;
|
||||
let b_exponent = (b_rep >> significand_bits) & exponent_sat;
|
||||
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
|
||||
|
||||
let mut a_significand = a_rep & significand_mask;
|
||||
let mut b_significand = b_rep & significand_mask;
|
||||
|
||||
// The exponent of our final result in its encoded form
|
||||
let mut res_exponent: i32 =
|
||||
i32::cast_from(a_exponent) - i32::cast_from(b_exponent) + (exponent_bias as i32);
|
||||
|
||||
// Detect if a or b is zero, denormal, infinity, or NaN.
|
||||
if a_exponent.wrapping_sub(one) >= (exponent_sat - one)
|
||||
|| b_exponent.wrapping_sub(one) >= (exponent_sat - one)
|
||||
{
|
||||
let a_abs = a_rep & abs_mask;
|
||||
let b_abs = b_rep & abs_mask;
|
||||
|
||||
// NaN / anything = qNaN
|
||||
if a_abs > inf_rep {
|
||||
return F::from_bits(a_rep | quiet_bit);
|
||||
}
|
||||
|
||||
// anything / NaN = qNaN
|
||||
if b_abs > inf_rep {
|
||||
return F::from_bits(b_rep | quiet_bit);
|
||||
}
|
||||
|
||||
if a_abs == inf_rep {
|
||||
if b_abs == inf_rep {
|
||||
// infinity / infinity = NaN
|
||||
return F::from_bits(qnan_rep);
|
||||
} else {
|
||||
// infinity / anything else = +/- infinity
|
||||
return F::from_bits(a_abs | quotient_sign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / infinity = +/- 0
|
||||
if b_abs == inf_rep {
|
||||
return F::from_bits(quotient_sign);
|
||||
}
|
||||
|
||||
if a_abs == zero {
|
||||
if b_abs == zero {
|
||||
// zero / zero = NaN
|
||||
return F::from_bits(qnan_rep);
|
||||
} else {
|
||||
// zero / anything else = +/- zero
|
||||
return F::from_bits(quotient_sign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / zero = +/- infinity
|
||||
if b_abs == zero {
|
||||
return F::from_bits(inf_rep | quotient_sign);
|
||||
}
|
||||
|
||||
// a is denormal. Renormalize it and set the scale to include the necessary exponent
|
||||
// adjustment.
|
||||
if a_abs < implicit_bit {
|
||||
let (exponent, significand) = F::normalize(a_significand);
|
||||
res_exponent += exponent;
|
||||
a_significand = significand;
|
||||
}
|
||||
|
||||
// b is denormal. Renormalize it and set the scale to include the necessary exponent
|
||||
// adjustment.
|
||||
if b_abs < implicit_bit {
|
||||
let (exponent, significand) = F::normalize(b_significand);
|
||||
res_exponent -= exponent;
|
||||
b_significand = significand;
|
||||
}
|
||||
}
|
||||
|
||||
// Set the implicit significand bit. If we fell through from the
|
||||
// denormal path it was already set by normalize( ), but setting it twice
|
||||
// won't hurt anything.
|
||||
a_significand |= implicit_bit;
|
||||
b_significand |= implicit_bit;
|
||||
|
||||
// Transform to a fixed-point representation by shifting the significand to the high bits. We
|
||||
// know this is in the range [1.0, 2.0] since the implicit bit is set to 1 above.
|
||||
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
|
||||
|
||||
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
|
||||
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
|
||||
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
|
||||
// The max error for this approximation is achieved at endpoints, so
|
||||
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
|
||||
// which is about 4.5 bits.
|
||||
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
|
||||
//
|
||||
// Then, refine the reciprocal estimate using a quadratically converging
|
||||
// Newton-Raphson iteration:
|
||||
// x_{n+1} = x_n * (2 - x_n * b)
|
||||
//
|
||||
// Let b be the original divisor considered "in infinite precision" and
|
||||
// obtained from IEEE754 representation of function argument (with the
|
||||
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
|
||||
// UQ1.(W-1).
|
||||
//
|
||||
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
|
||||
// bits of divisor significand (with the implicit bit set). Corresponds to
|
||||
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
|
||||
// version of b_UQ1.
|
||||
//
|
||||
// Let e_n := x_n - 1/b_hw
|
||||
// E_n := x_n - 1/b
|
||||
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
|
||||
// = abs(e_n) + (b - b_hw) / (b*b_hw)
|
||||
// <= abs(e_n) + 2 * 2^-HW
|
||||
//
|
||||
// rep_t-sized iterations may be slower than the corresponding half-width
|
||||
// variant depending on the handware and whether single/double/quad precision
|
||||
// is selected.
|
||||
//
|
||||
// NB: Using half-width iterations increases computation errors due to
|
||||
// rounding, so error estimations have to be computed taking the selected
|
||||
// mode into account!
|
||||
let mut x_uq0 = if half_iterations > 0 {
|
||||
// Starting with (n-1) half-width iterations
|
||||
let b_uq1_hw: HalfRep<F> = b_uq1.hi();
|
||||
|
||||
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
|
||||
// with W0 being either 16 or 32 and W0 <= HW.
|
||||
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
|
||||
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
|
||||
let c_hw = c_hw::<F>();
|
||||
|
||||
// Check that the top bit is set, i.e. value is within `[1, 2)`.
|
||||
debug_assert!(b_uq1_hw & (one_hw << (HalfRep::<F>::BITS - 1)) > zero_hw);
|
||||
|
||||
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
|
||||
// so x0 fits to UQ0.HW without wrapping.
|
||||
let mut x_uq0_hw: HalfRep<F> =
|
||||
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
|
||||
|
||||
// An e_0 error is comprised of errors due to
|
||||
// * x0 being an inherently imprecise first approximation of 1/b_hw
|
||||
// * C_hw being some (irrational) number **truncated** to W0 bits
|
||||
// Please note that e_0 is calculated against the infinitely precise
|
||||
// reciprocal of b_hw (that is, **truncated** version of b).
|
||||
//
|
||||
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
|
||||
//
|
||||
// By construction, 1 <= b < 2
|
||||
// f(x) = x * (2 - b*x) = 2*x - b*x^2
|
||||
// f'(x) = 2 * (1 - b*x)
|
||||
//
|
||||
// On the [0, 1] interval, f(0) = 0,
|
||||
// then it increses until f(1/b) = 1 / b, maximum on (0, 1),
|
||||
// then it decreses to f(1) = 2 - b
|
||||
//
|
||||
// Let g(x) = x - f(x) = b*x^2 - x.
|
||||
// On (0, 1/b), g(x) < 0 <=> f(x) > x
|
||||
// On (1/b, 1], g(x) > 0 <=> f(x) < x
|
||||
//
|
||||
// For half-width iterations, b_hw is used instead of b.
|
||||
for _ in 0..half_iterations {
|
||||
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
|
||||
// of corr_UQ1_hw.
|
||||
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
|
||||
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
|
||||
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
|
||||
// expected to be strictly positive because b_UQ1_hw has its highest bit set
|
||||
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
|
||||
//
|
||||
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
|
||||
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
|
||||
// considered to be 0 to be able to represent it in UQ0.HW.
|
||||
// From the above analysis of f(x), if corr_UQ1_hw would be represented
|
||||
// without any intermediate loss of precision (that is, in twice_rep_t)
|
||||
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
|
||||
// less otherwise. On the other hand, to obtain [1.]000..., one have to pass
|
||||
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
|
||||
// to 1.0 being not representable as UQ0.HW).
|
||||
// The fact corr_UQ1_hw was virtually round up (due to result of
|
||||
// multiplication being **first** truncated, then negated - to improve
|
||||
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
|
||||
//
|
||||
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
|
||||
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
|
||||
// any number of iterations, so just subtract 2 from the reciprocal
|
||||
// approximation after last iteration.
|
||||
//
|
||||
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
|
||||
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
|
||||
// = 1 - e_n * b_hw + 2*eps1
|
||||
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
|
||||
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
|
||||
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
|
||||
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
|
||||
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
|
||||
// \------ >0 -------/ \-- >0 ---/
|
||||
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
|
||||
x_uq0_hw = next_guess(x_uq0_hw, b_uq1_hw);
|
||||
}
|
||||
|
||||
// For initial half-width iterations, U = 2^-HW
|
||||
// Let abs(e_n) <= u_n * U,
|
||||
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
|
||||
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
|
||||
//
|
||||
// Account for possible overflow (see above). For an overflow to occur for the
|
||||
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
|
||||
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
|
||||
// value representable in UQ0.HW or less by 1. This means that 1/b_hw have to
|
||||
// be not below that value (see g(x) above), so it is safe to decrement just
|
||||
// once after the final iteration. On the other hand, an effective value of
|
||||
// divisor changes after this point (from b_hw to b), so adjust here.
|
||||
x_uq0_hw = x_uq0_hw.wrapping_sub(one_hw);
|
||||
|
||||
// Error estimations for full-precision iterations are calculated just
|
||||
// as above, but with U := 2^-W and taking extra decrementing into account.
|
||||
// We need at least one such iteration.
|
||||
//
|
||||
// Simulating operations on a twice_rep_t to perform a single final full-width
|
||||
// iteration. Using ad-hoc multiplication implementations to take advantage
|
||||
// of particular structure of operands.
|
||||
let blo: F::Int = b_uq1 & lo_mask;
|
||||
|
||||
// x_UQ0 = x_UQ0_hw * 2^HW - 1
|
||||
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
|
||||
//
|
||||
// <--- higher half ---><--- lower half --->
|
||||
// [x_UQ0_hw * b_UQ1_hw]
|
||||
// + [ x_UQ0_hw * blo ]
|
||||
// - [ b_UQ1 ]
|
||||
// = [ result ][.... discarded ...]
|
||||
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
|
||||
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
|
||||
.wrapping_sub(one)
|
||||
.wrapping_neg(); // account for *possible* carry
|
||||
|
||||
let lo_corr: F::Int = corr_uq1 & lo_mask;
|
||||
let hi_corr: F::Int = corr_uq1 >> hw;
|
||||
|
||||
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
|
||||
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
|
||||
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
|
||||
// 1 to account for the highest bit of corr_UQ1 can be 1
|
||||
// 1 to account for possible carry
|
||||
// Just like the case of half-width iterations but with possibility
|
||||
// of overflowing by one extra Ulp of x_UQ0.
|
||||
.wrapping_sub(F::Int::from(2u8));
|
||||
|
||||
x_uq0 -= one;
|
||||
// ... and then traditional fixup by 2 should work
|
||||
|
||||
// On error estimation:
|
||||
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
|
||||
// + (2^-HW + 2^-W))
|
||||
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
|
||||
//
|
||||
// Then like for the half-width iterations:
|
||||
// With 0 <= eps1, eps2 < 2^-W
|
||||
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
|
||||
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
|
||||
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
|
||||
x_uq0
|
||||
} else {
|
||||
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
|
||||
let c: F::Int = F::Int::from(0x7504F333u32) << (F::BITS - 32);
|
||||
let mut x_uq0: F::Int = c.wrapping_sub(b_uq1);
|
||||
|
||||
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
|
||||
// x_uq0
|
||||
for _ in 0..full_iterations {
|
||||
x_uq0 = next_guess(x_uq0, b_uq1);
|
||||
}
|
||||
|
||||
x_uq0
|
||||
};
|
||||
|
||||
// Finally, account for possible overflow, as explained above.
|
||||
x_uq0 = x_uq0.wrapping_sub(2.cast());
|
||||
|
||||
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
|
||||
x_uq0 -= recip_precision.cast();
|
||||
|
||||
// Now 1/b - (2*P) * 2^-W < x < 1/b
|
||||
// FIXME Is x_UQ0 still >= 0.5?
|
||||
|
||||
let mut quotient_uq1: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
|
||||
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
|
||||
|
||||
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
|
||||
// adjust it to be in [1.0, 2.0) as UQ1.SB.
|
||||
let mut residual_lo = if quotient_uq1 < (implicit_bit << 1) {
|
||||
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
|
||||
// effectively doubling its value as well as its error estimation.
|
||||
let residual_lo = (a_significand << (significand_bits + 1))
|
||||
.wrapping_sub(quotient_uq1.wrapping_mul(b_significand));
|
||||
res_exponent -= 1;
|
||||
a_significand <<= 1;
|
||||
residual_lo
|
||||
} else {
|
||||
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
|
||||
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
|
||||
quotient_uq1 >>= 1;
|
||||
(a_significand << significand_bits).wrapping_sub(quotient_uq1.wrapping_mul(b_significand))
|
||||
};
|
||||
|
||||
// drop mutability
|
||||
let quotient = quotient_uq1;
|
||||
|
||||
// NB: residualLo is calculated above for the normal result case.
|
||||
// It is re-computed on denormal path that is expected to be not so
|
||||
// performance-sensitive.
|
||||
//
|
||||
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
|
||||
// Each NextAfter() increments the floating point value by at least 2^-SB
|
||||
// (more, if exponent was incremented).
|
||||
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
|
||||
// q
|
||||
// | | * | | | | |
|
||||
// <---> 2^t
|
||||
// | | | | | * | |
|
||||
// q
|
||||
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
|
||||
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
|
||||
// (8*P) * 2^-W < 0.5 * 2^-SB
|
||||
// P < 2^(W-4-SB)
|
||||
// Generally, for at most R NextAfter() to be enough,
|
||||
// P < (2*R - 1) * 2^(W-4-SB)
|
||||
// For f32 (0+3): 10 < 32 (OK)
|
||||
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
|
||||
// For f64: 220 < 256 (OK)
|
||||
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
|
||||
//
|
||||
// If we have overflowed the exponent, return infinity
|
||||
if res_exponent >= i32::cast_from(exponent_sat) {
|
||||
return F::from_bits(inf_rep | quotient_sign);
|
||||
}
|
||||
|
||||
// Now, quotient <= the correctly-rounded result
|
||||
// and may need taking NextAfter() up to 3 times (see error estimates above)
|
||||
// r = a - b * q
|
||||
let mut abs_result = if res_exponent > 0 {
|
||||
let mut ret = quotient & significand_mask;
|
||||
ret |= F::Int::from(res_exponent as u32) << significand_bits;
|
||||
residual_lo <<= 1;
|
||||
ret
|
||||
} else {
|
||||
if ((significand_bits as i32) + res_exponent) < 0 {
|
||||
return F::from_bits(quotient_sign);
|
||||
}
|
||||
|
||||
let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1);
|
||||
residual_lo = a_significand
|
||||
.wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast(res_exponent)))
|
||||
.wrapping_sub(ret.wrapping_mul(b_significand) << 1);
|
||||
ret
|
||||
};
|
||||
|
||||
residual_lo += abs_result & one; // tie to even
|
||||
// conditionally turns the below LT comparison into LTE
|
||||
abs_result += u8::from(residual_lo > b_significand).into();
|
||||
|
||||
if F::BITS == 128 || (F::BITS == 32 && half_iterations > 0) {
|
||||
// Do not round Infinity to NaN
|
||||
abs_result +=
|
||||
u8::from(abs_result < inf_rep && residual_lo > (2 + 1).cast() * b_significand).into();
|
||||
}
|
||||
|
||||
if F::BITS == 128 {
|
||||
abs_result +=
|
||||
u8::from(abs_result < inf_rep && residual_lo > (4 + 1).cast() * b_significand).into();
|
||||
}
|
||||
|
||||
F::from_bits(abs_result | quotient_sign)
|
||||
}
|
||||
|
||||
/// Calculate the number of iterations required for a float type's precision.
|
||||
///
|
||||
/// This returns `(h, f)` where `h` is the number of iterations to be done using integers at half
|
||||
/// the float's bit width, and `f` is the number of iterations done using integers of the float's
|
||||
/// full width. This is further explained in the module documentation.
|
||||
///
|
||||
/// # Requirements
|
||||
///
|
||||
/// The initial estimate should have at least 8 bits of precision. If this is not true, results
|
||||
/// will be inaccurate.
|
||||
const fn get_iterations<F: Float>() -> (usize, usize) {
|
||||
// Precision doubles with each iteration. Assume we start with 8 bits of precision.
|
||||
let total_iterations = F::BITS.ilog2() as usize - 2;
|
||||
|
||||
if 2 * size_of::<F>() <= size_of::<*const ()>() {
|
||||
// If widening multiplication will be efficient (uses word-sized integers), there is no
|
||||
// reason to use half-sized iterations.
|
||||
(0, total_iterations)
|
||||
} else {
|
||||
// Otherwise, do as many iterations as possible at half width.
|
||||
(total_iterations - 1, 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// `u_n` for different precisions (with N-1 half-width iterations).
|
||||
///
|
||||
/// W0 is the precision of C
|
||||
/// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
|
||||
///
|
||||
/// Estimated with bc:
|
||||
///
|
||||
/// ```text
|
||||
/// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
|
||||
/// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
|
||||
/// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
|
||||
/// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
|
||||
///
|
||||
/// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
|
||||
/// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
|
||||
/// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
|
||||
/// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
|
||||
/// u_3 | < 7.31 | | < 7.31 | < 27054456580
|
||||
/// u_4 | | | | < 80.4
|
||||
/// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
|
||||
/// ````
|
||||
///
|
||||
/// Add 2 to `U_N` due to final decrement.
|
||||
const fn reciprocal_precision<F: Float>() -> u16 {
|
||||
let (half_iterations, full_iterations) = get_iterations::<F>();
|
||||
|
||||
if full_iterations < 1 {
|
||||
panic!("Must have at least one full iteration");
|
||||
}
|
||||
|
||||
// FIXME(tgross35): calculate this programmatically
|
||||
if F::BITS == 32 && half_iterations == 2 && full_iterations == 1 {
|
||||
74u16
|
||||
} else if F::BITS == 32 && half_iterations == 0 && full_iterations == 3 {
|
||||
10
|
||||
} else if F::BITS == 64 && half_iterations == 3 && full_iterations == 1 {
|
||||
220
|
||||
} else if F::BITS == 128 && half_iterations == 4 && full_iterations == 1 {
|
||||
13922
|
||||
} else {
|
||||
panic!("Invalid number of iterations")
|
||||
}
|
||||
}
|
||||
|
||||
/// The value of `C` adjusted to half width.
|
||||
///
|
||||
/// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW with W0 being either
|
||||
/// 16 or 32 and W0 <= HW. That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from
|
||||
/// which b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
|
||||
fn c_hw<F: Float>() -> HalfRep<F>
|
||||
where
|
||||
F::Int: DInt,
|
||||
u128: CastInto<HalfRep<F>>,
|
||||
{
|
||||
const C_U128: u128 = 0x7504f333f9de6108b2fb1366eaa6a542;
|
||||
const { C_U128 >> (u128::BITS - <HalfRep<F>>::BITS) }.cast()
|
||||
}
|
||||
|
||||
/// Perform one iteration at any width to approach `1/b`, given previous guess `x`. Returns
|
||||
/// the next `x` as a UQ0 number.
|
||||
///
|
||||
/// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`. It
|
||||
/// uses widening multiplication to calculate the result with necessary precision.
|
||||
fn next_guess<I>(x_uq0: I, b_uq1: I) -> I
|
||||
where
|
||||
I: Int + HInt,
|
||||
<I as HInt>::D: ops::Shr<u32, Output = <I as HInt>::D>,
|
||||
{
|
||||
// `corr = 2 - b*x_n`
|
||||
//
|
||||
// This looks like `0 - b*x_n`. However, this works - in `UQ1`, `0.0 - x = 2.0 - x`.
|
||||
let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());
|
||||
|
||||
// `x_n * corr = x_n * (2 - b*x_n)`
|
||||
(x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo()
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[arm_aeabi_alias = __aeabi_fdiv]
|
||||
pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 {
|
||||
div(a, b)
|
||||
}
|
||||
|
||||
#[arm_aeabi_alias = __aeabi_ddiv]
|
||||
pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 {
|
||||
div(a, b)
|
||||
}
|
||||
|
||||
#[ppc_alias = __divkf3]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
|
||||
div(a, b)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 {
|
||||
a / b
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 {
|
||||
a / b
|
||||
}
|
||||
}
|
||||
123
library/compiler-builtins/compiler-builtins/src/float/extend.rs
Normal file
123
library/compiler-builtins/compiler-builtins/src/float/extend.rs
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
|
||||
fn extend<F: Float, R: Float>(a: F) -> R
|
||||
where
|
||||
F::Int: CastInto<u64>,
|
||||
u64: CastInto<F::Int>,
|
||||
u32: CastInto<R::Int>,
|
||||
R::Int: CastInto<u32>,
|
||||
R::Int: CastInto<u64>,
|
||||
u64: CastInto<R::Int>,
|
||||
F::Int: CastInto<R::Int>,
|
||||
{
|
||||
let src_zero = F::Int::ZERO;
|
||||
let src_one = F::Int::ONE;
|
||||
let src_bits = F::BITS;
|
||||
let src_sig_bits = F::SIG_BITS;
|
||||
let src_exp_bias = F::EXP_BIAS;
|
||||
let src_min_normal = F::IMPLICIT_BIT;
|
||||
let src_infinity = F::EXP_MASK;
|
||||
let src_sign_mask = F::SIGN_MASK;
|
||||
let src_abs_mask = src_sign_mask - src_one;
|
||||
let src_qnan = F::SIG_MASK;
|
||||
let src_nan_code = src_qnan - src_one;
|
||||
|
||||
let dst_bits = R::BITS;
|
||||
let dst_sig_bits = R::SIG_BITS;
|
||||
let dst_inf_exp = R::EXP_SAT;
|
||||
let dst_exp_bias = R::EXP_BIAS;
|
||||
let dst_min_normal = R::IMPLICIT_BIT;
|
||||
|
||||
let sig_bits_delta = dst_sig_bits - src_sig_bits;
|
||||
let exp_bias_delta = dst_exp_bias - src_exp_bias;
|
||||
let a_abs = a.to_bits() & src_abs_mask;
|
||||
let mut abs_result = R::Int::ZERO;
|
||||
|
||||
if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) {
|
||||
// a is a normal number.
|
||||
// Extend to the destination type by shifting the significand and
|
||||
// exponent into the proper position and rebiasing the exponent.
|
||||
let abs_dst: R::Int = a_abs.cast();
|
||||
let bias_dst: R::Int = exp_bias_delta.cast();
|
||||
abs_result = abs_dst.wrapping_shl(sig_bits_delta);
|
||||
abs_result += bias_dst.wrapping_shl(dst_sig_bits);
|
||||
} else if a_abs >= src_infinity {
|
||||
// a is NaN or infinity.
|
||||
// Conjure the result by beginning with infinity, then setting the qNaN
|
||||
// bit (if needed) and right-aligning the rest of the trailing NaN
|
||||
// payload field.
|
||||
let qnan_dst: R::Int = (a_abs & src_qnan).cast();
|
||||
let nan_code_dst: R::Int = (a_abs & src_nan_code).cast();
|
||||
let inf_exp_dst: R::Int = dst_inf_exp.cast();
|
||||
abs_result = inf_exp_dst.wrapping_shl(dst_sig_bits);
|
||||
abs_result |= qnan_dst.wrapping_shl(sig_bits_delta);
|
||||
abs_result |= nan_code_dst.wrapping_shl(sig_bits_delta);
|
||||
} else if a_abs != src_zero {
|
||||
// a is denormal.
|
||||
// Renormalize the significand and clear the leading bit, then insert
|
||||
// the correct adjusted exponent in the destination type.
|
||||
let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros();
|
||||
let abs_dst: R::Int = a_abs.cast();
|
||||
let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast();
|
||||
abs_result = abs_dst.wrapping_shl(sig_bits_delta + scale);
|
||||
abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sig_bits));
|
||||
}
|
||||
|
||||
let sign_result: R::Int = (a.to_bits() & src_sign_mask).cast();
|
||||
R::from_bits(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits)))
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_f2d]
|
||||
pub extern "C" fn __extendsfdf2(a: f32) -> f64 {
|
||||
extend(a)
|
||||
}
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_arg_abi]
|
||||
#[arm_aeabi_alias = __aeabi_h2f]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __extendhfsf2(a: f16) -> f32 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_arg_abi]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_arg_abi]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __extendhfkf2]
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
pub extern "C" fn __extendhftf2(a: f16) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __extendsfkf2]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __extendsftf2(a: f32) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __extenddfkf2]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __extenddftf2(a: f64) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
}
|
||||
15
library/compiler-builtins/compiler-builtins/src/float/mod.rs
Normal file
15
library/compiler-builtins/compiler-builtins/src/float/mod.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
pub mod add;
|
||||
pub mod cmp;
|
||||
pub mod conv;
|
||||
pub mod div;
|
||||
pub mod extend;
|
||||
pub mod mul;
|
||||
pub mod pow;
|
||||
pub mod sub;
|
||||
pub(crate) mod traits;
|
||||
pub mod trunc;
|
||||
|
||||
#[cfg(not(feature = "unstable-public-internals"))]
|
||||
pub(crate) use traits::{Float, HalfRep};
|
||||
#[cfg(feature = "unstable-public-internals")]
|
||||
pub use traits::{Float, HalfRep};
|
||||
200
library/compiler-builtins/compiler-builtins/src/float/mul.rs
Normal file
200
library/compiler-builtins/compiler-builtins/src/float/mul.rs
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
|
||||
|
||||
fn mul<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
u32: CastInto<F::Int>,
|
||||
F::Int: CastInto<u32>,
|
||||
i32: CastInto<F::Int>,
|
||||
F::Int: CastInto<i32>,
|
||||
F::Int: HInt,
|
||||
{
|
||||
let one = F::Int::ONE;
|
||||
let zero = F::Int::ZERO;
|
||||
|
||||
let bits = F::BITS;
|
||||
let significand_bits = F::SIG_BITS;
|
||||
let max_exponent = F::EXP_SAT;
|
||||
|
||||
let exponent_bias = F::EXP_BIAS;
|
||||
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
let exponent_bits = F::EXP_BITS;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
let b_rep = b.to_bits();
|
||||
|
||||
let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
|
||||
let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
|
||||
let product_sign = (a_rep ^ b_rep) & sign_bit;
|
||||
|
||||
let mut a_significand = a_rep & significand_mask;
|
||||
let mut b_significand = b_rep & significand_mask;
|
||||
let mut scale = 0;
|
||||
|
||||
// Detect if a or b is zero, denormal, infinity, or NaN.
|
||||
if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
|
||||
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
|
||||
{
|
||||
let a_abs = a_rep & abs_mask;
|
||||
let b_abs = b_rep & abs_mask;
|
||||
|
||||
// NaN + anything = qNaN
|
||||
if a_abs > inf_rep {
|
||||
return F::from_bits(a_rep | quiet_bit);
|
||||
}
|
||||
// anything + NaN = qNaN
|
||||
if b_abs > inf_rep {
|
||||
return F::from_bits(b_rep | quiet_bit);
|
||||
}
|
||||
|
||||
if a_abs == inf_rep {
|
||||
if b_abs != zero {
|
||||
// infinity * non-zero = +/- infinity
|
||||
return F::from_bits(a_abs | product_sign);
|
||||
} else {
|
||||
// infinity * zero = NaN
|
||||
return F::from_bits(qnan_rep);
|
||||
}
|
||||
}
|
||||
|
||||
if b_abs == inf_rep {
|
||||
if a_abs != zero {
|
||||
// infinity * non-zero = +/- infinity
|
||||
return F::from_bits(b_abs | product_sign);
|
||||
} else {
|
||||
// infinity * zero = NaN
|
||||
return F::from_bits(qnan_rep);
|
||||
}
|
||||
}
|
||||
|
||||
// zero * anything = +/- zero
|
||||
if a_abs == zero {
|
||||
return F::from_bits(product_sign);
|
||||
}
|
||||
|
||||
// anything * zero = +/- zero
|
||||
if b_abs == zero {
|
||||
return F::from_bits(product_sign);
|
||||
}
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
// include the necessary exponent adjustment.
|
||||
if a_abs < implicit_bit {
|
||||
let (exponent, significand) = F::normalize(a_significand);
|
||||
scale += exponent;
|
||||
a_significand = significand;
|
||||
}
|
||||
|
||||
if b_abs < implicit_bit {
|
||||
let (exponent, significand) = F::normalize(b_significand);
|
||||
scale += exponent;
|
||||
b_significand = significand;
|
||||
}
|
||||
}
|
||||
|
||||
// Or in the implicit significand bit. (If we fell through from the
|
||||
// denormal path it was already set by normalize( ), but setting it twice
|
||||
// won't hurt anything.)
|
||||
a_significand |= implicit_bit;
|
||||
b_significand |= implicit_bit;
|
||||
|
||||
// Get the significand of a*b. Before multiplying the significands, shift
|
||||
// one of them left to left-align it in the field. Thus, the product will
|
||||
// have (exponentBits + 2) integral digits, all but two of which must be
|
||||
// zero. Normalizing this result is just a conditional left-shift by one
|
||||
// and bumping the exponent accordingly.
|
||||
let (mut product_low, mut product_high) = a_significand
|
||||
.widen_mul(b_significand << exponent_bits)
|
||||
.lo_hi();
|
||||
|
||||
let a_exponent_i32: i32 = a_exponent.cast();
|
||||
let b_exponent_i32: i32 = b_exponent.cast();
|
||||
let mut product_exponent: i32 = a_exponent_i32
|
||||
.wrapping_add(b_exponent_i32)
|
||||
.wrapping_add(scale)
|
||||
.wrapping_sub(exponent_bias as i32);
|
||||
|
||||
// Normalize the significand, adjust exponent if needed.
|
||||
if (product_high & implicit_bit) != zero {
|
||||
product_exponent = product_exponent.wrapping_add(1);
|
||||
} else {
|
||||
product_high = (product_high << 1) | (product_low >> (bits - 1));
|
||||
product_low <<= 1;
|
||||
}
|
||||
|
||||
// If we have overflowed the type, return +/- infinity.
|
||||
if product_exponent >= max_exponent as i32 {
|
||||
return F::from_bits(inf_rep | product_sign);
|
||||
}
|
||||
|
||||
if product_exponent <= 0 {
|
||||
// Result is denormal before rounding
|
||||
//
|
||||
// If the result is so small that it just underflows to zero, return
|
||||
// a zero of the appropriate sign. Mathematically there is no need to
|
||||
// handle this case separately, but we make it a special case to
|
||||
// simplify the shift logic.
|
||||
let shift = one.wrapping_sub(product_exponent.cast()).cast();
|
||||
if shift >= bits {
|
||||
return F::from_bits(product_sign);
|
||||
}
|
||||
|
||||
// Otherwise, shift the significand of the result so that the round
|
||||
// bit is the high bit of `product_low`.
|
||||
// Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are
|
||||
// not all zero so that the result is correctly rounded below.
|
||||
let sticky = product_low << (bits - shift) != zero;
|
||||
product_low =
|
||||
(product_high << (bits - shift)) | (product_low >> shift) | (sticky as u32).cast();
|
||||
product_high >>= shift;
|
||||
} else {
|
||||
// Result is normal before rounding; insert the exponent.
|
||||
product_high &= significand_mask;
|
||||
product_high |= product_exponent.cast() << significand_bits;
|
||||
}
|
||||
|
||||
// Insert the sign of the result:
|
||||
product_high |= product_sign;
|
||||
|
||||
// Final rounding. The final result may overflow to infinity, or underflow
|
||||
// to zero, but those are the correct results in those cases. We use the
|
||||
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
|
||||
if product_low > sign_bit {
|
||||
product_high += one;
|
||||
}
|
||||
|
||||
if product_low == sign_bit {
|
||||
product_high += product_high & one;
|
||||
}
|
||||
|
||||
F::from_bits(product_high)
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_fmul]
|
||||
pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
|
||||
mul(a, b)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_dmul]
|
||||
pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
|
||||
mul(a, b)
|
||||
}
|
||||
|
||||
#[ppc_alias = __mulkf3]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __multf3(a: f128, b: f128) -> f128 {
|
||||
mul(a, b)
|
||||
}
|
||||
}
|
||||
40
library/compiler-builtins/compiler-builtins/src/float/pow.rs
Normal file
40
library/compiler-builtins/compiler-builtins/src/float/pow.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::Int;
|
||||
|
||||
/// Returns `a` raised to the power `b`
|
||||
fn pow<F: Float>(a: F, b: i32) -> F {
|
||||
let mut a = a;
|
||||
let recip = b < 0;
|
||||
let mut pow = Int::abs_diff(b, 0);
|
||||
let mut mul = F::ONE;
|
||||
loop {
|
||||
if (pow & 1) != 0 {
|
||||
mul *= a;
|
||||
}
|
||||
pow >>= 1;
|
||||
if pow == 0 {
|
||||
break;
|
||||
}
|
||||
a *= a;
|
||||
}
|
||||
|
||||
if recip { F::ONE / mul } else { mul }
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 {
|
||||
pow(a, b)
|
||||
}
|
||||
|
||||
pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 {
|
||||
pow(a, b)
|
||||
}
|
||||
|
||||
#[ppc_alias = __powikf2]
|
||||
#[cfg(f128_enabled)]
|
||||
// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
|
||||
#[cfg(not(target_env = "msvc"))]
|
||||
pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 {
|
||||
pow(a, b)
|
||||
}
|
||||
}
|
||||
24
library/compiler-builtins/compiler-builtins/src/float/sub.rs
Normal file
24
library/compiler-builtins/compiler-builtins/src/float/sub.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
use crate::float::Float;
|
||||
|
||||
intrinsics! {
|
||||
#[arm_aeabi_alias = __aeabi_fsub]
|
||||
pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 {
|
||||
crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))
|
||||
}
|
||||
|
||||
#[arm_aeabi_alias = __aeabi_dsub]
|
||||
pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 {
|
||||
crate::float::add::__adddf3(a, f64::from_bits(b.to_bits() ^ f64::SIGN_MASK))
|
||||
}
|
||||
|
||||
#[ppc_alias = __subkf3]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 {
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use crate::float::add::__addkf3 as __addtf3;
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use crate::float::add::__addtf3;
|
||||
|
||||
__addtf3(a, f128::from_bits(b.to_bits() ^ f128::SIGN_MASK))
|
||||
}
|
||||
}
|
||||
189
library/compiler-builtins/compiler-builtins/src/float/traits.rs
Normal file
189
library/compiler-builtins/compiler-builtins/src/float/traits.rs
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
use core::ops;
|
||||
|
||||
use crate::int::{DInt, Int, MinInt};
|
||||
|
||||
/// Wrapper to extract the integer type half of the float's size
|
||||
pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;
|
||||
|
||||
/// Trait for some basic operations on floats
|
||||
#[allow(dead_code)]
|
||||
pub trait Float:
|
||||
Copy
|
||||
+ core::fmt::Debug
|
||||
+ PartialEq
|
||||
+ PartialOrd
|
||||
+ ops::AddAssign
|
||||
+ ops::MulAssign
|
||||
+ ops::Add<Output = Self>
|
||||
+ ops::Sub<Output = Self>
|
||||
+ ops::Div<Output = Self>
|
||||
+ ops::Rem<Output = Self>
|
||||
{
|
||||
/// A uint of the same width as the float
|
||||
type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
|
||||
|
||||
/// A int of the same width as the float
|
||||
type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
|
||||
|
||||
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
|
||||
type ExpInt: Int;
|
||||
|
||||
const ZERO: Self;
|
||||
const ONE: Self;
|
||||
|
||||
/// The bitwidth of the float type.
|
||||
const BITS: u32;
|
||||
|
||||
/// The bitwidth of the significand.
|
||||
const SIG_BITS: u32;
|
||||
|
||||
/// The bitwidth of the exponent.
|
||||
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
|
||||
|
||||
/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
|
||||
/// representation.
|
||||
///
|
||||
/// This is in the rightmost position, use `EXP_MASK` for the shifted value.
|
||||
const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
|
||||
|
||||
/// The exponent bias value.
|
||||
const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
|
||||
|
||||
/// A mask for the sign bit.
|
||||
const SIGN_MASK: Self::Int;
|
||||
|
||||
/// A mask for the significand.
|
||||
const SIG_MASK: Self::Int;
|
||||
|
||||
/// The implicit bit of the float format.
|
||||
const IMPLICIT_BIT: Self::Int;
|
||||
|
||||
/// A mask for the exponent.
|
||||
const EXP_MASK: Self::Int;
|
||||
|
||||
/// Returns `self` transmuted to `Self::Int`
|
||||
fn to_bits(self) -> Self::Int;
|
||||
|
||||
/// Returns `self` transmuted to `Self::SignedInt`
|
||||
fn to_bits_signed(self) -> Self::SignedInt;
|
||||
|
||||
/// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
|
||||
/// represented in multiple different ways. This method returns `true` if two NaNs are
|
||||
/// compared.
|
||||
fn eq_repr(self, rhs: Self) -> bool;
|
||||
|
||||
/// Returns true if the sign is negative
|
||||
fn is_sign_negative(self) -> bool;
|
||||
|
||||
/// Returns the exponent, not adjusting for bias.
|
||||
fn exp(self) -> Self::ExpInt;
|
||||
|
||||
/// Returns the significand with no implicit bit (or the "fractional" part)
|
||||
fn frac(self) -> Self::Int;
|
||||
|
||||
/// Returns the significand with implicit bit
|
||||
fn imp_frac(self) -> Self::Int;
|
||||
|
||||
/// Returns a `Self::Int` transmuted back to `Self`
|
||||
fn from_bits(a: Self::Int) -> Self;
|
||||
|
||||
/// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
|
||||
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
|
||||
|
||||
fn abs(self) -> Self {
|
||||
let abs_mask = !Self::SIGN_MASK;
|
||||
Self::from_bits(self.to_bits() & abs_mask)
|
||||
}
|
||||
|
||||
/// Returns (normalized exponent, normalized significand)
|
||||
fn normalize(significand: Self::Int) -> (i32, Self::Int);
|
||||
|
||||
/// Returns if `self` is subnormal
|
||||
fn is_subnormal(self) -> bool;
|
||||
}
|
||||
|
||||
macro_rules! float_impl {
|
||||
($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
|
||||
impl Float for $ty {
|
||||
type Int = $ity;
|
||||
type SignedInt = $sity;
|
||||
type ExpInt = $expty;
|
||||
|
||||
const ZERO: Self = 0.0;
|
||||
const ONE: Self = 1.0;
|
||||
|
||||
const BITS: u32 = $bits;
|
||||
const SIG_BITS: u32 = $significand_bits;
|
||||
|
||||
const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
|
||||
const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
|
||||
const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
|
||||
const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
|
||||
|
||||
fn to_bits(self) -> Self::Int {
|
||||
self.to_bits()
|
||||
}
|
||||
fn to_bits_signed(self) -> Self::SignedInt {
|
||||
self.to_bits() as Self::SignedInt
|
||||
}
|
||||
fn eq_repr(self, rhs: Self) -> bool {
|
||||
#[cfg(feature = "mangled-names")]
|
||||
fn is_nan(x: $ty) -> bool {
|
||||
// When using mangled-names, the "real" compiler-builtins might not have the
|
||||
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
|
||||
// FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
|
||||
// x is NaN if all the bits of the exponent are set and the significand is non-0
|
||||
x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
|
||||
}
|
||||
#[cfg(not(feature = "mangled-names"))]
|
||||
fn is_nan(x: $ty) -> bool {
|
||||
x.is_nan()
|
||||
}
|
||||
if is_nan(self) && is_nan(rhs) {
|
||||
true
|
||||
} else {
|
||||
self.to_bits() == rhs.to_bits()
|
||||
}
|
||||
}
|
||||
fn is_sign_negative(self) -> bool {
|
||||
self.is_sign_negative()
|
||||
}
|
||||
fn exp(self) -> Self::ExpInt {
|
||||
((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
|
||||
}
|
||||
fn frac(self) -> Self::Int {
|
||||
self.to_bits() & Self::SIG_MASK
|
||||
}
|
||||
fn imp_frac(self) -> Self::Int {
|
||||
self.frac() | Self::IMPLICIT_BIT
|
||||
}
|
||||
fn from_bits(a: Self::Int) -> Self {
|
||||
Self::from_bits(a)
|
||||
}
|
||||
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
|
||||
Self::from_bits(
|
||||
((negative as Self::Int) << (Self::BITS - 1))
|
||||
| ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
|
||||
| (significand & Self::SIG_MASK),
|
||||
)
|
||||
}
|
||||
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
|
||||
let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
|
||||
(
|
||||
1i32.wrapping_sub(shift as i32),
|
||||
significand << shift as Self::Int,
|
||||
)
|
||||
}
|
||||
fn is_subnormal(self) -> bool {
|
||||
(self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_impl!(f16, u16, i16, i8, 16, 10);
|
||||
float_impl!(f32, u32, i32, i16, 32, 23);
|
||||
float_impl!(f64, u64, i64, i16, 64, 52);
|
||||
#[cfg(f128_enabled)]
|
||||
float_impl!(f128, u128, i128, i16, 128, 112);
|
||||
169
library/compiler-builtins/compiler-builtins/src/float/trunc.rs
Normal file
169
library/compiler-builtins/compiler-builtins/src/float/trunc.rs
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
fn trunc<F: Float, R: Float>(a: F) -> R
|
||||
where
|
||||
F::Int: CastInto<u64>,
|
||||
F::Int: CastInto<u32>,
|
||||
u64: CastInto<F::Int>,
|
||||
u32: CastInto<F::Int>,
|
||||
R::Int: CastInto<u32>,
|
||||
u32: CastInto<R::Int>,
|
||||
F::Int: CastInto<R::Int>,
|
||||
{
|
||||
let src_zero = F::Int::ZERO;
|
||||
let src_one = F::Int::ONE;
|
||||
let src_bits = F::BITS;
|
||||
let src_exp_bias = F::EXP_BIAS;
|
||||
|
||||
let src_min_normal = F::IMPLICIT_BIT;
|
||||
let src_sig_mask = F::SIG_MASK;
|
||||
let src_infinity = F::EXP_MASK;
|
||||
let src_sign_mask = F::SIGN_MASK;
|
||||
let src_abs_mask = src_sign_mask - src_one;
|
||||
let round_mask = (src_one << (F::SIG_BITS - R::SIG_BITS)) - src_one;
|
||||
let halfway = src_one << (F::SIG_BITS - R::SIG_BITS - 1);
|
||||
let src_qnan = src_one << (F::SIG_BITS - 1);
|
||||
let src_nan_code = src_qnan - src_one;
|
||||
|
||||
let dst_zero = R::Int::ZERO;
|
||||
let dst_one = R::Int::ONE;
|
||||
let dst_bits = R::BITS;
|
||||
let dst_inf_exp = R::EXP_SAT;
|
||||
let dst_exp_bias = R::EXP_BIAS;
|
||||
|
||||
let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast();
|
||||
let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast();
|
||||
let underflow: F::Int = underflow_exponent << F::SIG_BITS;
|
||||
let overflow: F::Int = overflow_exponent << F::SIG_BITS;
|
||||
|
||||
let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1);
|
||||
let dst_nan_code = dst_qnan - dst_one;
|
||||
|
||||
let sig_bits_delta = F::SIG_BITS - R::SIG_BITS;
|
||||
// Break a into a sign and representation of the absolute value.
|
||||
let a_abs = a.to_bits() & src_abs_mask;
|
||||
let sign = a.to_bits() & src_sign_mask;
|
||||
let mut abs_result: R::Int;
|
||||
|
||||
if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) {
|
||||
// The exponent of a is within the range of normal numbers in the
|
||||
// destination format. We can convert by simply right-shifting with
|
||||
// rounding and adjusting the exponent.
|
||||
abs_result = (a_abs >> sig_bits_delta).cast();
|
||||
// Cast before shifting to prevent overflow.
|
||||
let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast();
|
||||
let tmp = bias_diff << R::SIG_BITS;
|
||||
abs_result = abs_result.wrapping_sub(tmp);
|
||||
|
||||
let round_bits = a_abs & round_mask;
|
||||
if round_bits > halfway {
|
||||
// Round to nearest.
|
||||
abs_result += dst_one;
|
||||
} else if round_bits == halfway {
|
||||
// Tie to even.
|
||||
abs_result += abs_result & dst_one;
|
||||
};
|
||||
} else if a_abs > src_infinity {
|
||||
// a is NaN.
|
||||
// Conjure the result by beginning with infinity, setting the qNaN
|
||||
// bit and inserting the (truncated) trailing NaN field.
|
||||
// Cast before shifting to prevent overflow.
|
||||
let dst_inf_exp: R::Int = dst_inf_exp.cast();
|
||||
abs_result = dst_inf_exp << R::SIG_BITS;
|
||||
abs_result |= dst_qnan;
|
||||
abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIG_BITS - R::SIG_BITS)).cast();
|
||||
} else if a_abs >= overflow {
|
||||
// a overflows to infinity.
|
||||
// Cast before shifting to prevent overflow.
|
||||
let dst_inf_exp: R::Int = dst_inf_exp.cast();
|
||||
abs_result = dst_inf_exp << R::SIG_BITS;
|
||||
} else {
|
||||
// a underflows on conversion to the destination type or is an exact
|
||||
// zero. The result may be a denormal or zero. Extract the exponent
|
||||
// to get the shift amount for the denormalization.
|
||||
let a_exp: u32 = (a_abs >> F::SIG_BITS).cast();
|
||||
let shift = src_exp_bias - dst_exp_bias - a_exp + 1;
|
||||
|
||||
let significand = (a.to_bits() & src_sig_mask) | src_min_normal;
|
||||
|
||||
// Right shift by the denormalization amount with sticky.
|
||||
if shift > F::SIG_BITS {
|
||||
abs_result = dst_zero;
|
||||
} else {
|
||||
let sticky = if (significand << (src_bits - shift)) != src_zero {
|
||||
src_one
|
||||
} else {
|
||||
src_zero
|
||||
};
|
||||
let denormalized_significand: F::Int = (significand >> shift) | sticky;
|
||||
abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast();
|
||||
let round_bits = denormalized_significand & round_mask;
|
||||
// Round to nearest
|
||||
if round_bits > halfway {
|
||||
abs_result += dst_one;
|
||||
}
|
||||
// Ties to even
|
||||
else if round_bits == halfway {
|
||||
abs_result += abs_result & dst_one;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the signbit to the absolute value.
|
||||
R::from_bits(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast())
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[arm_aeabi_alias = __aeabi_d2f]
|
||||
pub extern "C" fn __truncdfsf2(a: f64) -> f32 {
|
||||
trunc(a)
|
||||
}
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_ret_abi]
|
||||
#[arm_aeabi_alias = __aeabi_f2h]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __truncsfhf2(a: f32) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_ret_abi]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_ret_abi]
|
||||
#[arm_aeabi_alias = __aeabi_d2h]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __truncdfhf2(a: f64) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __trunckfhf2]
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
pub extern "C" fn __trunctfhf2(a: f128) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __trunckfsf2]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __trunckfdf2]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __trunctfdf2(a: f128) -> f64 {
|
||||
trunc(a)
|
||||
}
|
||||
}
|
||||
55
library/compiler-builtins/compiler-builtins/src/hexagon.rs
Normal file
55
library/compiler-builtins/compiler-builtins/src/hexagon.rs
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#![cfg(not(feature = "no-asm"))]
|
||||
|
||||
use core::arch::global_asm;
|
||||
|
||||
global_asm!(include_str!("hexagon/func_macro.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dfdiv.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dffma.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dfminmax.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dfmul.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/divdi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/divsi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw));
|
||||
|
||||
global_asm!(
|
||||
include_str!("hexagon/memcpy_forward_vp4cp4n2.s"),
|
||||
options(raw)
|
||||
);
|
||||
|
||||
global_asm!(
|
||||
include_str!("hexagon/memcpy_likely_aligned.s"),
|
||||
options(raw)
|
||||
);
|
||||
|
||||
global_asm!(include_str!("hexagon/moddi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/modsi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/udivdi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/udivsi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/umoddi3.s"), options(raw));
|
||||
|
||||
global_asm!(include_str!("hexagon/umodsi3.s"), options(raw));
|
||||
|
|
@ -0,0 +1,321 @@
|
|||
.text
|
||||
.global __hexagon_adddf3
|
||||
.global __hexagon_subdf3
|
||||
.type __hexagon_adddf3, @function
|
||||
.type __hexagon_subdf3, @function
|
||||
|
||||
.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3
|
||||
.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3
|
||||
.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3
|
||||
.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3
|
||||
.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3
|
||||
.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3
|
||||
|
||||
.p2align 5
|
||||
__hexagon_adddf3:
|
||||
{
|
||||
r4 = extractu(r1,#11,#20)
|
||||
r5 = extractu(r3,#11,#20)
|
||||
r13:12 = combine(##0x20000000,#0)
|
||||
}
|
||||
{
|
||||
p3 = dfclass(r1:0,#2)
|
||||
p3 = dfclass(r3:2,#2)
|
||||
r9:8 = r13:12
|
||||
p2 = cmp.gtu(r5,r4)
|
||||
}
|
||||
{
|
||||
if (!p3) jump .Ladd_abnormal
|
||||
if (p2) r1:0 = r3:2
|
||||
if (p2) r3:2 = r1:0
|
||||
if (p2) r5:4 = combine(r4,r5)
|
||||
}
|
||||
{
|
||||
r13:12 = insert(r1:0,#52,#11 -2)
|
||||
r9:8 = insert(r3:2,#52,#11 -2)
|
||||
r15 = sub(r4,r5)
|
||||
r7:6 = combine(#62,#1)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.Ladd_continue:
|
||||
{
|
||||
r15 = min(r15,r7)
|
||||
|
||||
r11:10 = neg(r13:12)
|
||||
p2 = cmp.gt(r1,#-1)
|
||||
r14 = #0
|
||||
}
|
||||
{
|
||||
if (!p2) r13:12 = r11:10
|
||||
r11:10 = extractu(r9:8,r15:14)
|
||||
r9:8 = ASR(r9:8,r15)
|
||||
|
||||
|
||||
|
||||
|
||||
r15:14 = #0
|
||||
}
|
||||
{
|
||||
p1 = cmp.eq(r11:10,r15:14)
|
||||
if (!p1.new) r8 = or(r8,r6)
|
||||
r5 = add(r4,#-1024 -60)
|
||||
p3 = cmp.gt(r3,#-1)
|
||||
}
|
||||
{
|
||||
r13:12 = add(r13:12,r9:8)
|
||||
r11:10 = sub(r13:12,r9:8)
|
||||
r7:6 = combine(#54,##2045)
|
||||
}
|
||||
{
|
||||
p0 = cmp.gtu(r4,r7)
|
||||
p0 = !cmp.gtu(r4,r6)
|
||||
if (!p0.new) jump:nt .Ladd_ovf_unf
|
||||
if (!p3) r13:12 = r11:10
|
||||
}
|
||||
{
|
||||
r1:0 = convert_d2df(r13:12)
|
||||
p0 = cmp.eq(r13,#0)
|
||||
p0 = cmp.eq(r12,#0)
|
||||
if (p0.new) jump:nt .Ladd_zero
|
||||
}
|
||||
{
|
||||
r1 += asl(r5,#20)
|
||||
jumpr r31
|
||||
}
|
||||
.falign
|
||||
__hexagon_subdf3:
|
||||
{
|
||||
r3 = togglebit(r3,#31)
|
||||
jump __qdsp_adddf3
|
||||
}
|
||||
|
||||
|
||||
.falign
|
||||
.Ladd_zero:
|
||||
|
||||
|
||||
{
|
||||
r28 = USR
|
||||
r1:0 = #0
|
||||
r3 = #1
|
||||
}
|
||||
{
|
||||
r28 = extractu(r28,#2,#22)
|
||||
r3 = asl(r3,#31)
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r28,#2)
|
||||
if (p0.new) r1 = xor(r1,r3)
|
||||
jumpr r31
|
||||
}
|
||||
.falign
|
||||
.Ladd_ovf_unf:
|
||||
{
|
||||
r1:0 = convert_d2df(r13:12)
|
||||
p0 = cmp.eq(r13,#0)
|
||||
p0 = cmp.eq(r12,#0)
|
||||
if (p0.new) jump:nt .Ladd_zero
|
||||
}
|
||||
{
|
||||
r28 = extractu(r1,#11,#20)
|
||||
r1 += asl(r5,#20)
|
||||
}
|
||||
{
|
||||
r5 = add(r5,r28)
|
||||
r3:2 = combine(##0x00100000,#0)
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r5,##1024 +1024 -2)
|
||||
if (p0.new) jump:nt .Ladd_ovf
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r5,#0)
|
||||
if (p0.new) jumpr:t r31
|
||||
r28 = sub(#1,r5)
|
||||
}
|
||||
{
|
||||
r3:2 = insert(r1:0,#52,#0)
|
||||
r1:0 = r13:12
|
||||
}
|
||||
{
|
||||
r3:2 = lsr(r3:2,r28)
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r3:2,#63,#0)
|
||||
jumpr r31
|
||||
}
|
||||
.falign
|
||||
.Ladd_ovf:
|
||||
|
||||
{
|
||||
r1:0 = r13:12
|
||||
r28 = USR
|
||||
r13:12 = combine(##0x7fefffff,#-1)
|
||||
}
|
||||
{
|
||||
r5 = extractu(r28,#2,#22)
|
||||
r28 = or(r28,#0x28)
|
||||
r9:8 = combine(##0x7ff00000,#0)
|
||||
}
|
||||
{
|
||||
USR = r28
|
||||
r5 ^= lsr(r1,#31)
|
||||
r28 = r5
|
||||
}
|
||||
{
|
||||
p0 = !cmp.eq(r28,#1)
|
||||
p0 = !cmp.eq(r5,#2)
|
||||
if (p0.new) r13:12 = r9:8
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r13:12,#63,#0)
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r1:0,r1:0)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Ladd_abnormal:
|
||||
{
|
||||
r13:12 = extractu(r1:0,#63,#0)
|
||||
r9:8 = extractu(r3:2,#63,#0)
|
||||
}
|
||||
{
|
||||
p3 = cmp.gtu(r13:12,r9:8)
|
||||
if (!p3.new) r1:0 = r3:2
|
||||
if (!p3.new) r3:2 = r1:0
|
||||
}
|
||||
{
|
||||
|
||||
p0 = dfclass(r1:0,#0x0f)
|
||||
if (!p0.new) jump:nt .Linvalid_nan_add
|
||||
if (!p3) r13:12 = r9:8
|
||||
if (!p3) r9:8 = r13:12
|
||||
}
|
||||
{
|
||||
|
||||
|
||||
p1 = dfclass(r1:0,#0x08)
|
||||
if (p1.new) jump:nt .Linf_add
|
||||
}
|
||||
{
|
||||
p2 = dfclass(r3:2,#0x01)
|
||||
if (p2.new) jump:nt .LB_zero
|
||||
r13:12 = #0
|
||||
}
|
||||
|
||||
{
|
||||
p0 = dfclass(r1:0,#4)
|
||||
if (p0.new) jump:nt .Ladd_two_subnormal
|
||||
r13:12 = combine(##0x20000000,#0)
|
||||
}
|
||||
{
|
||||
r4 = extractu(r1,#11,#20)
|
||||
r5 = #1
|
||||
|
||||
r9:8 = asl(r9:8,#11 -2)
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
r13:12 = insert(r1:0,#52,#11 -2)
|
||||
r15 = sub(r4,r5)
|
||||
r7:6 = combine(#62,#1)
|
||||
jump .Ladd_continue
|
||||
}
|
||||
|
||||
.Ladd_two_subnormal:
|
||||
{
|
||||
r13:12 = extractu(r1:0,#63,#0)
|
||||
r9:8 = extractu(r3:2,#63,#0)
|
||||
}
|
||||
{
|
||||
r13:12 = neg(r13:12)
|
||||
r9:8 = neg(r9:8)
|
||||
p0 = cmp.gt(r1,#-1)
|
||||
p1 = cmp.gt(r3,#-1)
|
||||
}
|
||||
{
|
||||
if (p0) r13:12 = r1:0
|
||||
if (p1) r9:8 = r3:2
|
||||
}
|
||||
{
|
||||
r13:12 = add(r13:12,r9:8)
|
||||
}
|
||||
{
|
||||
r9:8 = neg(r13:12)
|
||||
p0 = cmp.gt(r13,#-1)
|
||||
r3:2 = #0
|
||||
}
|
||||
{
|
||||
if (!p0) r1:0 = r9:8
|
||||
if (p0) r1:0 = r13:12
|
||||
r3 = ##0x80000000
|
||||
}
|
||||
{
|
||||
if (!p0) r1 = or(r1,r3)
|
||||
p0 = dfcmp.eq(r1:0,r3:2)
|
||||
if (p0.new) jump:nt .Lzero_plus_zero
|
||||
}
|
||||
{
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Linvalid_nan_add:
|
||||
{
|
||||
r28 = convert_df2sf(r1:0)
|
||||
p0 = dfclass(r3:2,#0x0f)
|
||||
if (p0.new) r3:2 = r1:0
|
||||
}
|
||||
{
|
||||
r2 = convert_df2sf(r3:2)
|
||||
r1:0 = #-1
|
||||
jumpr r31
|
||||
}
|
||||
.falign
|
||||
.LB_zero:
|
||||
{
|
||||
p0 = dfcmp.eq(r13:12,r1:0)
|
||||
if (!p0.new) jumpr:t r31
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
.Lzero_plus_zero:
|
||||
{
|
||||
p0 = cmp.eq(r1:0,r3:2)
|
||||
if (p0.new) jumpr:t r31
|
||||
}
|
||||
{
|
||||
r28 = USR
|
||||
}
|
||||
{
|
||||
r28 = extractu(r28,#2,#22)
|
||||
r1:0 = #0
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r28,#2)
|
||||
if (p0.new) r1 = ##0x80000000
|
||||
jumpr r31
|
||||
}
|
||||
.Linf_add:
|
||||
|
||||
{
|
||||
p0 = !cmp.eq(r1,r3)
|
||||
p0 = dfclass(r3:2,#8)
|
||||
if (!p0.new) jumpr:t r31
|
||||
}
|
||||
{
|
||||
r2 = ##0x7f800001
|
||||
}
|
||||
{
|
||||
r1:0 = convert_sf2df(r2)
|
||||
jumpr r31
|
||||
}
|
||||
.size __hexagon_adddf3,.-__hexagon_adddf3
|
||||
372
library/compiler-builtins/compiler-builtins/src/hexagon/dfdiv.s
Normal file
372
library/compiler-builtins/compiler-builtins/src/hexagon/dfdiv.s
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
.text
|
||||
.global __hexagon_divdf3
|
||||
.type __hexagon_divdf3,@function
|
||||
.global __qdsp_divdf3 ; .set __qdsp_divdf3, __hexagon_divdf3
|
||||
.global __hexagon_fast_divdf3 ; .set __hexagon_fast_divdf3, __hexagon_divdf3
|
||||
.global __hexagon_fast2_divdf3 ; .set __hexagon_fast2_divdf3, __hexagon_divdf3
|
||||
.p2align 5
|
||||
__hexagon_divdf3:
|
||||
{
|
||||
p2 = dfclass(r1:0,#0x02)
|
||||
p2 = dfclass(r3:2,#0x02)
|
||||
r13:12 = combine(r3,r1)
|
||||
r28 = xor(r1,r3)
|
||||
}
|
||||
{
|
||||
if (!p2) jump .Ldiv_abnormal
|
||||
r7:6 = extractu(r3:2,#23,#52 -23)
|
||||
r8 = ##0x3f800001
|
||||
}
|
||||
{
|
||||
r9 = or(r8,r6)
|
||||
r13 = extractu(r13,#11,#52 -32)
|
||||
r12 = extractu(r12,#11,#52 -32)
|
||||
p3 = cmp.gt(r28,#-1)
|
||||
}
|
||||
|
||||
|
||||
.Ldenorm_continue:
|
||||
{
|
||||
r11,p0 = sfrecipa(r8,r9)
|
||||
r10 = and(r8,#-2)
|
||||
r28 = #1
|
||||
r12 = sub(r12,r13)
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
r10 -= sfmpy(r11,r9):lib
|
||||
r1 = insert(r28,#11 +1,#52 -32)
|
||||
r13 = ##0x00800000 << 3
|
||||
}
|
||||
{
|
||||
r11 += sfmpy(r11,r10):lib
|
||||
r3 = insert(r28,#11 +1,#52 -32)
|
||||
r10 = and(r8,#-2)
|
||||
}
|
||||
{
|
||||
r10 -= sfmpy(r11,r9):lib
|
||||
r5 = #-0x3ff +1
|
||||
r4 = #0x3ff -1
|
||||
}
|
||||
{
|
||||
r11 += sfmpy(r11,r10):lib
|
||||
p1 = cmp.gt(r12,r5)
|
||||
p1 = !cmp.gt(r12,r4)
|
||||
}
|
||||
{
|
||||
r13 = insert(r11,#23,#3)
|
||||
r5:4 = #0
|
||||
r12 = add(r12,#-61)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
r13 = add(r13,#((-3) << 3))
|
||||
}
|
||||
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); }
|
||||
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); }
|
||||
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); }
|
||||
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); }
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
r15:14 = sub(r1:0,r3:2)
|
||||
p0 = cmp.gtu(r3:2,r1:0)
|
||||
|
||||
if (!p0.new) r6 = #2
|
||||
}
|
||||
{
|
||||
r5:4 = add(r5:4,r7:6)
|
||||
if (!p0) r1:0 = r15:14
|
||||
r15:14 = #0
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r1:0,r15:14)
|
||||
if (!p0.new) r4 = or(r4,r28)
|
||||
}
|
||||
{
|
||||
r7:6 = neg(r5:4)
|
||||
}
|
||||
{
|
||||
if (!p3) r5:4 = r7:6
|
||||
}
|
||||
{
|
||||
r1:0 = convert_d2df(r5:4)
|
||||
if (!p1) jump .Ldiv_ovf_unf
|
||||
}
|
||||
{
|
||||
r1 += asl(r12,#52 -32)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Ldiv_ovf_unf:
|
||||
{
|
||||
r1 += asl(r12,#52 -32)
|
||||
r13 = extractu(r1,#11,#52 -32)
|
||||
}
|
||||
{
|
||||
r7:6 = abs(r5:4)
|
||||
r12 = add(r12,r13)
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r12,##0x3ff +0x3ff)
|
||||
if (p0.new) jump:nt .Ldiv_ovf
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r12,#0)
|
||||
if (p0.new) jump:nt .Lpossible_unf2
|
||||
}
|
||||
{
|
||||
r13 = add(clb(r7:6),#-1)
|
||||
r12 = sub(#7,r12)
|
||||
r10 = USR
|
||||
r11 = #63
|
||||
}
|
||||
{
|
||||
r13 = min(r12,r11)
|
||||
r11 = or(r10,#0x030)
|
||||
r7:6 = asl(r7:6,r13)
|
||||
r12 = #0
|
||||
}
|
||||
{
|
||||
r15:14 = extractu(r7:6,r13:12)
|
||||
r7:6 = lsr(r7:6,r13)
|
||||
r3:2 = #1
|
||||
}
|
||||
{
|
||||
p0 = cmp.gtu(r3:2,r15:14)
|
||||
if (!p0.new) r6 = or(r2,r6)
|
||||
r7 = setbit(r7,#52 -32+4)
|
||||
}
|
||||
{
|
||||
r5:4 = neg(r7:6)
|
||||
p0 = bitsclr(r6,#(1<<4)-1)
|
||||
if (!p0.new) r10 = r11
|
||||
}
|
||||
{
|
||||
USR = r10
|
||||
if (p3) r5:4 = r7:6
|
||||
r10 = #-0x3ff -(52 +4)
|
||||
}
|
||||
{
|
||||
r1:0 = convert_d2df(r5:4)
|
||||
}
|
||||
{
|
||||
r1 += asl(r10,#52 -32)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
|
||||
.Lpossible_unf2:
|
||||
|
||||
|
||||
{
|
||||
r3:2 = extractu(r1:0,#63,#0)
|
||||
r15:14 = combine(##0x00100000,#0)
|
||||
r10 = #0x7FFF
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r15:14,r3:2)
|
||||
p0 = bitsset(r7,r10)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
if (!p0) jumpr r31
|
||||
r10 = USR
|
||||
}
|
||||
|
||||
{
|
||||
r10 = or(r10,#0x30)
|
||||
}
|
||||
{
|
||||
USR = r10
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r1:0,r1:0)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Ldiv_ovf:
|
||||
|
||||
|
||||
|
||||
{
|
||||
r10 = USR
|
||||
r3:2 = combine(##0x7fefffff,#-1)
|
||||
r1 = mux(p3,#0,#-1)
|
||||
}
|
||||
{
|
||||
r7:6 = combine(##0x7ff00000,#0)
|
||||
r5 = extractu(r10,#2,#22)
|
||||
r10 = or(r10,#0x28)
|
||||
}
|
||||
{
|
||||
USR = r10
|
||||
r5 ^= lsr(r1,#31)
|
||||
r4 = r5
|
||||
}
|
||||
{
|
||||
p0 = !cmp.eq(r4,#1)
|
||||
p0 = !cmp.eq(r5,#2)
|
||||
if (p0.new) r3:2 = r7:6
|
||||
p0 = dfcmp.eq(r3:2,r3:2)
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r3:2,#63,#0)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.Ldiv_abnormal:
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x0F)
|
||||
p0 = dfclass(r3:2,#0x0F)
|
||||
p3 = cmp.gt(r28,#-1)
|
||||
}
|
||||
{
|
||||
p1 = dfclass(r1:0,#0x08)
|
||||
p1 = dfclass(r3:2,#0x08)
|
||||
}
|
||||
{
|
||||
p2 = dfclass(r1:0,#0x01)
|
||||
p2 = dfclass(r3:2,#0x01)
|
||||
}
|
||||
{
|
||||
if (!p0) jump .Ldiv_nan
|
||||
if (p1) jump .Ldiv_invalid
|
||||
}
|
||||
{
|
||||
if (p2) jump .Ldiv_invalid
|
||||
}
|
||||
{
|
||||
p2 = dfclass(r1:0,#(0x0F ^ 0x01))
|
||||
p2 = dfclass(r3:2,#(0x0F ^ 0x08))
|
||||
}
|
||||
{
|
||||
p1 = dfclass(r1:0,#(0x0F ^ 0x08))
|
||||
p1 = dfclass(r3:2,#(0x0F ^ 0x01))
|
||||
}
|
||||
{
|
||||
if (!p2) jump .Ldiv_zero_result
|
||||
if (!p1) jump .Ldiv_inf_result
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x02)
|
||||
p1 = dfclass(r3:2,#0x02)
|
||||
r10 = ##0x00100000
|
||||
}
|
||||
{
|
||||
r13:12 = combine(r3,r1)
|
||||
r1 = insert(r10,#11 +1,#52 -32)
|
||||
r3 = insert(r10,#11 +1,#52 -32)
|
||||
}
|
||||
{
|
||||
if (p0) r1 = or(r1,r10)
|
||||
if (p1) r3 = or(r3,r10)
|
||||
}
|
||||
{
|
||||
r5 = add(clb(r1:0),#-11)
|
||||
r4 = add(clb(r3:2),#-11)
|
||||
r10 = #1
|
||||
}
|
||||
{
|
||||
r12 = extractu(r12,#11,#52 -32)
|
||||
r13 = extractu(r13,#11,#52 -32)
|
||||
}
|
||||
{
|
||||
r1:0 = asl(r1:0,r5)
|
||||
r3:2 = asl(r3:2,r4)
|
||||
if (!p0) r12 = sub(r10,r5)
|
||||
if (!p1) r13 = sub(r10,r4)
|
||||
}
|
||||
{
|
||||
r7:6 = extractu(r3:2,#23,#52 -23)
|
||||
}
|
||||
{
|
||||
r9 = or(r8,r6)
|
||||
jump .Ldenorm_continue
|
||||
}
|
||||
|
||||
.Ldiv_zero_result:
|
||||
{
|
||||
r1 = xor(r1,r3)
|
||||
r3:2 = #0
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r3:2,#63,#0)
|
||||
jumpr r31
|
||||
}
|
||||
.Ldiv_inf_result:
|
||||
{
|
||||
p2 = dfclass(r3:2,#0x01)
|
||||
p2 = dfclass(r1:0,#(0x0F ^ 0x08))
|
||||
}
|
||||
{
|
||||
r10 = USR
|
||||
if (!p2) jump 1f
|
||||
r1 = xor(r1,r3)
|
||||
}
|
||||
{
|
||||
r10 = or(r10,#0x04)
|
||||
}
|
||||
{
|
||||
USR = r10
|
||||
}
|
||||
1:
|
||||
{
|
||||
r3:2 = combine(##0x7ff00000,#0)
|
||||
p0 = dfcmp.uo(r3:2,r3:2)
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r3:2,#63,#0)
|
||||
jumpr r31
|
||||
}
|
||||
.Ldiv_nan:
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x10)
|
||||
p1 = dfclass(r3:2,#0x10)
|
||||
if (!p0.new) r1:0 = r3:2
|
||||
if (!p1.new) r3:2 = r1:0
|
||||
}
|
||||
{
|
||||
r5 = convert_df2sf(r1:0)
|
||||
r4 = convert_df2sf(r3:2)
|
||||
}
|
||||
{
|
||||
r1:0 = #-1
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Ldiv_invalid:
|
||||
{
|
||||
r10 = ##0x7f800001
|
||||
}
|
||||
{
|
||||
r1:0 = convert_sf2df(r10)
|
||||
jumpr r31
|
||||
}
|
||||
.size __hexagon_divdf3,.-__hexagon_divdf3
|
||||
534
library/compiler-builtins/compiler-builtins/src/hexagon/dffma.s
Normal file
534
library/compiler-builtins/compiler-builtins/src/hexagon/dffma.s
Normal file
|
|
@ -0,0 +1,534 @@
|
|||
.text
|
||||
.global __hexagon_fmadf4
|
||||
.type __hexagon_fmadf4,@function
|
||||
.global __hexagon_fmadf5
|
||||
.type __hexagon_fmadf5,@function
|
||||
.global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5
|
||||
.p2align 5
|
||||
__hexagon_fmadf4:
|
||||
__hexagon_fmadf5:
|
||||
fma:
|
||||
{
|
||||
p0 = dfclass(r1:0,#2)
|
||||
p0 = dfclass(r3:2,#2)
|
||||
r13:12 = #0
|
||||
r15:14 = #0
|
||||
}
|
||||
{
|
||||
r13:12 = insert(r1:0,#52,#11 -3)
|
||||
r15:14 = insert(r3:2,#52,#11 -3)
|
||||
r7 = ##0x10000000
|
||||
allocframe(#32)
|
||||
}
|
||||
{
|
||||
r9:8 = mpyu(r12,r14)
|
||||
if (!p0) jump .Lfma_abnormal_ab
|
||||
r13 = or(r13,r7)
|
||||
r15 = or(r15,r7)
|
||||
}
|
||||
{
|
||||
p0 = dfclass(r5:4,#2)
|
||||
if (!p0.new) jump:nt .Lfma_abnormal_c
|
||||
r11:10 = combine(r7,#0)
|
||||
r7:6 = combine(#0,r9)
|
||||
}
|
||||
.Lfma_abnormal_c_restart:
|
||||
{
|
||||
r7:6 += mpyu(r14,r13)
|
||||
r11:10 = insert(r5:4,#52,#11 -3)
|
||||
memd(r29+#0) = r17:16
|
||||
memd(r29+#8) = r19:18
|
||||
}
|
||||
{
|
||||
r7:6 += mpyu(r12,r15)
|
||||
r19:18 = neg(r11:10)
|
||||
p0 = cmp.gt(r5,#-1)
|
||||
r28 = xor(r1,r3)
|
||||
}
|
||||
{
|
||||
r18 = extractu(r1,#11,#20)
|
||||
r19 = extractu(r3,#11,#20)
|
||||
r17:16 = combine(#0,r7)
|
||||
if (!p0) r11:10 = r19:18
|
||||
}
|
||||
{
|
||||
r17:16 += mpyu(r13,r15)
|
||||
r9:8 = combine(r6,r8)
|
||||
r18 = add(r18,r19)
|
||||
|
||||
|
||||
|
||||
|
||||
r19 = extractu(r5,#11,#20)
|
||||
}
|
||||
{
|
||||
r18 = add(r18,#-1023 +(4))
|
||||
p3 = !cmp.gt(r28,#-1)
|
||||
r7:6 = #0
|
||||
r15:14 = #0
|
||||
}
|
||||
{
|
||||
r7:6 = sub(r7:6,r9:8,p3):carry
|
||||
p0 = !cmp.gt(r28,#-1)
|
||||
p1 = cmp.gt(r19,r18)
|
||||
if (p1.new) r19:18 = combine(r18,r19)
|
||||
}
|
||||
{
|
||||
r15:14 = sub(r15:14,r17:16,p3):carry
|
||||
if (p0) r9:8 = r7:6
|
||||
|
||||
|
||||
|
||||
|
||||
r7:6 = #0
|
||||
r19 = sub(r18,r19)
|
||||
}
|
||||
{
|
||||
if (p0) r17:16 = r15:14
|
||||
p0 = cmp.gt(r19,#63)
|
||||
if (p1) r9:8 = r7:6
|
||||
if (p1) r7:6 = r9:8
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
if (p1) r17:16 = r11:10
|
||||
if (p1) r11:10 = r17:16
|
||||
if (p0) r19 = add(r19,#-64)
|
||||
r28 = #63
|
||||
}
|
||||
{
|
||||
|
||||
if (p0) r7:6 = r11:10
|
||||
r28 = asr(r11,#31)
|
||||
r13 = min(r19,r28)
|
||||
r12 = #0
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
if (p0) r11:10 = combine(r28,r28)
|
||||
r5:4 = extract(r7:6,r13:12)
|
||||
r7:6 = lsr(r7:6,r13)
|
||||
r12 = sub(#64,r13)
|
||||
}
|
||||
{
|
||||
r15:14 = #0
|
||||
r28 = #-2
|
||||
r7:6 |= lsl(r11:10,r12)
|
||||
r11:10 = asr(r11:10,r13)
|
||||
}
|
||||
{
|
||||
p3 = cmp.gtu(r5:4,r15:14)
|
||||
if (p3.new) r6 = and(r6,r28)
|
||||
|
||||
|
||||
|
||||
r15:14 = #1
|
||||
r5:4 = #0
|
||||
}
|
||||
{
|
||||
r9:8 = add(r7:6,r9:8,p3):carry
|
||||
}
|
||||
{
|
||||
r17:16 = add(r11:10,r17:16,p3):carry
|
||||
r28 = #62
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
r12 = add(clb(r17:16),#-2)
|
||||
if (!cmp.eq(r12.new,r28)) jump:t 1f
|
||||
}
|
||||
|
||||
{
|
||||
r11:10 = extractu(r9:8,#62,#2)
|
||||
r9:8 = asl(r9:8,#62)
|
||||
r18 = add(r18,#-62)
|
||||
}
|
||||
{
|
||||
r17:16 = insert(r11:10,#62,#0)
|
||||
}
|
||||
{
|
||||
r12 = add(clb(r17:16),#-2)
|
||||
}
|
||||
.falign
|
||||
1:
|
||||
{
|
||||
r11:10 = asl(r17:16,r12)
|
||||
r5:4 |= asl(r9:8,r12)
|
||||
r13 = sub(#64,r12)
|
||||
r18 = sub(r18,r12)
|
||||
}
|
||||
{
|
||||
r11:10 |= lsr(r9:8,r13)
|
||||
p2 = cmp.gtu(r15:14,r5:4)
|
||||
r28 = #1023 +1023 -2
|
||||
}
|
||||
{
|
||||
if (!p2) r10 = or(r10,r14)
|
||||
|
||||
p0 = !cmp.gt(r18,r28)
|
||||
p0 = cmp.gt(r18,#1)
|
||||
if (!p0.new) jump:nt .Lfma_ovf_unf
|
||||
}
|
||||
{
|
||||
|
||||
p0 = cmp.gtu(r15:14,r11:10)
|
||||
r1:0 = convert_d2df(r11:10)
|
||||
r18 = add(r18,#-1023 -60)
|
||||
r17:16 = memd(r29+#0)
|
||||
}
|
||||
{
|
||||
r1 += asl(r18,#20)
|
||||
r19:18 = memd(r29+#8)
|
||||
if (!p0) dealloc_return
|
||||
}
|
||||
.Ladd_yields_zero:
|
||||
|
||||
{
|
||||
r28 = USR
|
||||
r1:0 = #0
|
||||
}
|
||||
{
|
||||
r28 = extractu(r28,#2,#22)
|
||||
r17:16 = memd(r29+#0)
|
||||
r19:18 = memd(r29+#8)
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r28,#2)
|
||||
if (p0.new) r1 = ##0x80000000
|
||||
dealloc_return
|
||||
}
|
||||
.Lfma_ovf_unf:
|
||||
{
|
||||
p0 = cmp.gtu(r15:14,r11:10)
|
||||
if (p0.new) jump:nt .Ladd_yields_zero
|
||||
}
|
||||
{
|
||||
r1:0 = convert_d2df(r11:10)
|
||||
r18 = add(r18,#-1023 -60)
|
||||
r28 = r18
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
r1 += asl(r18,#20)
|
||||
r7 = extractu(r1,#11,#20)
|
||||
}
|
||||
{
|
||||
r6 = add(r18,r7)
|
||||
r17:16 = memd(r29+#0)
|
||||
r19:18 = memd(r29+#8)
|
||||
r9:8 = abs(r11:10)
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r6,##1023 +1023)
|
||||
if (p0.new) jump:nt .Lfma_ovf
|
||||
}
|
||||
{
|
||||
p0 = cmp.gt(r6,#0)
|
||||
if (p0.new) jump:nt .Lpossible_unf0
|
||||
}
|
||||
{
|
||||
|
||||
|
||||
|
||||
r7 = add(clb(r9:8),#-2)
|
||||
r6 = sub(#1+5,r28)
|
||||
p3 = cmp.gt(r11,#-1)
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
r6 = add(r6,r7)
|
||||
r9:8 = asl(r9:8,r7)
|
||||
r1 = USR
|
||||
r28 = #63
|
||||
}
|
||||
{
|
||||
r7 = min(r6,r28)
|
||||
r6 = #0
|
||||
r0 = #0x0030
|
||||
}
|
||||
{
|
||||
r3:2 = extractu(r9:8,r7:6)
|
||||
r9:8 = asr(r9:8,r7)
|
||||
}
|
||||
{
|
||||
p0 = cmp.gtu(r15:14,r3:2)
|
||||
if (!p0.new) r8 = or(r8,r14)
|
||||
r9 = setbit(r9,#20 +3)
|
||||
}
|
||||
{
|
||||
r11:10 = neg(r9:8)
|
||||
p1 = bitsclr(r8,#(1<<3)-1)
|
||||
if (!p1.new) r1 = or(r1,r0)
|
||||
r3:2 = #0
|
||||
}
|
||||
{
|
||||
if (p3) r11:10 = r9:8
|
||||
USR = r1
|
||||
r28 = #-1023 -(52 +3)
|
||||
}
|
||||
{
|
||||
r1:0 = convert_d2df(r11:10)
|
||||
}
|
||||
{
|
||||
r1 += asl(r28,#20)
|
||||
dealloc_return
|
||||
}
|
||||
.Lpossible_unf0:
|
||||
{
|
||||
r28 = ##0x7fefffff
|
||||
r9:8 = abs(r11:10)
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r0,#0)
|
||||
p0 = bitsclr(r1,r28)
|
||||
if (!p0.new) dealloc_return:t
|
||||
r28 = #0x7fff
|
||||
}
|
||||
{
|
||||
p0 = bitsset(r9,r28)
|
||||
r3 = USR
|
||||
r2 = #0x0030
|
||||
}
|
||||
{
|
||||
if (p0) r3 = or(r3,r2)
|
||||
}
|
||||
{
|
||||
USR = r3
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r1:0,r1:0)
|
||||
dealloc_return
|
||||
}
|
||||
.Lfma_ovf:
|
||||
{
|
||||
r28 = USR
|
||||
r11:10 = combine(##0x7fefffff,#-1)
|
||||
r1:0 = r11:10
|
||||
}
|
||||
{
|
||||
r9:8 = combine(##0x7ff00000,#0)
|
||||
r3 = extractu(r28,#2,#22)
|
||||
r28 = or(r28,#0x28)
|
||||
}
|
||||
{
|
||||
USR = r28
|
||||
r3 ^= lsr(r1,#31)
|
||||
r2 = r3
|
||||
}
|
||||
{
|
||||
p0 = !cmp.eq(r2,#1)
|
||||
p0 = !cmp.eq(r3,#2)
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r9:8,r9:8)
|
||||
if (p0.new) r11:10 = r9:8
|
||||
}
|
||||
{
|
||||
r1:0 = insert(r11:10,#63,#0)
|
||||
dealloc_return
|
||||
}
|
||||
.Lfma_abnormal_ab:
|
||||
{
|
||||
r9:8 = extractu(r1:0,#63,#0)
|
||||
r11:10 = extractu(r3:2,#63,#0)
|
||||
deallocframe
|
||||
}
|
||||
{
|
||||
p3 = cmp.gtu(r9:8,r11:10)
|
||||
if (!p3.new) r1:0 = r3:2
|
||||
if (!p3.new) r3:2 = r1:0
|
||||
}
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x0f)
|
||||
if (!p0.new) jump:nt .Lnan
|
||||
if (!p3) r9:8 = r11:10
|
||||
if (!p3) r11:10 = r9:8
|
||||
}
|
||||
{
|
||||
p1 = dfclass(r1:0,#0x08)
|
||||
p1 = dfclass(r3:2,#0x0e)
|
||||
}
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x08)
|
||||
p0 = dfclass(r3:2,#0x01)
|
||||
}
|
||||
{
|
||||
if (p1) jump .Lab_inf
|
||||
p2 = dfclass(r3:2,#0x01)
|
||||
}
|
||||
{
|
||||
if (p0) jump .Linvalid
|
||||
if (p2) jump .Lab_true_zero
|
||||
r28 = ##0x7c000000
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
p0 = bitsclr(r1,r28)
|
||||
if (p0.new) jump:nt .Lfma_ab_tiny
|
||||
}
|
||||
{
|
||||
r28 = add(clb(r11:10),#-11)
|
||||
}
|
||||
{
|
||||
r11:10 = asl(r11:10,r28)
|
||||
}
|
||||
{
|
||||
r3:2 = insert(r11:10,#63,#0)
|
||||
r1 -= asl(r28,#20)
|
||||
}
|
||||
jump fma
|
||||
|
||||
.Lfma_ab_tiny:
|
||||
r9:8 = combine(##0x00100000,#0)
|
||||
{
|
||||
r1:0 = insert(r9:8,#63,#0)
|
||||
r3:2 = insert(r9:8,#63,#0)
|
||||
}
|
||||
jump fma
|
||||
|
||||
.Lab_inf:
|
||||
{
|
||||
r3:2 = lsr(r3:2,#63)
|
||||
p0 = dfclass(r5:4,#0x10)
|
||||
}
|
||||
{
|
||||
r1:0 ^= asl(r3:2,#63)
|
||||
if (p0) jump .Lnan
|
||||
}
|
||||
{
|
||||
p1 = dfclass(r5:4,#0x08)
|
||||
if (p1.new) jump:nt .Lfma_inf_plus_inf
|
||||
}
|
||||
|
||||
{
|
||||
jumpr r31
|
||||
}
|
||||
.falign
|
||||
.Lfma_inf_plus_inf:
|
||||
{
|
||||
p0 = dfcmp.eq(r1:0,r5:4)
|
||||
if (!p0.new) jump:nt .Linvalid
|
||||
}
|
||||
{
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Lnan:
|
||||
{
|
||||
p0 = dfclass(r3:2,#0x10)
|
||||
p1 = dfclass(r5:4,#0x10)
|
||||
if (!p0.new) r3:2 = r1:0
|
||||
if (!p1.new) r5:4 = r1:0
|
||||
}
|
||||
{
|
||||
r3 = convert_df2sf(r3:2)
|
||||
r2 = convert_df2sf(r5:4)
|
||||
}
|
||||
{
|
||||
r3 = convert_df2sf(r1:0)
|
||||
r1:0 = #-1
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Linvalid:
|
||||
{
|
||||
r28 = ##0x7f800001
|
||||
}
|
||||
{
|
||||
r1:0 = convert_sf2df(r28)
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
.Lab_true_zero:
|
||||
|
||||
{
|
||||
p0 = dfclass(r5:4,#0x10)
|
||||
if (p0.new) jump:nt .Lnan
|
||||
if (p0.new) r1:0 = r5:4
|
||||
}
|
||||
{
|
||||
p0 = dfcmp.eq(r3:2,r5:4)
|
||||
r1 = lsr(r1,#31)
|
||||
}
|
||||
{
|
||||
r3 ^= asl(r1,#31)
|
||||
if (!p0) r1:0 = r5:4
|
||||
if (!p0) jumpr r31
|
||||
}
|
||||
|
||||
{
|
||||
p0 = cmp.eq(r3:2,r5:4)
|
||||
if (p0.new) jumpr:t r31
|
||||
r1:0 = r3:2
|
||||
}
|
||||
{
|
||||
r28 = USR
|
||||
}
|
||||
{
|
||||
r28 = extractu(r28,#2,#22)
|
||||
r1:0 = #0
|
||||
}
|
||||
{
|
||||
p0 = cmp.eq(r28,#2)
|
||||
if (p0.new) r1 = ##0x80000000
|
||||
jumpr r31
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
.falign
|
||||
.Lfma_abnormal_c:
|
||||
|
||||
|
||||
{
|
||||
p0 = dfclass(r5:4,#0x10)
|
||||
if (p0.new) jump:nt .Lnan
|
||||
if (p0.new) r1:0 = r5:4
|
||||
deallocframe
|
||||
}
|
||||
{
|
||||
p0 = dfclass(r5:4,#0x08)
|
||||
if (p0.new) r1:0 = r5:4
|
||||
if (p0.new) jumpr:nt r31
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
p0 = dfclass(r5:4,#0x01)
|
||||
if (p0.new) jump:nt __hexagon_muldf3
|
||||
r28 = #1
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
allocframe(#32)
|
||||
r11:10 = #0
|
||||
r5 = insert(r28,#11,#20)
|
||||
jump .Lfma_abnormal_c_restart
|
||||
}
|
||||
.size fma,.-fma
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
.text
|
||||
.global __hexagon_mindf3
|
||||
.global __hexagon_maxdf3
|
||||
.type __hexagon_mindf3,@function
|
||||
.type __hexagon_maxdf3,@function
|
||||
.global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3
|
||||
.global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3
|
||||
.p2align 5
|
||||
__hexagon_mindf3:
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x10)
|
||||
p1 = dfcmp.gt(r1:0,r3:2)
|
||||
r5:4 = r1:0
|
||||
}
|
||||
{
|
||||
if (p0) r1:0 = r3:2
|
||||
if (p1) r1:0 = r3:2
|
||||
p2 = dfcmp.eq(r1:0,r3:2)
|
||||
if (!p2.new) jumpr:t r31
|
||||
}
|
||||
|
||||
{
|
||||
r1:0 = or(r5:4,r3:2)
|
||||
jumpr r31
|
||||
}
|
||||
.size __hexagon_mindf3,.-__hexagon_mindf3
|
||||
.falign
|
||||
__hexagon_maxdf3:
|
||||
{
|
||||
p0 = dfclass(r1:0,#0x10)
|
||||
p1 = dfcmp.gt(r3:2,r1:0)
|
||||
r5:4 = r1:0
|
||||
}
|
||||
{
|
||||
if (p0) r1:0 = r3:2
|
||||
if (p1) r1:0 = r3:2
|
||||
p2 = dfcmp.eq(r1:0,r3:2)
|
||||
if (!p2.new) jumpr:t r31
|
||||
}
|
||||
|
||||
{
|
||||
r1:0 = and(r5:4,r3:2)
|
||||
jumpr r31
|
||||
}
|
||||
.size __hexagon_maxdf3,.-__hexagon_maxdf3
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue