This detects (a subset of) the cases when `transmute::<T, U>(x)` can be
lowered to a direct `bitcast T x to U` in LLVM. This assists with
efficiently handling a SIMD vector as multiple different types,
e.g. swapping bytes/words/double words around inside some larger vector
type.
C compilers like GCC and Clang handle integer vector types as `__m128i`
for all widths, and implicitly insert bitcasts as required. This patch
allows Rust to express this, even if it takes a bit of `unsafe`, whereas
previously it was impossible to do at all without inline assembly.
Example:
pub fn reverse_u32s(u: u64x2) -> u64x2 {
unsafe {
let tmp = mem::transmute::<_, u32x4>(u);
let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
mem::transmute::<_, u64x2>(swapped)
}
}
Compiling with `--opt-level=3` gives:
Before
define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
entry-block:
%1 = bitcast <2 x i64> %0 to i128
%u.0.extract.trunc = trunc i128 %1 to i32
%u.4.extract.shift = lshr i128 %1, 32
%u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
%u.8.extract.shift = lshr i128 %1, 64
%u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
%u.12.extract.shift = lshr i128 %1, 96
%u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
%2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
%3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
%4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
%5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
%6 = bitcast <4 x i32> %5 to <2 x i64>
ret <2 x i64> %6
}
_ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
.cfi_startproc
movd %xmm0, %rax
punpckhqdq %xmm0, %xmm0
movd %xmm0, %rcx
movq %rcx, %rdx
shrq $32, %rdx
movq %rax, %rsi
shrq $32, %rsi
movd %eax, %xmm0
movd %ecx, %xmm1
punpckldq %xmm0, %xmm1
movd %esi, %xmm2
movd %edx, %xmm0
punpckldq %xmm2, %xmm0
punpckldq %xmm1, %xmm0
retq
After
define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
entry-block:
%1 = bitcast <2 x i64> %0 to <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
_ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
.cfi_startproc
pshufd $27, %xmm0, %xmm0
retq
|
||
|---|---|---|
| .. | ||
| compiler-rt@62a4ca6055 | ||
| compiletest | ||
| doc | ||
| driver | ||
| etc | ||
| grammar | ||
| jemalloc@b001609960 | ||
| liballoc | ||
| libarena | ||
| libbacktrace | ||
| libcollections | ||
| libcore | ||
| libcoretest | ||
| libflate | ||
| libfmt_macros | ||
| libgetopts | ||
| libgraphviz | ||
| liblibc | ||
| liblog | ||
| librand | ||
| librbml | ||
| libregex | ||
| libregex_macros | ||
| librustc | ||
| librustc_back | ||
| librustc_llvm | ||
| librustc_trans | ||
| librustdoc | ||
| librustrt | ||
| libserialize | ||
| libstd | ||
| libsync | ||
| libsyntax | ||
| libterm | ||
| libtest | ||
| libtime | ||
| libunicode | ||
| llvm@ec1fdb3b9d | ||
| rt | ||
| rustllvm | ||
| test | ||
| README.md | ||
| snapshots.txt | ||
This is a preliminary version of the Rust compiler, libraries and tools.
Source layout:
| Path | Description |
|---|---|
librustc/ |
The self-hosted compiler |
liballoc/ |
Rust's core allocation library |
libcore/ |
The Rust core library |
libdebug/ |
Debugging utilities |
libstd/ |
The standard library (imported and linked by default) |
libsyntax/ |
The Rust parser and pretty-printer |
libtest/ |
Rust's test-runner code |
| ------------------- | --------------------------------------------------------- |
libarena/ |
The arena (a fast but limited memory allocator) |
libbacktrace/ |
The libbacktrace library |
libcollections/ |
A collection of useful data structures and containers |
libflate/ |
Simple compression library |
libfmt_macros/ |
Macro support for format strings |
libfourcc/ |
Data format identifier library |
libgetopts/ |
Get command-line-options library |
libglob/ |
Unix glob patterns library |
libgraphviz/ |
Generating files for Graphviz |
libhexfloat/ |
Hexadecimal floating-point literals |
liblibc/ |
Bindings for the C standard library |
liblog/ |
Utilities for program-wide and customizable logging |
libnum/ |
Extended number support library (complex, rational, etc.) |
librand/ |
Random numbers and distributions |
libregex/ |
Regular expressions |
libregex_macros/ |
The regex! syntax extension |
libsemver/ |
Rust's semantic versioning library |
libserialize/ |
Encode-Decode types library |
libsync/ |
Concurrency mechanisms and primitives |
libterm/ |
ANSI color library for terminals |
libtime/ |
Time operations library |
liburl/ |
URL handling library |
libuuid/ |
UUID handling code |
| ------------------- | --------------------------------------------------------- |
rt/ |
The runtime system |
rt/rust_*.c |
- Some of the runtime services |
rt/vg |
- Valgrind headers |
rt/msvc |
- MSVC support |
rt/sundown |
- The Markdown library used by rustdoc |
| ------------------- | --------------------------------------------------------- |
compiletest/ |
The test runner |
test/ |
Testsuite |
test/codegen |
- Tests for the LLVM IR infrastructure |
test/compile-fail |
- Tests that should fail to compile |
test/debug-info |
- Tests for the debuginfo tool |
test/run-fail |
- Tests that should compile, run and fail |
test/run-make |
- Tests that depend on a Makefile infrastructure |
test/run-pass |
- Tests that should compile, run and succeed |
test/bench |
- Benchmarks and miscellaneous |
test/pretty |
- Pretty-printer tests |
test/auxiliary |
- Dependencies of tests |
| ------------------- | --------------------------------------------------------- |
librustdoc/ |
The Rust API documentation tool |
| ------------------- | --------------------------------------------------------- |
llvm/ |
The LLVM submodule |
rustllvm/ |
LLVM support code |
| ------------------- | --------------------------------------------------------- |
etc/ |
Scripts, editor support, misc. |
NOTE: This list (especially the second part of the table, which contains modules and libraries) is highly volatile and subject to change.