rust/library/alloc/src
bors 12075f04e6 Auto merge of #123878 - jwong101:inplacecollect, r=jhpratt
optimize inplace collection of Vec

This PR has the following changes:

1. Using `usize::unchecked_mul` in 79424056b0/library/alloc/src/vec/in_place_collect.rs (L262) as LLVM, does not know that the operation can't wrap, since that's the size of the original allocation.

Given the following:

```rust

pub struct Foo([usize; 3]);

pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> {
    v.into_iter().map(|f| f.0).collect()
}
```

<details>
<summary>Before this commit:</summary>

```llvm
define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24

; Unnecessary calculation
  %_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24
  %dst_cap.i.i = udiv i64 %_16.i.i, 24

  store i64 %dst_cap.i.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8
  ret void
}
```
</details>

<details>
<summary>After:</summary>

```llvm
define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24
  store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14
  ret void
}
```
</details>

Note that there is still one more `mul,udiv` pair that I couldn't get
rid of. The root cause is the same issue as https://github.com/rust-lang/rust/issues/121239, the `nuw` gets
stripped off of `ptr::sub_ptr`.

2.

`Iterator::try_fold` gets called on the underlying Iterator in
`SpecInPlaceCollect::collect_in_place` whenever it does not implement
`TrustedRandomAccess`. For types that impl `Drop`, LLVM currently can't
tell that the drop can never occur, when using the default
`Iterator::try_fold` implementation.

For example, given the following code from #120493

```rust
#[repr(transparent)]
struct WrappedClone {
    inner: String
}

#[no_mangle]
pub fn unwrap_clone(list: Vec<WrappedClone>) -> Vec<String> {
    list.into_iter().map(|s| s.inner).collect()
}
```

<details>
<summary>The asm for the `unwrap_clone` method is currently:</summary>

```asm
unwrap_clone:
        push    rbp
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        push    rax
        mov     rbx, rdi
        mov     r12, qword ptr [rsi]
        mov     rdi, qword ptr [rsi + 8]
        mov     rax, qword ptr [rsi + 16]
        movabs  rsi, -6148914691236517205
        mov     r14, r12
        test    rax, rax
        je      .LBB0_10
        lea     rcx, [rax + 2*rax]
        lea     r14, [r12 + 8*rcx]
        shl     rax, 3
        lea     rax, [rax + 2*rax]
        xor     ecx, ecx
.LBB0_2:
        cmp     qword ptr [r12 + rcx], 0
        je      .LBB0_4
        add     rcx, 24
        cmp     rax, rcx
        jne     .LBB0_2
        jmp     .LBB0_10
.LBB0_4:
        lea     rdx, [rax - 24]
        lea     r14, [r12 + rcx]
        cmp     rdx, rcx
        je      .LBB0_10
        mov     qword ptr [rsp], rdi
        sub     rax, rcx
        add     rax, -24
        mul     rsi
        mov     r15, rdx
        lea     rbp, [r12 + rcx]
        add     rbp, 32
        shr     r15, 4
        mov     r13, qword ptr [rip + __rust_dealloc@GOTPCREL]
        jmp     .LBB0_6
.LBB0_8:
        add     rbp, 24
        dec     r15
        je      .LBB0_9
.LBB0_6:
        mov     rsi, qword ptr [rbp]
        test    rsi, rsi
        je      .LBB0_8
        mov     rdi, qword ptr [rbp - 8]
        mov     edx, 1
        call    r13
        jmp     .LBB0_8
.LBB0_9:
        mov     rdi, qword ptr [rsp]
        movabs  rsi, -6148914691236517205
.LBB0_10:
        sub     r14, r12
        mov     rax, r14
        mul     rsi
        shr     rdx, 4
        mov     qword ptr [rbx], r12
        mov     qword ptr [rbx + 8], rdi
        mov     qword ptr [rbx + 16], rdx
        mov     rax, rbx
        add     rsp, 8
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        pop     rbp
        ret
```
</details>

<details>
<summary>After this PR:</summary>

```asm
unwrap_clone:
	mov	rax, rdi
	movups	xmm0, xmmword ptr [rsi]
	mov	rcx, qword ptr [rsi + 16]
	movups	xmmword ptr [rdi], xmm0
	mov	qword ptr [rdi + 16], rcx
	ret
```
</details>

Fixes https://github.com/rust-lang/rust/issues/120493
2024-05-20 00:51:12 +00:00
..
alloc run alloc benchmarks in Miri and fix UB 2022-11-07 10:34:04 +01:00
boxed Do not allocate for ZST ThinBox attempt 2 (using const_allocate) 2024-04-05 19:55:00 +01:00
collections fix typo in binary_heap docs 2024-04-24 22:59:39 +02:00
ffi Use shared statics for the ArcInner for Arc<str, CStr>::default, and for Arc<[T]>::default where alignof(T) <= 16. 2024-05-12 20:29:08 -05:00
raw_vec try_with_capacity for RawVec 2024-03-01 18:20:48 +00:00
rc remove redundant imports 2023-12-10 10:56:22 +08:00
slice ignore core, alloc and test tests that require unwinding on panic=abort 2023-06-13 15:53:24 +02:00
sync Fix some Arc allocator leaks 2024-01-28 18:33:34 +01:00
testing Share testing utilities with non-btree test cases 2022-05-02 10:07:50 +02:00
vec use Result::into_ok on infallible result. 2024-05-18 19:15:21 -05:00
alloc.rs step cfgs 2024-03-20 08:49:13 -04:00
borrow.rs Impl DerefPure for more std types 2024-04-05 00:02:42 +02:00
boxed.rs alloc: implement FromIterator for Box<str> 2024-05-05 10:29:57 -07:00
fmt.rs Auto merge of #125012 - RalfJung:format-error, r=Mark-Simulacrum,workingjubilee 2024-05-12 08:34:32 +00:00
lib.miri.rs add 'x.py miri', and make it work for 'library/{core,alloc,std}' 2024-04-03 20:27:20 +02:00
lib.rs use Result::into_ok on infallible result. 2024-05-18 19:15:21 -05:00
macros.rs Update doc for alloc::format! and core::concat! 2023-09-06 15:11:21 +02:00
raw_vec.rs Avoid more NonNull-raw-NonNull roundtrips in Vec 2024-04-12 18:14:29 -04:00
rc.rs Add note about possible allocation-sharing to Arc/Rc<str/[T]/CStr>::default. 2024-05-12 20:27:29 -05:00
slice.rs lib: fix some unnecessary_cast clippy lint 2024-03-25 23:19:40 +03:00
str.rs fix #124714 str.to_lowercase sigma handling 2024-05-08 17:05:10 +02:00
string.rs Auto merge of #99969 - calebsander:feature/collect-box-str, r=dtolnay 2024-05-19 02:13:06 +00:00
sync.rs Use shared statics for the ArcInner for Arc<str, CStr>::default, and for Arc<[T]>::default where alignof(T) <= 16. 2024-05-12 20:29:08 -05:00
task.rs lib: fix some unnecessary_cast clippy lint 2024-03-25 23:19:40 +03:00
tests.rs remove redundant imports 2023-12-10 10:56:22 +08:00