Skip to content

Commit 67f28a5

Browse files
committed
Optimize bounds checking by validating metadata against maximum
Introduces `KnownLayout::is_valid_metadata`, which produces `true` iff the given metadata can describe a valid allocation of `Self`, by comparing the given metadata against the theoretical maximum for `Self`'s layout. The theoretical maximum metadata can be (and, in practice, is) computed at compile time. We insert this check in the two critical places that bounds-check user-provided metadata: `new_box` and `validate_cast_and_convert_metadata`. For sized and simple dynamically sized types, this does not meaningfully impact codegen, as the optimizer was already able to deduce the maximum valid metadata. However, for dynamically padded types, this permits the compiler to use unchecked arithmetic, resulting in MCA cycle count reductions of as much as 44%. Makes progress towards #3079. gherrit-pr-id: Gcf66958135e905f7cd7d2fac87d9f881e5e5185f
1 parent 66e3db6 commit 67f28a5

34 files changed

Lines changed: 631 additions & 772 deletions

File tree

benches/new_box_zeroed_with_elems_dynamic_padding.x86-64

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,20 @@ bench_new_box_zeroed_with_elems_dynamic_padding:
33
push rbx
44
push rax
55
mov rbx, rdi
6-
mov ecx, 3
7-
mov rax, rdi
8-
mul rcx
9-
jo .LBB5_6
10-
mov r14, rax
11-
cmp rax, -10
12-
ja .LBB5_6
13-
lea rax, [r14 + 9]
14-
not r14d
15-
and r14d, 3
16-
add r14, rax
17-
setb al
18-
movabs rcx, 9223372036854775803
19-
cmp r14, rcx
20-
seta cl
21-
or cl, al
22-
je .LBB5_4
23-
.LBB5_6:
24-
xor eax, eax
25-
jmp .LBB5_5
26-
.LBB5_4:
6+
movabs rax, 3074457345618258598
7+
cmp rdi, rax
8+
ja .LBB5_1
9+
lea r14, [rbx + 2*rbx]
10+
or r14, 3
11+
add r14, 9
2712
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
2813
mov esi, 4
2914
mov rdi, r14
3015
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
31-
.LBB5_5:
16+
jmp .LBB5_3
17+
.LBB5_1:
18+
xor eax, eax
19+
.LBB5_3:
3220
mov rdx, rbx
3321
add rsp, 8
3422
pop rbx
benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca

Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 3200
3-
Total Cycles: 2989
4-
Total uOps: 4300
2+
Instructions: 2100
3+
Total Cycles: 2990
4+
Total uOps: 3000
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 1.44
8-
IPC: 1.07
9-
Block RThroughput: 10.8
7+
uOps Per Cycle: 1.00
8+
IPC: 0.70
9+
Block RThroughput: 7.5
1010

1111

1212
Instruction Info:
@@ -22,29 +22,18 @@ Instruction Info:
2222
2 5 1.00 * push rbx
2323
2 5 1.00 * push rax
2424
1 1 0.33 mov rbx, rdi
25-
1 1 0.33 mov ecx, 3
26-
1 1 0.33 mov rax, rdi
27-
2 4 1.00 mul rcx
28-
1 1 1.00 jo .LBB5_6
29-
1 1 0.33 mov r14, rax
30-
1 1 0.33 cmp rax, -10
31-
1 1 1.00 ja .LBB5_6
32-
1 1 0.50 lea rax, [r14 + 9]
33-
1 1 0.33 not r14d
34-
1 1 0.33 and r14d, 3
35-
1 1 0.33 add r14, rax
36-
1 1 0.50 setb al
37-
1 1 0.33 movabs rcx, 9223372036854775803
38-
1 1 0.33 cmp r14, rcx
39-
2 2 1.00 seta cl
40-
1 1 0.33 or cl, al
41-
1 1 1.00 je .LBB5_4
42-
1 0 0.25 xor eax, eax
43-
1 1 1.00 jmp .LBB5_5
25+
1 1 0.33 movabs rax, 3074457345618258598
26+
1 1 0.33 cmp rdi, rax
27+
1 1 1.00 ja .LBB5_1
28+
1 1 0.50 lea r14, [rbx + 2*rbx]
29+
1 1 0.33 or r14, 3
30+
1 1 0.33 add r14, 9
4431
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
4532
1 1 0.33 mov esi, 4
4633
1 1 0.33 mov rdi, r14
4734
4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
35+
1 1 1.00 jmp .LBB5_3
36+
1 0 0.25 xor eax, eax
4837
1 1 0.33 mov rdx, rbx
4938
1 1 0.33 add rsp, 8
5039
1 6 0.50 * pop rbx
@@ -65,39 +54,28 @@ Resources:
6554

6655
Resource pressure per iteration:
6756
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
68-
- - 8.99 8.98 5.00 10.03 4.49 4.51
57+
- - 4.49 4.50 5.00 6.01 4.50 4.50
6958

7059
Resource pressure by instruction:
7160
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
72-
- - - - 1.00 - 0.49 0.51 push r14
73-
- - - - 1.00 - 0.51 0.49 push rbx
74-
- - - - 1.00 - 0.49 0.51 push rax
75-
- - 0.95 0.04 - 0.01 - - mov rbx, rdi
76-
- - - 0.97 - 0.03 - - mov ecx, 3
77-
- - 0.02 0.02 - 0.96 - - mov rax, rdi
78-
- - 1.00 1.00 - - - - mul rcx
79-
- - - - - 1.00 - - jo .LBB5_6
80-
- - 0.02 0.97 - 0.01 - - mov r14, rax
81-
- - 0.97 0.03 - - - - cmp rax, -10
82-
- - - - - 1.00 - - ja .LBB5_6
83-
- - 0.99 0.01 - - - - lea rax, [r14 + 9]
84-
- - 0.01 0.99 - - - - not r14d
85-
- - 0.97 0.03 - - - - and r14d, 3
86-
- - 0.01 0.98 - 0.01 - - add r14, rax
87-
- - 1.00 - - - - - setb al
88-
- - 0.02 - - 0.98 - - movabs rcx, 9223372036854775803
89-
- - - 0.97 - 0.03 - - cmp r14, rcx
90-
- - 2.00 - - - - - seta cl
91-
- - 0.03 0.03 - 0.94 - - or cl, al
92-
- - - - - 1.00 - - je .LBB5_4
61+
- - - - 1.00 - 0.50 0.50 push r14
62+
- - - - 1.00 - 0.50 0.50 push rbx
63+
- - - - 1.00 - 0.50 0.50 push rax
64+
- - 0.49 0.50 - 0.01 - - mov rbx, rdi
65+
- - 0.50 0.50 - - - - movabs rax, 3074457345618258598
66+
- - 0.50 0.50 - - - - cmp rdi, rax
67+
- - - - - 1.00 - - ja .LBB5_1
68+
- - 0.50 0.50 - - - - lea r14, [rbx + 2*rbx]
69+
- - 0.50 0.50 - - - - or r14, 3
70+
- - 0.50 - - 0.50 - - add r14, 9
71+
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
72+
- - - 0.50 - 0.50 - - mov esi, 4
73+
- - 0.50 0.50 - - - - mov rdi, r14
74+
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
75+
- - - - - 1.00 - - jmp .LBB5_3
9376
- - - - - - - - xor eax, eax
94-
- - - - - 1.00 - - jmp .LBB5_5
95-
- - - - 1.00 1.00 1.02 0.98 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
96-
- - 0.03 0.97 - - - - mov esi, 4
97-
- - 0.96 0.01 - 0.03 - - mov rdi, r14
98-
- - - - 1.00 1.00 0.98 1.02 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
99-
- - - 0.97 - 0.03 - - mov rdx, rbx
100-
- - 0.01 0.99 - - - - add rsp, 8
77+
- - 0.51 0.49 - - - - mov rdx, rbx
78+
- - 0.49 0.51 - - - - add rsp, 8
10179
- - - - - - 0.50 0.50 pop rbx
10280
- - - - - - 0.50 0.50 pop r14
10381
- - - - - 1.00 - - ret

benches/new_box_zeroed_with_elems_dynamic_size.x86-64

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@ bench_new_box_zeroed_with_elems_dynamic_size:
33
push rbx
44
push rax
55
mov rbx, rdi
6-
movabs rax, 4611686018427387900
6+
movabs rax, 4611686018427387901
77
cmp rdi, rax
8-
jbe .LBB5_2
9-
xor eax, eax
10-
jmp .LBB5_3
11-
.LBB5_2:
8+
ja .LBB5_1
129
lea r14, [2*rbx + 4]
1310
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
1411
mov esi, 2
1512
mov rdi, r14
1613
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
14+
jmp .LBB5_3
15+
.LBB5_1:
16+
xor eax, eax
1717
.LBB5_3:
1818
mov rdx, rbx
1919
add rsp, 8

benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ Instruction Info:
2222
2 5 1.00 * push rbx
2323
2 5 1.00 * push rax
2424
1 1 0.33 mov rbx, rdi
25-
1 1 0.33 movabs rax, 4611686018427387900
25+
1 1 0.33 movabs rax, 4611686018427387901
2626
1 1 0.33 cmp rdi, rax
27-
1 1 1.00 jbe .LBB5_2
28-
1 0 0.25 xor eax, eax
29-
1 1 1.00 jmp .LBB5_3
27+
1 1 1.00 ja .LBB5_1
3028
1 1 0.50 lea r14, [2*rbx + 4]
3129
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
3230
1 1 0.33 mov esi, 2
3331
1 1 0.33 mov rdi, r14
3432
4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
33+
1 1 1.00 jmp .LBB5_3
34+
1 0 0.25 xor eax, eax
3535
1 1 0.33 mov rdx, rbx
3636
1 1 0.33 add rsp, 8
3737
1 6 0.50 * pop rbx
@@ -52,26 +52,26 @@ Resources:
5252

5353
Resource pressure per iteration:
5454
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
55-
- - 3.97 3.97 5.00 5.06 4.50 4.50
55+
- - 3.97 3.98 5.00 5.05 4.50 4.50
5656

5757
Resource pressure by instruction:
5858
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
5959
- - - - 1.00 - 0.50 0.50 push r14
6060
- - - - 1.00 - 0.50 0.50 push rbx
6161
- - - - 1.00 - 0.50 0.50 push rax
62-
- - 0.94 0.05 - 0.01 - - mov rbx, rdi
63-
- - 0.05 0.95 - - - - movabs rax, 4611686018427387900
64-
- - 0.95 - - 0.05 - - cmp rdi, rax
65-
- - - - - 1.00 - - jbe .LBB5_2
66-
- - - - - - - - xor eax, eax
67-
- - - - - 1.00 - - jmp .LBB5_3
68-
- - - 1.00 - - - - lea r14, [2*rbx + 4]
62+
- - 0.05 0.94 - 0.01 - - mov rbx, rdi
63+
- - 0.94 0.06 - - - - movabs rax, 4611686018427387901
64+
- - 0.06 0.94 - - - - cmp rdi, rax
65+
- - - - - 1.00 - - ja .LBB5_1
66+
- - 0.94 0.06 - - - - lea r14, [2*rbx + 4]
6967
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
70-
- - 0.06 0.94 - - - - mov esi, 2
71-
- - 0.94 0.06 - - - - mov rdi, r14
68+
- - 0.98 0.02 - - - - mov esi, 2
69+
- - 0.02 0.94 - 0.04 - - mov rdi, r14
7270
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
73-
- - 0.05 0.95 - - - - mov rdx, rbx
74-
- - 0.98 0.02 - - - - add rsp, 8
71+
- - - - - 1.00 - - jmp .LBB5_3
72+
- - - - - - - - xor eax, eax
73+
- - 0.94 0.06 - - - - mov rdx, rbx
74+
- - 0.04 0.96 - - - - add rsp, 8
7575
- - - - - - 0.50 0.50 pop rbx
7676
- - - - - - 0.50 0.50 pop r14
7777
- - - - - 1.00 - - ret

benches/new_vec_zeroed.x86-64

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,40 @@
11
bench_new_vec_zeroed:
2+
mov rax, rdi
3+
movabs rcx, 1537228672809129301
4+
cmp rsi, rcx
5+
ja .LBB5_5
6+
test rsi, rsi
7+
je .LBB5_2
28
push r15
39
push r14
4-
push r12
510
push rbx
6-
push rax
7-
mov rbx, rdi
8-
movabs r12, 9223372036854775805
9-
mov ecx, 6
10-
mov rax, rsi
11-
mul rcx
12-
jo .LBB5_6
13-
cmp rax, r12
14-
jbe .LBB5_2
15-
.LBB5_6:
16-
add r12, 3
17-
mov qword ptr [rbx], r12
18-
jmp .LBB5_7
19-
.LBB5_2:
20-
mov r14, rsi
21-
test rax, rax
22-
je .LBB5_3
23-
mov r15, rax
11+
lea rcx, [rsi + rsi]
12+
lea rbx, [rcx + 2*rcx]
13+
mov r14, rax
14+
mov r15, rsi
2415
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
2516
mov esi, 2
26-
mov rdi, r15
17+
mov rdi, rbx
2718
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
28-
test rax, rax
29-
jne .LBB5_5
30-
jmp .LBB5_6
31-
.LBB5_3:
32-
mov eax, 2
33-
.LBB5_5:
34-
mov qword ptr [rbx], r14
35-
mov qword ptr [rbx + 8], rax
36-
mov qword ptr [rbx + 16], r14
37-
.LBB5_7:
38-
mov rax, rbx
39-
add rsp, 8
19+
mov rsi, r15
20+
mov rcx, rax
21+
mov rax, r14
22+
test rcx, rcx
4023
pop rbx
41-
pop r12
4224
pop r14
4325
pop r15
26+
je .LBB5_5
27+
mov qword ptr [rax], rsi
28+
mov qword ptr [rax + 8], rcx
29+
mov qword ptr [rax + 16], rsi
30+
ret
31+
.LBB5_5:
32+
movabs rcx, -9223372036854775808
33+
mov qword ptr [rax], rcx
34+
ret
35+
.LBB5_2:
36+
mov ecx, 2
37+
mov qword ptr [rax], rsi
38+
mov qword ptr [rax + 8], rcx
39+
mov qword ptr [rax + 16], rsi
4440
ret

0 commit comments

Comments
 (0)