Skip to content

Commit 799dad9

Browse files
committed
[wip] experiment with reversing alignment and size validation order
gherrit-pr-id: G74bf0db1b7c30669171b5985ce24849a264746e7
1 parent 67f28a5 commit 799dad9

57 files changed

Lines changed: 1170 additions & 1187 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
bench_ref_from_bytes_dynamic_padding:
2-
test dil, 3
3-
jne .LBB5_3
42
movabs rax, 9223372036854775804
53
and rax, rsi
64
cmp rax, 9
7-
jb .LBB5_3
5+
setb cl
6+
test dil, 3
7+
setne dl
8+
or dl, cl
9+
jne .LBB5_1
810
add rax, -9
911
movabs rcx, -6148914691236517205
1012
mul rcx
1113
shr rdx
12-
lea rax, [rdx + 2*rdx]
13-
or rax, 3
14-
add rax, 9
15-
cmp rsi, rax
16-
je .LBB5_4
17-
.LBB5_3:
18-
xor edi, edi
19-
mov rdx, rsi
20-
.LBB5_4:
21-
mov rax, rdi
14+
lea rcx, [rdx + 2*rdx]
15+
or rcx, 3
16+
add rcx, 9
17+
xor eax, eax
18+
cmp rsi, rcx
19+
cmovne rdx, rsi
20+
cmove rax, rdi
21+
ret
22+
.LBB5_1:
23+
xor eax, eax
2224
ret
Lines changed: 41 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 1900
3-
Total Cycles: 645
4-
Total uOps: 2000
2+
Instructions: 2200
3+
Total Cycles: 783
4+
Total uOps: 2500
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 3.10
8-
IPC: 2.95
9-
Block RThroughput: 5.0
7+
uOps Per Cycle: 3.19
8+
IPC: 2.81
9+
Block RThroughput: 6.3
1010

1111

1212
Instruction Info:
@@ -18,24 +18,27 @@ Instruction Info:
1818
[6]: HasSideEffects (U)
1919

2020
[1] [2] [3] [4] [5] [6] Instructions:
21-
1 1 0.33 test dil, 3
22-
1 1 1.00 jne .LBB5_3
2321
1 1 0.33 movabs rax, 9223372036854775804
2422
1 1 0.33 and rax, rsi
2523
1 1 0.33 cmp rax, 9
26-
1 1 1.00 jb .LBB5_3
24+
1 1 0.50 setb cl
25+
1 1 0.33 test dil, 3
26+
1 1 0.50 setne dl
27+
1 1 0.33 or dl, cl
28+
1 1 1.00 jne .LBB5_1
2729
1 1 0.33 add rax, -9
2830
1 1 0.33 movabs rcx, -6148914691236517205
2931
2 4 1.00 mul rcx
3032
1 1 0.50 shr rdx
31-
1 1 0.50 lea rax, [rdx + 2*rdx]
32-
1 1 0.33 or rax, 3
33-
1 1 0.33 add rax, 9
34-
1 1 0.33 cmp rsi, rax
35-
1 1 1.00 je .LBB5_4
36-
1 0 0.25 xor edi, edi
37-
1 1 0.33 mov rdx, rsi
38-
1 1 0.33 mov rax, rdi
33+
1 1 0.50 lea rcx, [rdx + 2*rdx]
34+
1 1 0.33 or rcx, 3
35+
1 1 0.33 add rcx, 9
36+
1 0 0.25 xor eax, eax
37+
1 1 0.33 cmp rsi, rcx
38+
2 2 0.67 cmovne rdx, rsi
39+
2 2 0.67 cmove rax, rdi
40+
1 1 1.00 U ret
41+
1 0 0.25 xor eax, eax
3942
1 1 1.00 U ret
4043

4144

@@ -52,26 +55,29 @@ Resources:
5255

5356
Resource pressure per iteration:
5457
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
55-
- - 6.32 6.33 - 6.35 - -
58+
- - 7.65 7.67 - 7.68 - -
5659

5760
Resource pressure by instruction:
5861
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
59-
- - 0.64 0.35 - 0.01 - - test dil, 3
60-
- - - - - 1.00 - - jne .LBB5_3
61-
- - 0.34 0.65 - 0.01 - - movabs rax, 9223372036854775804
62-
- - 0.35 0.65 - - - - and rax, rsi
63-
- - 0.33 0.34 - 0.33 - - cmp rax, 9
64-
- - - - - 1.00 - - jb .LBB5_3
65-
- - 0.35 - - 0.65 - - add rax, -9
66-
- - 0.97 0.01 - 0.02 - - movabs rcx, -6148914691236517205
62+
- - - 0.99 - 0.01 - - movabs rax, 9223372036854775804
63+
- - 0.04 0.95 - 0.01 - - and rax, rsi
64+
- - 0.09 0.85 - 0.06 - - cmp rax, 9
65+
- - 0.50 - - 0.50 - - setb cl
66+
- - 0.01 0.95 - 0.04 - - test dil, 3
67+
- - 0.36 - - 0.64 - - setne dl
68+
- - 0.47 0.12 - 0.41 - - or dl, cl
69+
- - - - - 1.00 - - jne .LBB5_1
70+
- - - 0.95 - 0.05 - - add rax, -9
71+
- - - 0.81 - 0.19 - - movabs rcx, -6148914691236517205
6772
- - 1.00 1.00 - - - - mul rcx
68-
- - 0.99 - - 0.01 - - shr rdx
69-
- - 0.33 0.67 - - - - lea rax, [rdx + 2*rdx]
70-
- - 0.34 0.66 - - - - or rax, 3
71-
- - 0.33 0.66 - 0.01 - - add rax, 9
72-
- - 0.01 0.99 - - - - cmp rsi, rax
73-
- - - - - 1.00 - - je .LBB5_4
74-
- - - - - - - - xor edi, edi
75-
- - 0.32 0.01 - 0.67 - - mov rdx, rsi
76-
- - 0.02 0.34 - 0.64 - - mov rax, rdi
73+
- - 0.62 - - 0.38 - - shr rdx
74+
- - 0.62 0.38 - - - - lea rcx, [rdx + 2*rdx]
75+
- - 0.59 0.17 - 0.24 - - or rcx, 3
76+
- - 0.61 0.19 - 0.20 - - add rcx, 9
77+
- - - - - - - - xor eax, eax
78+
- - 0.75 0.24 - 0.01 - - cmp rsi, rcx
79+
- - 1.00 0.03 - 0.97 - - cmovne rdx, rsi
80+
- - 0.99 0.04 - 0.97 - - cmove rax, rdi
81+
- - - - - 1.00 - - ret
82+
- - - - - - - - xor eax, eax
7783
- - - - - 1.00 - - ret
Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
bench_ref_from_bytes_dynamic_size:
2-
mov rdx, rsi
32
cmp rsi, 4
43
setb al
5-
or al, dil
6-
test al, 1
7-
je .LBB5_2
4+
mov ecx, edi
5+
or cl, al
6+
test cl, 1
7+
jne .LBB5_1
8+
lea rcx, [rsi - 4]
9+
mov rdx, rcx
10+
shr rdx
11+
and rcx, -2
12+
add rcx, 4
813
xor eax, eax
14+
cmp rsi, rcx
15+
cmovne rdx, rsi
16+
cmove rax, rdi
917
ret
10-
.LBB5_2:
11-
lea rcx, [rdx - 4]
12-
mov rsi, rcx
13-
and rsi, -2
14-
add rsi, 4
15-
shr rcx
18+
.LBB5_1:
1619
xor eax, eax
17-
cmp rdx, rsi
18-
cmove rdx, rcx
19-
cmove rax, rdi
2020
ret
Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
Iterations: 100
22
Instructions: 1800
3-
Total Cycles: 704
3+
Total Cycles: 606
44
Total uOps: 2000
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 2.84
8-
IPC: 2.56
7+
uOps Per Cycle: 3.30
8+
IPC: 2.97
99
Block RThroughput: 5.0
1010

1111

@@ -18,23 +18,23 @@ Instruction Info:
1818
[6]: HasSideEffects (U)
1919

2020
[1] [2] [3] [4] [5] [6] Instructions:
21-
1 1 0.33 mov rdx, rsi
2221
1 1 0.33 cmp rsi, 4
2322
1 1 0.50 setb al
24-
1 1 0.33 or al, dil
25-
1 1 0.33 test al, 1
26-
1 1 1.00 je .LBB5_2
23+
1 1 0.33 mov ecx, edi
24+
1 1 0.33 or cl, al
25+
1 1 0.33 test cl, 1
26+
1 1 1.00 jne .LBB5_1
27+
1 1 0.50 lea rcx, [rsi - 4]
28+
1 1 0.33 mov rdx, rcx
29+
1 1 0.50 shr rdx
30+
1 1 0.33 and rcx, -2
31+
1 1 0.33 add rcx, 4
2732
1 0 0.25 xor eax, eax
33+
1 1 0.33 cmp rsi, rcx
34+
2 2 0.67 cmovne rdx, rsi
35+
2 2 0.67 cmove rax, rdi
2836
1 1 1.00 U ret
29-
1 1 0.50 lea rcx, [rdx - 4]
30-
1 1 0.33 mov rsi, rcx
31-
1 1 0.33 and rsi, -2
32-
1 1 0.33 add rsi, 4
33-
1 1 0.50 shr rcx
3437
1 0 0.25 xor eax, eax
35-
1 1 0.33 cmp rdx, rsi
36-
2 2 0.67 cmove rdx, rcx
37-
2 2 0.67 cmove rax, rdi
3838
1 1 1.00 U ret
3939

4040

@@ -51,25 +51,25 @@ Resources:
5151

5252
Resource pressure per iteration:
5353
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
54-
- - 5.97 5.98 - 6.05 - -
54+
- - 6.00 6.00 - 6.00 - -
5555

5656
Resource pressure by instruction:
5757
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
58-
- - 0.97 0.01 - 0.02 - - mov rdx, rsi
59-
- - 0.01 0.02 - 0.97 - - cmp rsi, 4
60-
- - 0.03 - - 0.97 - - setb al
61-
- - 0.01 0.02 - 0.97 - - or al, dil
62-
- - - 0.98 - 0.02 - - test al, 1
63-
- - - - - 1.00 - - je .LBB5_2
58+
- - 0.99 - - 0.01 - - cmp rsi, 4
59+
- - 1.00 - - - - - setb al
60+
- - 0.98 0.02 - - - - mov ecx, edi
61+
- - 0.98 0.01 - 0.01 - - or cl, al
62+
- - 0.01 0.99 - - - - test cl, 1
63+
- - - - - 1.00 - - jne .LBB5_1
64+
- - 0.99 0.01 - - - - lea rcx, [rsi - 4]
65+
- - 0.02 0.98 - - - - mov rdx, rcx
66+
- - - - - 1.00 - - shr rdx
67+
- - 0.99 0.01 - - - - and rcx, -2
68+
- - - 1.00 - - - - add rcx, 4
6469
- - - - - - - - xor eax, eax
70+
- - 0.02 0.98 - - - - cmp rsi, rcx
71+
- - 0.01 1.00 - 0.99 - - cmovne rdx, rsi
72+
- - 0.01 1.00 - 0.99 - - cmove rax, rdi
6573
- - - - - 1.00 - - ret
66-
- - 0.98 0.02 - - - - lea rcx, [rdx - 4]
67-
- - 0.01 0.99 - - - - mov rsi, rcx
68-
- - - 0.98 - 0.02 - - and rsi, -2
69-
- - 0.98 0.01 - 0.01 - - add rsi, 4
70-
- - 0.99 - - 0.01 - - shr rcx
7174
- - - - - - - - xor eax, eax
72-
- - 0.02 0.97 - 0.01 - - cmp rdx, rsi
73-
- - 0.99 0.99 - 0.02 - - cmove rdx, rcx
74-
- - 0.98 0.99 - 0.03 - - cmove rax, rdi
7575
- - - - - 1.00 - - ret
Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
bench_ref_from_bytes_with_elems_dynamic_padding:
2-
movabs rax, 3074457345618258598
3-
cmp rdx, rax
4-
seta cl
2+
movabs rcx, 3074457345618258598
3+
cmp rdx, rcx
4+
ja .LBB5_3
55
mov rax, rdi
66
test al, 3
7-
setne dil
8-
or dil, cl
9-
jne .LBB5_2
7+
jne .LBB5_3
108
lea rcx, [rdx + 2*rdx]
119
or rcx, 3
1210
add rcx, 9
1311
cmp rsi, rcx
14-
je .LBB5_3
15-
.LBB5_2:
12+
jne .LBB5_3
13+
ret
14+
.LBB5_3:
1615
xor eax, eax
1716
mov rdx, rsi
18-
.LBB5_3:
1917
ret
Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
Iterations: 100
2-
Instructions: 1600
3-
Total Cycles: 539
4-
Total uOps: 1700
2+
Instructions: 1500
3+
Total Cycles: 505
4+
Total uOps: 1500
55

66
Dispatch Width: 4
7-
uOps Per Cycle: 3.15
7+
uOps Per Cycle: 2.97
88
IPC: 2.97
9-
Block RThroughput: 4.3
9+
Block RThroughput: 5.0
1010

1111

1212
Instruction Info:
@@ -18,19 +18,18 @@ Instruction Info:
1818
[6]: HasSideEffects (U)
1919

2020
[1] [2] [3] [4] [5] [6] Instructions:
21-
1 1 0.33 movabs rax, 3074457345618258598
22-
1 1 0.33 cmp rdx, rax
23-
2 2 1.00 seta cl
21+
1 1 0.33 movabs rcx, 3074457345618258598
22+
1 1 0.33 cmp rdx, rcx
23+
1 1 1.00 ja .LBB5_3
2424
1 1 0.33 mov rax, rdi
2525
1 1 0.33 test al, 3
26-
1 1 0.50 setne dil
27-
1 1 0.33 or dil, cl
28-
1 1 1.00 jne .LBB5_2
26+
1 1 1.00 jne .LBB5_3
2927
1 1 0.50 lea rcx, [rdx + 2*rdx]
3028
1 1 0.33 or rcx, 3
3129
1 1 0.33 add rcx, 9
3230
1 1 0.33 cmp rsi, rcx
33-
1 1 1.00 je .LBB5_3
31+
1 1 1.00 jne .LBB5_3
32+
1 1 1.00 U ret
3433
1 0 0.25 xor eax, eax
3534
1 1 0.33 mov rdx, rsi
3635
1 1 1.00 U ret
@@ -49,23 +48,22 @@ Resources:
4948

5049
Resource pressure per iteration:
5150
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
52-
- - 5.33 5.32 - 5.35 - -
51+
- - 4.49 4.49 - 5.02 - -
5352

5453
Resource pressure by instruction:
5554
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
56-
- - 0.01 0.98 - 0.01 - - movabs rax, 3074457345618258598
57-
- - - 1.00 - - - - cmp rdx, rax
58-
- - 1.98 - - 0.02 - - seta cl
59-
- - 0.02 0.98 - - - - mov rax, rdi
60-
- - - 0.67 - 0.33 - - test al, 3
61-
- - 0.67 - - 0.33 - - setne dil
62-
- - 0.99 - - 0.01 - - or dil, cl
63-
- - - - - 1.00 - - jne .LBB5_2
64-
- - 0.01 0.99 - - - - lea rcx, [rdx + 2*rdx]
65-
- - - 0.01 - 0.99 - - or rcx, 3
66-
- - 0.65 0.02 - 0.33 - - add rcx, 9
67-
- - 0.99 0.01 - - - - cmp rsi, rcx
68-
- - - - - 1.00 - - je .LBB5_3
55+
- - 0.97 0.02 - 0.01 - - movabs rcx, 3074457345618258598
56+
- - 0.50 0.50 - - - - cmp rdx, rcx
57+
- - - - - 1.00 - - ja .LBB5_3
58+
- - 0.50 0.50 - - - - mov rax, rdi
59+
- - 0.02 0.97 - 0.01 - - test al, 3
60+
- - - - - 1.00 - - jne .LBB5_3
61+
- - 0.97 0.03 - - - - lea rcx, [rdx + 2*rdx]
62+
- - 0.50 0.50 - - - - or rcx, 3
63+
- - 0.03 0.97 - - - - add rcx, 9
64+
- - 0.03 0.97 - - - - cmp rsi, rcx
65+
- - - - - 1.00 - - jne .LBB5_3
66+
- - - - - 1.00 - - ret
6967
- - - - - - - - xor eax, eax
70-
- - 0.01 0.66 - 0.33 - - mov rdx, rsi
68+
- - 0.97 0.03 - - - - mov rdx, rsi
7169
- - - - - 1.00 - - ret

benches/ref_from_bytes_with_elems_dynamic_size.x86-64

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
bench_ref_from_bytes_with_elems_dynamic_size:
2-
movabs rax, 4611686018427387901
3-
cmp rdx, rax
4-
seta cl
2+
movabs rcx, 4611686018427387901
3+
cmp rdx, rcx
4+
ja .LBB5_2
55
mov rax, rdi
6-
or dil, cl
7-
test dil, 1
8-
jne .LBB5_2
96
lea rcx, [2*rdx + 4]
10-
cmp rsi, rcx
7+
and edi, 1
8+
xor rcx, rsi
9+
or rcx, rdi
1110
je .LBB5_3
1211
.LBB5_2:
1312
xor eax, eax

0 commit comments

Comments
 (0)