11Iterations: 100
2- Instructions: 1900
3- Total Cycles: 645
4- Total uOps: 2000
2+ Instructions: 2200
3+ Total Cycles: 783
4+ Total uOps: 2500
55
66Dispatch Width: 4
7- uOps Per Cycle: 3.10
8- IPC: 2.95
9- Block RThroughput: 5.0
7+ uOps Per Cycle: 3.19
8+ IPC: 2.81
9+ Block RThroughput: 6.3
1010
1111
1212Instruction Info:
@@ -18,24 +18,27 @@ Instruction Info:
1818[6]: HasSideEffects (U)
1919
2020[1] [2] [3] [4] [5] [6] Instructions:
21- 1 1 0.33 test dil, 3
22- 1 1 1.00 jne .LBB5_3
2321 1 1 0.33 movabs rax, 9223372036854775804
2422 1 1 0.33 and rax, rsi
2523 1 1 0.33 cmp rax, 9
26- 1 1 1.00 jb .LBB5_3
24+ 1 1 0.50 setb cl
25+ 1 1 0.33 test dil, 3
26+ 1 1 0.50 setne dl
27+ 1 1 0.33 or dl, cl
28+ 1 1 1.00 jne .LBB5_1
2729 1 1 0.33 add rax, -9
2830 1 1 0.33 movabs rcx, -6148914691236517205
2931 2 4 1.00 mul rcx
3032 1 1 0.50 shr rdx
31- 1 1 0.50 lea rax, [rdx + 2*rdx]
32- 1 1 0.33 or rax, 3
33- 1 1 0.33 add rax, 9
34- 1 1 0.33 cmp rsi, rax
35- 1 1 1.00 je .LBB5_4
36- 1 0 0.25 xor edi, edi
37- 1 1 0.33 mov rdx, rsi
38- 1 1 0.33 mov rax, rdi
33+ 1 1 0.50 lea rcx, [rdx + 2*rdx]
34+ 1 1 0.33 or rcx, 3
35+ 1 1 0.33 add rcx, 9
36+ 1 0 0.25 xor eax, eax
37+ 1 1 0.33 cmp rsi, rcx
38+ 2 2 0.67 cmovne rdx, rsi
39+ 2 2 0.67 cmove rax, rdi
40+ 1 1 1.00 U ret
41+ 1 0 0.25 xor eax, eax
3942 1 1 1.00 U ret
4043
4144
@@ -52,26 +55,29 @@ Resources:
5255
5356Resource pressure per iteration:
5457[0] [1] [2] [3] [4] [5] [6.0] [6.1]
55- - - 6.32 6.33 - 6.35 - -
58+ - - 7.65 7.67 - 7.68 - -
5659
5760Resource pressure by instruction:
5861[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
59- - - 0.64 0.35 - 0.01 - - test dil, 3
60- - - - - - 1.00 - - jne .LBB5_3
61- - - 0.34 0.65 - 0.01 - - movabs rax, 9223372036854775804
62- - - 0.35 0.65 - - - - and rax, rsi
63- - - 0.33 0.34 - 0.33 - - cmp rax, 9
64- - - - - - 1.00 - - jb .LBB5_3
65- - - 0.35 - - 0.65 - - add rax, -9
66- - - 0.97 0.01 - 0.02 - - movabs rcx, -6148914691236517205
62+ - - - 0.99 - 0.01 - - movabs rax, 9223372036854775804
63+ - - 0.04 0.95 - 0.01 - - and rax, rsi
64+ - - 0.09 0.85 - 0.06 - - cmp rax, 9
65+ - - 0.50 - - 0.50 - - setb cl
66+ - - 0.01 0.95 - 0.04 - - test dil, 3
67+ - - 0.36 - - 0.64 - - setne dl
68+ - - 0.47 0.12 - 0.41 - - or dl, cl
69+ - - - - - 1.00 - - jne .LBB5_1
70+ - - - 0.95 - 0.05 - - add rax, -9
71+ - - - 0.81 - 0.19 - - movabs rcx, -6148914691236517205
6772 - - 1.00 1.00 - - - - mul rcx
68- - - 0.99 - - 0.01 - - shr rdx
69- - - 0.33 0.67 - - - - lea rax, [rdx + 2*rdx]
70- - - 0.34 0.66 - - - - or rax, 3
71- - - 0.33 0.66 - 0.01 - - add rax, 9
72- - - 0.01 0.99 - - - - cmp rsi, rax
73- - - - - - 1.00 - - je .LBB5_4
74- - - - - - - - - xor edi, edi
75- - - 0.32 0.01 - 0.67 - - mov rdx, rsi
76- - - 0.02 0.34 - 0.64 - - mov rax, rdi
73+ - - 0.62 - - 0.38 - - shr rdx
74+ - - 0.62 0.38 - - - - lea rcx, [rdx + 2*rdx]
75+ - - 0.59 0.17 - 0.24 - - or rcx, 3
76+ - - 0.61 0.19 - 0.20 - - add rcx, 9
77+ - - - - - - - - xor eax, eax
78+ - - 0.75 0.24 - 0.01 - - cmp rsi, rcx
79+ - - 1.00 0.03 - 0.97 - - cmovne rdx, rsi
80+ - - 0.99 0.04 - 0.97 - - cmove rax, rdi
81+ - - - - - 1.00 - - ret
82+ - - - - - - - - xor eax, eax
7783 - - - - - 1.00 - - ret
0 commit comments