Consider the following code, in C++:
#include <cstdlib>
// Counts upward from 0 one step at a time until the counter reaches n,
// then returns the counter (which equals n once the loop has exited).
std::size_t count(std::size_t n)
{
std::size_t i = 0;
while (i < n) {
// Empty asm statement with a "memory" clobber: the template string is
// empty, so it adds no instructions of its own, but it keeps the compiler
// from optimizing the loop away.
asm volatile("": : :"memory");
++i;
}
return i;
}
// Calls count() with the number parsed from argv[1], or with 1 when no
// argument is given, and returns the result from main.
int main(int argc, char* argv[])
{
// std::atoll yields a long long that is converted to std::size_t for the
// call; the std::size_t result is converted to int by the return.
return count(argc > 1 ? std::atoll(argv[1]) : 1);
}
It is just a loop that increments a counter and returns it at the end. The asm volatile statement
prevents the loop from being optimized away. We compile it with g++ 8.1 and clang++ 5.0,
using the arguments -Wall -Wextra -std=c++11 -g -O3.
Now, if we look at what Compiler Explorer produces, we have, for g++:
# g++ output for: std::size_t count(std::size_t n)
# In:  rdi = n        Out: rax = final counter value (== n)
# rdx is the up-counter i; rax holds n for the whole function.
count(unsigned long):
mov rax, rdi            # rax = n; doubles as the return value, since i == n at exit
test rdi, rdi
je .L2                  # n == 0: skip the loop and return 0 (rax is already 0)
xor edx, edx            # i = 0
.L3:
add rdx, 1              # ++i
cmp rax, rdx
jne .L3                 # keep looping until i == n
.L2:
ret
# g++ output for main: count() has been inlined twice, once per path.
# In:  edi = argc, rsi = argv      Out: eax = (int) final counter value
# In both loops rdx is the up-counter i and rax is the limit n.
main:
mov eax, 1              # n = 1, the default used when no argument is given
xor edx, edx            # i = 0
cmp edi, 1
jg .L25                 # argc > 1: go parse argv[1]
.L21:                   # inlined loop for the default path (n == 1)
add rdx, 1              # ++i
cmp rdx, rax
jb .L21                 # unsigned compare: continue while i < n
mov eax, edx            # return value = low 32 bits of i
ret
.L25:
push rcx                # rcx's value is never used; NOTE(review): presumably only moves rsp by 8 so the stack is 16-byte aligned at the call
mov rdi, QWORD PTR [rsi+8]  # 1st argument: argv[1]
mov edx, 10             # 3rd argument: base 10
xor esi, esi            # 2nd argument: null end pointer
call strtoll            # the std::atoll call from the source appears here as strtoll(argv[1], 0, 10)
mov rdx, rax            # rdx = n, so that rdx is already 0 if the loop is skipped
test rax, rax
je .L11                 # n == 0: skip the loop
xor edx, edx            # i = 0
.L12:                   # inlined loop for the parsed value
add rdx, 1              # ++i
cmp rdx, rax
jb .L12                 # unsigned compare: continue while i < n
.L11:
mov eax, edx            # return value = low 32 bits of i
pop rdx                 # undoes the earlier push; the popped value is not used
ret
and for clang++:
# clang++ output for: std::size_t count(std::size_t n)
# In:  rdi = n        Out: rax = n
# Instead of counting i up to n, the loop counts a copy of n down to zero:
# it still runs n times, and the result is then simply reloaded from rdi.
count(unsigned long): # @count(unsigned long)
test rdi, rdi
je .LBB0_1              # n == 0: take the no-loop path below
mov rax, rdi            # rax = remaining iterations, starts at n
.LBB0_3: # =>This Inner Loop Header: Depth=1
dec rax                 # one iteration done: --remaining
jne .LBB0_3             # loop again while remaining != 0; this dec/jne pair is the whole loop
mov rax, rdi            # return n, the value i would have reached
ret
.LBB0_1:
xor edi, edi            # rdi = 0 (it is already 0 on this path)
mov rax, rdi            # return 0
ret
# clang++ output for main: count() is inlined as a single down-counting loop
# shared by the default path (n = 1) and the parsed-argument path.
# In:  edi = argc, rsi = argv      Out: eax = (int) n
main: # @main
push rbx                # rbx is written below, so its old value is saved here and restored before each ret
cmp edi, 2
jl .LBB1_1              # argc < 2: use the default n = 1
mov rdi, qword ptr [rsi + 8]  # 1st argument: argv[1]
xor ebx, ebx            # ebx = 0, the result returned if the parsed value is 0
xor esi, esi            # 2nd argument: null end pointer
mov edx, 10             # 3rd argument: base 10
call strtoll            # the std::atoll call from the source appears here as strtoll(argv[1], 0, 10)
test rax, rax
jne .LBB1_3             # n != 0: run the loop
mov eax, ebx            # n == 0: return 0 without looping
pop rbx
ret
.LBB1_1:
mov eax, 1              # default n = 1
.LBB1_3:
mov rcx, rax            # rcx = remaining iterations, starts at n
.LBB1_4: # =>This Inner Loop Header: Depth=1
dec rcx                 # one iteration done: --remaining
jne .LBB1_4             # loop again while remaining != 0
mov rbx, rax            # rbx = n, the value i would have reached
mov eax, ebx            # return value = low 32 bits of n
pop rbx
ret
Understanding the code generated by g++ is not that complicated, the loop being:
.L3:
add rdx, 1              # ++i
cmp rax, rdx            # compare n with i
jne .L3                 # repeat until they are equal
Every iteration increments rdx and compares it to rax, which holds the number of iterations the loop has to run.
Now, I have no idea what clang++ is doing. Apparently it uses dec, which is weird to me, and I don't even understand where the actual loop is. My question is the following: what is clang doing?
(I am looking for comments on the clang assembly code that describe what is done at each step and how it actually works.)