I am trying to figure out why g++ does not generate SIMD code.
Info GCC / OS / CPU:
$ gcc -v
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
$ cat /proc/cpuinfo
...
model name : Intel(R) Core(TM)2 Duo CPU P8600 @ 2.40GHz
...
and here is my C++ code:
#include <iostream>
#include <cstdlib>
// Fills array[0..n) with pseudo-random integral values in the range [0, 9].
//
// array: destination buffer with room for at least n elements of type T.
// n:     number of elements to write; nothing is written when n <= 0.
//
// The C PRNG is reseeded with srand(1) on every call, so repeated calls
// produce the same sequence of values.
template<class T>
void fillArray(T *array, int n){
    srand(1);
    for (int i = 0; i < n; ++i) {
        // Convert straight to the element type instead of going through a
        // hard-coded float, so the template honours T.
        array[i] = static_cast<T>(rand() % 10);
    }
}
// Computes the dot product of two float vectors, with the loop manually
// unrolled by a factor of two.
//
// src1, src2: pointers to at least n readable floats each (they may alias).
// n:          number of elements; the result is 0 when n <= 0.
// Returns the sum of src1[i] * src2[i] for i in [0, n).
//
// NOTE: this is a floating-point reduction. Under strict IEEE semantics the
// compiler may not reorder the additions, so GCC will generally not
// auto-vectorize it unless reassociation is allowed (e.g. -ffast-math).
float dotCPP(float *src1, float *src2, int n){
    float dest = 0;
    int i = 0;
    // Process elements in pairs; stop early when n is odd so that the
    // [i+1] accesses never read past the end of the arrays.
    for (; i + 1 < n; i += 2) {
        dest += (src1[i] * src2[i]) + (src1[i+1] * src2[i+1]);
    }
    // Tail: at most one element remains when n is odd.
    if (i < n) {
        dest += src1[i] * src2[i];
    }
    return dest;
}
// Test driver: fills a 1,200,000-element float array with pseudo-random
// values and computes its dot product with itself.
int main(int argc, char *argv[])
{
    const int n = 1200000;
    float *a = new float[n]; //allocate data on the heap
    fillArray<float>(a, n); //function that fills the array with random numbers
    const float something_else = dotCPP(a, a, n); //call function and store return value
    (void)something_else; //result is intentionally unused; silences the -Wall warning
    delete[] a; //release the buffer allocated above
    return 0;
}
I compile the code with:
makefile:
# Builds the `dot` executable from dot.cpp.
CXX = g++
# CXXFLAGS is the conventional variable name (also used by make's implicit rules).
CXXFLAGS = -g -Wall -std=c++11 -msse2 -O3
# Only one object file: dot.cpp is the sole translation unit. Listing dot.o as
# well would compile dot.cpp a second time (via the implicit rule) and make the
# link fail with duplicate definitions.
OBJS = main.o
EXEC = dot

$(EXEC): $(OBJS)
	$(CXX) $(CXXFLAGS) $(OBJS) -o $(EXEC)

main.o: dot.cpp
	$(CXX) $(CXXFLAGS) -c dot.cpp -o main.o
and use gdb to inspect the generated code:
$gdb dot
...
(gdb) b dotCPP
(gdb) r
...
(gdb) disass
Dump of assembler code for function dotCPP(float*, float*, int):
=> 0x08048950 <+0>: push %ebx
0x08048951 <+1>: mov 0x10(%esp),%ebx
0x08048955 <+5>: mov 0x8(%esp),%edx
0x08048959 <+9>: mov 0xc(%esp),%ecx
0x0804895d <+13>: test %ebx,%ebx
0x0804895f <+15>: jle 0x8048983 <dotCPP(float*, float*, int)+51>
0x08048961 <+17>: xor %eax,%eax
0x08048963 <+19>: fldz
0x08048965 <+21>: lea 0x0(%esi),%esi
0x08048968 <+24>: flds (%edx,%eax,4)
0x0804896b <+27>: fmuls (%ecx,%eax,4)
0x0804896e <+30>: flds 0x4(%edx,%eax,4)
0x08048972 <+34>: fmuls 0x4(%ecx,%eax,4)
0x08048976 <+38>: add $0x2,%eax
0x08048979 <+41>: cmp %eax,%ebx
0x0804897b <+43>: faddp %st,%st(1)
0x0804897d <+45>: faddp %st,%st(1)
0x0804897f <+47>: jg 0x8048968 <dotCPP(float*, float*, int)+24>
0x08048981 <+49>: pop %ebx
0x08048982 <+50>: ret
0x08048983 <+51>: fldz
0x08048985 <+53>: pop %ebx
0x08048986 <+54>: ret
End of assembler dump.
Am I missing something, or should gcc be using the xmm registers here?
I would really appreciate any suggestions that would help me understand why gcc does not generate code that uses the xmm registers.
Please let me know if you need further information on anything.