I wrote this code for matrix multiplication using SIMD, which I was able to compile in Visual Studio, but now I need to compile it on Ubuntu using gcc/g++.
Which commands should I use to compile this? Do I need to make any changes to the code itself?
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <ctime>
#include <iostream>

#include <xmmintrin.h>

#ifdef _WIN32
#include <conio.h> /* Windows-only console I/O (getch); not available on Linux */
#endif
using namespace std;
#define MAX_NUM 1000
#define MAX_DIM 252

/* Portable spelling of a 16-byte alignment request: MSVC uses
   __declspec(align(16)), GCC and Clang use __attribute__((aligned(16))). */
#if defined(_MSC_VER)
#define ALIGN16 __declspec(align(16))
#else
#define ALIGN16 __attribute__((aligned(16)))
#endif

/*
 * Times two MAX_DIM x MAX_DIM float matrix multiplications of the same random
 * inputs: one using SSE intrinsics on a transposed copy of b, and one plain
 * scalar triple loop (unrolled by four). Both timings are printed to stdout.
 *
 * MAX_DIM must be a multiple of 4: both inner loops step by four elements,
 * and the aligned SSE loads rely on every row starting on a 16-byte boundary
 * (a row is MAX_DIM * sizeof(float) bytes).
 *
 * Returns 0.
 */
int main()
{
    const int l = MAX_DIM, m = MAX_DIM, n = MAX_DIM;

    /* Static storage keeps roughly 1 MB of matrices off the stack. */
    static ALIGN16 float a[MAX_DIM][MAX_DIM];
    static ALIGN16 float b[MAX_DIM][MAX_DIM];
    static ALIGN16 float c[MAX_DIM][MAX_DIM];
    static ALIGN16 float d[MAX_DIM][MAX_DIM];

    srand((unsigned)time(0));

    /* Fill a (l x m) and then b (m x n) with pseudo-random values in [0, MAX_NUM). */
    for (int i = 0; i < l; ++i)
    {
        for (int j = 0; j < m; ++j)
        {
            a[i][j] = (float)(rand() % MAX_NUM);
        }
    }
    for (int i = 0; i < m; ++i)
    {
        for (int j = 0; j < n; ++j)
        {
            b[i][j] = (float)(rand() % MAX_NUM);
        }
    }

    /* ---- SIMD version (the transpose is part of the timed region) ---- */
    clock_t Time1 = clock();

    /* d = transpose(b), so that a column of b becomes a contiguous row of d. */
    for (int j = 0; j < n; ++j)
    {
        for (int k = 0; k < m; ++k)
        {
            d[j][k] = b[k][j];
        }
    }

    for (int i = 0; i < l; ++i)
    {
        for (int j = 0; j < n; ++j)
        {
            float sum = 0;
            for (int k = 0; k < m; k += 4)
            {
                /* Multiply four element pairs at once, then add the four
                   lanes into the running dot product. */
                const __m128 prod = _mm_mul_ps(_mm_load_ps(&a[i][k]),
                                               _mm_load_ps(&d[j][k]));
                float lanes[4];
                _mm_storeu_ps(lanes, prod);
                sum += lanes[0] + lanes[1] + lanes[2] + lanes[3];
            }
            c[i][j] = sum;
        }
    }

    clock_t Time2 = clock();
    double TotalTime = ((double)Time2 - (double)Time1) / CLOCKS_PER_SEC;
    cout << "Time taken by SIMD implmentation is " << TotalTime << "s\n";

    /* ---- Scalar version (overwrites c) ---- */
    Time1 = clock();
    for (int i = 0; i < l; ++i)
    {
        for (int j = 0; j < n; ++j)
        {
            c[i][j] = 0;
            for (int k = 0; k < m; k += 4)
            {
                c[i][j] += a[i][k] * b[k][j];
                c[i][j] += a[i][k + 1] * b[k + 1][j];
                c[i][j] += a[i][k + 2] * b[k + 2][j];
                c[i][j] += a[i][k + 3] * b[k + 3][j];
            }
        }
    }
    Time2 = clock();
    TotalTime = ((double)Time2 - (double)Time1) / CLOCKS_PER_SEC;
    cout << "Time taken by normal implmentation is " << TotalTime << "s\n";

#ifdef _WIN32
    /* Keep the console window open until a key is pressed (Windows only;
       getch comes from <conio.h>, which does not exist on Linux). */
    getch();
#endif
    return 0;
}
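You need to enable SSE when compiling, e.g.
    g++ -O2 -msse matmul.cpp -o matmul
You will also need to change:
    __declspec(align(16)) float a[MAX_DIM][MAX_DIM], b[MAX_DIM][MAX_DIM], c[MAX_DIM][MAX_DIM], d[MAX_DIM][MAX_DIM];
which is Windows-specific, to the more portable:
    float a[MAX_DIM][MAX_DIM] __attribute__((aligned(16)));
(and likewise for b, c and d). The <conio.h> include and the getch() call are also Windows-specific and have to be removed, since neither exists on Linux.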