I need to calculate euclidean distance between two points in the fastest way possible. In C.
My code is this and seems a little bit slow:
float distance(int py, int px, int jy, int jx){
return sqrtf((float)((px)*(px)+(py)*(py)));
}
Thanks in advance.
EDIT:
I'm sorry I hadn't been clear. I'd better specify the context: I'm working with images and I need the euclidean distance from each pixel to all the other pixels. So I have to calculate it a lot of times. I can't use the square of the distance. I'll add a little bit more code to be more clear:
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else{
num+=rfun(imgI[py][px].red-imgI[jy][jx].red)/distance(py, px, jy, jx);
den+=RMAX/distance(py, px, jy, jx);
}
}
}
float distance(int py, int px, int jy, int jx){
return sqrtf((float)((px-jx)*(px-jx)+(py-jy)*(py-jy)));
}
That's what I have t do. And I have to do it with all the pixels (px, py)
EDIT2: I'm sorry i wasn't clear but I tried to keep the question as general as I could. I'm writing a program to process images with an algorithm. The big problem is the time because I have to do it really really fast. Now what I need to optimize is this function: `float normC(int py, int px, int color, pixel** imgI, int sizeY, int sizeX){
int jx, jy;
float num=0, den=0;
if (color==R) {
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else{
num+=rfun(imgI[py][px].red-imgI[jy][jx].red)/distance(py, px, jy, jx);
den+=RMAX/distance(py, px, jy, jx);
}
}
}
}
else if (color==B){
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else{
num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)/distance(py, px, jy, jx);
den+=RMAX/distance(py, px, jy, jx);
}
}
}
}
else if (color==G){
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else{
num+=rfun(imgI[py][px].green-imgI[jy][jx].green)/distance(py, px, jy, jx);
den+=RMAX/distance(py, px, jy, jx);
}
}
}
}
return num/den;
} `
This function is called in a loop for every pixel(px; py) so it's called a lot of times and it takes al lot of time to compute this.
The rfun
function is not optimizable, because is already really simple and fast.
what I need to do is to make faster the distance function.
1)I tried hypotf
but it was slower than the distance
function
2)I increased the optimization settings of the compiler and that made the process 2x faster!
3) I tried with a macro #define DIST(x, y) sqrtf((float)((x)*(x)+(y)*(y)))
but nothing changed (as I expected)
EDIT3:
In the end I found that the fastest way was to calculate all the possible distances and store them in an array before starting computing in a loop the normC function. To make it faster I calculated the inverses of the distances so that I could use the product and not the quotient:
float** DIST
DIST=malloc(500*sizeof(float*)); //allocating memory for the 2d array
for (i=0; i<500; i++) {
DIST[i]=malloc(500*sizeof(float));
}
for(i=0; i<500; i++){ //storing the inverses of the distances
for (p=0; p<500; p++) {
DIST[i][p]= 1.0/sqrtf((float)((i)*(i)+(p)*(p)));
}
}
float normC(int py, int px, int color, pixel** imgI, int sizeY, int sizeX){
int jx=0, jy=0;
float num=0, den=0;
if (color==R) {
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else if (py>=jy && px>=jx){
num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[py-jy][px-jx];
den+=DIST[py-jy][px-jx];
}
else if (jy>py && px>=jx){
num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[jy-py][px-jx];
den+=DIST[jy-py][px-jx];
}
else if (jy>py && jx>px){
num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[jy-py][jx-px];
den+=DIST[jy-py][jx-px];
}
}
}
}
else if (color==B){
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else if (py>=jy && px>=jx){
num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[py-jy][px-jx];
den+=DIST[py-jy][px-jx];
}
else if (jy>py && px>=jx){
num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[jy-py][px-jx];
den+=DIST[jy-py][px-jx];
}
else if (jy>py && jx>px){
num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[jy-py][jx-px];
den+=DIST[jy-py][jx-px];
}
}
}
}
else if (color==G){
for (jy=0; jy<sizeY; jy++) {
for (jx=0; jx<sizeX; jx++) {
if (jx==px && jy==py) {
;
}
else if (py>=jy && px>=jx){
num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[py-jy][px-jx];
den+=DIST[py-jy][px-jx];
}
else if (jy>py && px>=jx){
num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[jy-py][px-jx];
den+=DIST[jy-py][px-jx];
}
else if (jy>py && jx>px){
num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[jy-py][jx-px];
den+=DIST[jy-py][jx-px];
}
}
}
}
return num/(den*RMAX);
}