I'm using bicubic filtering to smooth my heightmap, and I implemented it in GLSL:
Bicubic interpolation (see the interpolate()
function below):
// Bicubic interpolation of the red channel of `tex` at texture coordinate `t`.
//
// Reads the 4x4 block of texels around `t` (columns -1..+2 and rows -1..+2
// relative to floor(t * size)), interpolates each row along x with
// interpolate(), then interpolates the four row results along y.
//
// tex : texture to sample; only the .r component is used.
// t   : texture coordinate; it is scaled by the texture size to get texel units.
// Returns the interpolated value.
//
// NOTE(review): sample coordinates are (texelIndex / size), i.e. they are not
// offset by half a texel. If the sampler uses linear filtering each fetch may
// blend neighbouring texels - confirm the filter mode, or consider
// texelFetch() / a +0.5 offset.
float interpolateBicubic(sampler2D tex, vec2 t)
{
    // Query the level-0 texture size instead of hard-coding 1025.
    vec2 texSize = vec2(textureSize(tex, 0));

    // Position in texel units, split into the integer cell and the
    // fractional offset inside that cell (the interpolation parameter).
    vec2 texelPos = t.xy * texSize;
    vec2 cell = floor(texelPos);
    vec2 f = fract(texelPos);

    // One horizontally interpolated value per row, bottom (-1) to top (+2).
    float H_ix[4];
    for (int row = 0; row < 4; ++row)
    {
        float y = cell.y + float(row - 1);

        float h0 = texture(tex, vec2(cell.x - 1.0, y) / texSize).r;
        float h1 = texture(tex, vec2(cell.x,       y) / texSize).r;
        float h2 = texture(tex, vec2(cell.x + 1.0, y) / texSize).r;
        float h3 = texture(tex, vec2(cell.x + 2.0, y) / texSize).r;

        H_ix[row] = interpolate(f.x, h0, h1, h2, h3);
    }

    // Combine the four row results along y.
    return interpolate(f.y, H_ix[0], H_ix[1], H_ix[2], H_ix[3]);
}
This is my version of it; the texture size (1025) is still hardcoded. When I use this in the vertex shader and/or in the tessellation evaluation shader, it affects performance very badly (the frame rate drops to 20-30 fps). But when I change the last line of this function to:
return 0;
the performance increases just like if I used bilinear or nearest/without filtering.
The same happens (i.e. the performance remains good) with any of these:
return h[...]; //...
return f.x; //...
return H_ix[...]; //...
The interpolation function:
// Evaluates a cubic through four consecutive samples at parameter x.
//
// x        : interpolation parameter (the callers pass a fract() result).
// v0..v3   : the four samples, in order.
// Returns c4*x^3 + c3*x^2 + c2*x + c1, where the coefficients are built from
// the samples and entries of spline_matrix (defined elsewhere in the shader).
float interpolate(float x, float v0, float v1, float v2, float v3)
{
    // Coefficients are float, not double (they were originally double):
    // every input and the return type are float, GLSL has no implicit
    // double -> float conversion, and double arithmetic is typically far
    // slower than float on GPUs.
    float c1 = spline_matrix[0][1]*v1;
    float c2 = spline_matrix[1][0]*v0 + spline_matrix[1][2]*v2;
    float c3 = spline_matrix[2][0]*v0 + spline_matrix[2][1]*v1 + spline_matrix[2][2]*v2 + spline_matrix[2][3]*v3;
    float c4 = spline_matrix[3][0]*v0 + spline_matrix[3][1]*v1 + spline_matrix[3][2]*v2 + spline_matrix[3][3]*v3;

    // Horner form of c4*x^3 + c3*x^2 + c2*x + c1.
    return ((c4*x + c3)*x + c2)*x + c1;
}
The fps only decreases when I return the final H_iy
value.
How does the return value affect the performance?
EDIT: I've just realized that I used double
in the interpolate()
function to declare c1
, c2
, etc.
I've changed them to float
, and the performance now remains good with the proper return value.
So the question changes a bit:
How does a double
precision variable affect the performance of the hardware, and why did only the last interpolate() call trigger this performance loss and not the earlier ones, given that the H_ix[]
array was float
too, just like H_iy
?