From what I have read on the web, one of the most efficient algorithms for image stabilization is to use Gray coded bit plane matching. However, I'm having trouble understanding it (Gray codes themselves are not that complex, it's the rest of it). Can anyone point me to a resource on this subject (or another good method of stabalization) that is a little below the level of most published papers? Sample code beats abstract generalized equations.
For my purpose, there will be no panning or zooming of the video and no moving objects in the frames.
You can try some simple approaches first, I've suggested some recently here: Video Stabilization with OpenCV
In your case (no pan, no zoom, static scene), phase-correlation might be already sufficient and it's quite easy (see e.g. wikipedia on that).
If I recall correctly, there are several video stabilization filters/plug-ins available for avisynth that use phase-correlation - you can try them out first.
For my needs, I've implemented a simple tool that goes the SURF/Homography route to align several images (aligns images, not videos). You might want to try that out to see if this suffices your needs as well: http://web.archive.org/web/20101004234404/http://ioctl.eu/wiki/applications/ImageAlign (Heh, I hope that code still runs...)
I wrote a simple function that stabilizes three buffers of a video frame, one for Red, Green, and Blue. It's not the fastest, but for an NTSC DVD frame (720x480) it can run at about 1.7 frames per second on an E-machines t6412. All you have to do is pick a spot in the center of the image and compare it to the last frame by accumulating how many bits in each pixel match. In a nested loop offset the search area of the most recent frame by one pixel for each loop iteration. I tried analog FIR correlation but that didn't work nearly as good as picking the alignment offset with the most matching bits. And this algorithm also makes the output video fill the screen instead of waving black bars that follow the camera shake.
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define max(a, b) (((a) > (b)) ? (a) : (b))
int cubic(int x,int *v)
{
int q1,q2,q3,q4;
q1=( -1792 * v[0] + 5376 * v[1] - 5376 * v[2] + 1792 * v[3] ) >> 8;
q2=( 3840 * v[0] - 9216 * v[1] + 6912 * v[2] - 1536 * v[3] ) >> 8;
q3=( -2304 * v[0] + 2304 * v[2] ) >> 8;
q4=(v[0] + 4096 * v[1] + v[2]) >> 8;
return (((((((((q1 * x) >> 8) + q2 ) * x ) >> 8)+ q3 ) * x) >> 8) + q4 ) >> 4;
}
double get_mono_cubic_row(unsigned char * redbuf,unsigned long width, unsigned long height,signed long x,signed long y,signed int offset)
{
int m[4]={0,0,0,0};
if(x+3<width && x>=0 && y<height && y>0)
{
m[0]=redbuf[x+y*width];
m[1]=redbuf[(x+1)+y*width];
m[2]=redbuf[(x+2)+y*width];
m[3]=redbuf[(x+3)+y*width];
}
else
{
m[0]=255;
m[1]=255;
m[2]=255;
m[3]=255;
}
return cubic(offset,m);
}
unsigned char get_mono_bicubic (unsigned char *redbuf, unsigned long width,unsigned long height,double x, double y)
{
int xi=0,yi=0;
int dx=0,dy =0;
int m[4]={0.0,0.0,0.0,0.0}; /* four of one mono pixel */
xi=floor(x);
yi=floor(y);
dx=(x-xi) * 256;
dy=(y-yi) * 256;
if(yi+3<height && xi>0 && yi>0 && xi<width)
{
m[0]=get_mono_cubic_row(redbuf,width,height,xi-1,yi-1,dx);
m[1]=get_mono_cubic_row(redbuf,width,height,xi-1,yi,dx);
m[2]=get_mono_cubic_row(redbuf,width,height,xi-1,yi+1,dx);
m[3]=get_mono_cubic_row(redbuf,width,height,xi-1,yi+2,dx);
}
else
{
m[0]=255;
m[1]=255;
m[2]=255;
m[3]=255;
}
return clip(cubic(dy,m));
}
void mono_scale_exact(unsigned char *redbuf,unsigned long width, unsigned long height,unsigned long desired_width, unsigned long desired_height)
{
unsigned char *tempbuf=NULL;
double ratio_x=(desired_width * (1.0/(double)width));
double ratio_y=(desired_height * (1.0/(double)height));
unsigned long maxwidth=1;
unsigned long maxheight=1;
double u=0;
int x=0;
int y=0;
double v=0;
maxwidth=max(desired_width,width);
maxheight=max(desired_height,height);
tempbuf=(unsigned char*)malloc(maxwidth * maxheight * sizeof(unsigned char));
if(tempbuf!=NULL)
{
/* first the red */
for(y=0;y<desired_height;y++)
{
for(x=0;x<desired_width;x++)
{
u = x * (1.0/ratio_x);
v = y * (1.0/ratio_y);
tempbuf[x+y*desired_width]=get_mono_bicubic (redbuf,width,height,u,v);
}
}
for(y=0;y<desired_height;y++)
{
for(x=0;x<desired_width;x++)
{
redbuf[x+y*desired_width]=tempbuf[x+y*desired_width];
}
}
free(tempbuf);
}
}
void fatal(void)
{
exit(1);
}
#define DEBUG_STABLE 0
unsigned char digital_image_stabilization(unsigned char *redbuf, unsigned char *greenbuf, unsigned char *bluebuf,
unsigned long width, unsigned long height, unsigned short search_len,unsigned short twiddle)
{
static unsigned char *search_scratch=NULL;
static unsigned char *tempbuf=NULL;
unsigned long in_x=0;
unsigned long in_y=0;
static signed long x_adj=0;
static signed long y_adj=0;
unsigned long out_x=0;
const unsigned int mask[8]={1,2,4,8,16,32,64,128};
unsigned long out_y=0;
signed long mid_x=0;
signed long mid_y=0;
static signed long center_x=0;
static signed long center_y=0;
static signed long end_center_x=0;
static signed long end_center_y=0;
static unsigned char first=1;
int search_x=0;
int search_y=0;
unsigned long peak=0;
static int new_width=0;
static int new_height=0;
int tww_y=0;
int twp_y=0;
static unsigned long *twiddle_scratch=NULL;
if(first==1)
{
center_x=(width/2)-(search_len/2);
if(center_x<twiddle)center_x=twiddle;
center_y=(height/2)-(search_len/2);
if(center_y<twiddle)center_y=twiddle;
if((search_len+center_x)>width)fatal();
if((search_len+center_y)>height)fatal();
end_center_y=center_y+search_len;
end_center_x=center_x+search_len;
new_width=width-twiddle;
new_height=height-twiddle;
search_scratch=(unsigned char *)malloc((search_len * search_len) * sizeof(unsigned char));
tempbuf=(unsigned char *)malloc((width * height) * sizeof(unsigned char));
twiddle_scratch=(unsigned long *)malloc((twiddle * twiddle) * sizeof(unsigned long));
if(search_scratch==NULL || tempbuf==NULL || twiddle_scratch==NULL)fatal();
first=0;
}
for(search_y=0;search_y<twiddle;search_y++)
{
for(search_x=0;search_x<twiddle;search_x++)
{
twiddle_scratch[search_x+search_y]=0;
}
}
/* Multiply-accumulate */
for(mid_y=0;mid_y<twiddle;mid_y++)
{
int twp_x=0;
for(mid_x=0;mid_x<twiddle;mid_x++)
{
unsigned long acc=0;
int tw_y=0;
for(in_y=center_y;in_y<end_center_y;in_y++)
{
int tw_x=0;
for(in_x=center_x;in_x<end_center_x;in_x++)
{
unsigned long bmpptr=(in_x+mid_x)+(in_y+mid_y)*width;
unsigned int cur_gray=((((77 * redbuf[bmpptr])+(151 * greenbuf[bmpptr]) + (28 * bluebuf[bmpptr])) >> 8) & 255);
unsigned int last_gray=search_scratch[tw_x+tw_y*search_len];
acc+=(!((last_gray ^ cur_gray) & mask[0]));
acc+=(!(((last_gray ^ cur_gray) & mask[1]) >> 1));
acc+=(!(((last_gray ^ cur_gray) & mask[2]) >> 2));
acc+=(!(((last_gray ^ cur_gray) & mask[3]) >> 3));
acc+=(!(((last_gray ^ cur_gray) & mask[4]) >> 4));
acc+=(!(((last_gray ^ cur_gray) & mask[5]) >> 5));
acc+=(!(((last_gray ^ cur_gray) & mask[6]) >> 6));
acc+=(!(((last_gray ^ cur_gray) & mask[7]) >> 7));
tw_x++;
}
tw_y++;
}
//acc/=(search_len * search_len);
twiddle_scratch[twp_x+twp_y*twiddle]=acc;
twp_x++;
}
twp_y++;
}
for(search_y=0;search_y<twiddle;search_y++)
{
for(search_x=0;search_x<twiddle;search_x++)
{
if(twiddle_scratch[search_x+search_y*twiddle]>peak)
{
peak=twiddle_scratch[search_x+search_y*twiddle];
x_adj=search_x;
y_adj=search_y;
}
}
}
/* update the scratch area with the stabilized image */
tww_y=0;
for(in_y=center_y;in_y<end_center_y;in_y++)
{
int tww_x=0;
for(in_x=center_x;in_x<end_center_x;in_x++)
{
unsigned long out_bmpptr=tww_x+tww_y*search_len;
#if !DEBUG_STABLE
signed long safe_x=(in_x+x_adj);
signed long safe_y=(in_y+y_adj);
#endif
#if DEBUG_STABLE
signed long safe_x=(in_x-x_adj);
signed long safe_y=(in_y-y_adj);
#endif
unsigned long in_bmpptr=0;
unsigned char cur_gray=0;
if(safe_x<0)safe_x=0;
if(safe_y<0)safe_y=0;
in_bmpptr=safe_x+safe_y*width;
cur_gray=((77 * redbuf[in_bmpptr])+(151 * greenbuf[in_bmpptr]) + (28 * bluebuf[in_bmpptr])) >> 8;
search_scratch[out_bmpptr]=cur_gray;
tww_x++;
}
tww_y++;
}
/* move red */
for(out_y=twiddle;out_y<height;out_y++)
{
for(out_x=twiddle;out_x<width;out_x++)
{
signed long out_bmpptr=(out_x-twiddle)+(out_y-twiddle)*new_width;
#if !DEBUG_STABLE
signed long safe_x=((out_x-twiddle)+x_adj);
signed long safe_y=((out_y-twiddle)+y_adj);
#endif
#if DEBUG_STABLE
signed long safe_x=(out_x-x_adj);
signed long safe_y=(out_y-y_adj);
unsigned long bad_bmpptr=out_x+out_y*width;
#endif
signed long in_bmpptr=0;
if(safe_x<0)safe_x=0;
if(safe_y<0)safe_y=0;
if(safe_x>width)safe_x=width;
if(safe_y>height)safe_y=height;
in_bmpptr=safe_x+safe_y*width;
#if DEBUG_STABLE
tempbuf[out_bmpptr]=((safe_x>center_x-8 && safe_x<center_x+8) && (safe_y>center_y-8 && safe_y<center_y+8)) ? 255 :redbuf[bad_bmpptr];
#endif
#if !DEBUG_STABLE
tempbuf[out_bmpptr]=redbuf[in_bmpptr];
#endif
}
}
mono_scale_exact(tempbuf,new_width,new_height,width,height);
for(out_y=0;out_y<height;out_y++)
{
for(out_x=0;out_x<width;out_x++)
{
unsigned long bmpptr=out_x+out_y*width;
redbuf[bmpptr]=tempbuf[bmpptr];
}
}
/* move green */
for(out_y=twiddle;out_y<height;out_y++)
{
for(out_x=twiddle;out_x<width;out_x++)
{
signed long out_bmpptr=(out_x-twiddle)+(out_y-twiddle)*new_width;
#if !DEBUG_STABLE
signed long safe_x=((out_x-twiddle)+x_adj);
signed long safe_y=((out_y-twiddle)+y_adj);
#endif
#if DEBUG_STABLE
signed long safe_x=(out_x-x_adj);
signed long safe_y=(out_y-y_adj);
unsigned long bad_bmpptr=out_x+out_y*width;
#endif
signed long in_bmpptr=0;
if(safe_x<0)safe_x=0;
if(safe_y<0)safe_y=0;
if(safe_x>width)safe_x=width;
if(safe_y>height)safe_y=height;
in_bmpptr=safe_x+safe_y*width;
#if DEBUG_STABLE
tempbuf[out_bmpptr]=((safe_x>center_x-8 && safe_x<center_x+8) && (safe_y>center_y-8 && safe_y<center_y+8)) ? 0 :greenbuf[bad_bmpptr];
#endif
#if !DEBUG_STABLE
tempbuf[out_bmpptr]=greenbuf[in_bmpptr];
#endif
}
}
mono_scale_exact(tempbuf,new_width,new_height,width,height);
for(out_y=0;out_y<height;out_y++)
{
for(out_x=0;out_x<width;out_x++)
{
unsigned long bmpptr=out_x+out_y*width;
greenbuf[bmpptr]=tempbuf[bmpptr];
}
}
/* move blue */
for(out_y=twiddle;out_y<height;out_y++)
{
for(out_x=twiddle;out_x<width;out_x++)
{
signed long out_bmpptr=(out_x-twiddle)+(out_y-twiddle)*new_width;
#if !DEBUG_STABLE
signed long safe_x=((out_x-twiddle)+x_adj);
signed long safe_y=((out_y-twiddle)+y_adj);
#endif
#if DEBUG_STABLE
signed long safe_x=(out_x-x_adj);
signed long safe_y=(out_y-y_adj);
unsigned long bad_bmpptr=out_x+out_y*width;
#endif
signed long in_bmpptr=0;
if(safe_x<0)safe_x=0;
if(safe_y<0)safe_y=0;
if(safe_x>width)safe_x=width;
if(safe_y>height)safe_y=height;
in_bmpptr=safe_x+safe_y*width;
#if DEBUG_STABLE
tempbuf[out_bmpptr]=((safe_x>center_x-8 && safe_x<center_x+8) && (safe_y>center_y-8 && safe_y<center_y+8)) ? 255 :bluebuf[bad_bmpptr];
#endif
#if !DEBUG_STABLE
tempbuf[out_bmpptr]=bluebuf[in_bmpptr];
#endif
}
}
mono_scale_exact(tempbuf,new_width,new_height,width,height);
for(out_y=0;out_y<height;out_y++)
{
for(out_x=0;out_x<width;out_x++)
{
unsigned long bmpptr=out_x+out_y*width;
bluebuf[bmpptr]=tempbuf[bmpptr];
}
}
return (x_adj==0 && y_adj==0) ? 0 : 1;
}
To use it, fill *redbuf with the red channel of the image, *greenbuf with the green channel, *bluebuf with the blue channel and call the digital_image_stabilization function. Once the function finishes the buffers will have the stabilized image. I used 96 for the search_len and 32 for the twiddle.