colored image to greyscale image using CUDA parall

2020-05-24 06:47发布

I am trying to solve a problem in which i am supposed to change a colour image to a greyscale image. For this purpose i am using CUDA parallel approach.

The kerne code i am invoking on the GPU is as follows.

__global__
void rgba_to_greyscale(const uchar4* const rgbaImage,
                   unsigned char* const greyImage,
                   int numRows, int numCols)
{
    int absolute_image_position_x = blockIdx.x;  
    int absolute_image_position_y = blockIdx.y;

  if ( absolute_image_position_x >= numCols ||
   absolute_image_position_y >= numRows )
 {
     return;
 }
uchar4 rgba = rgbaImage[absolute_image_position_x + absolute_image_position_y];
float channelSum = .299f * rgba.x + .587f * rgba.y + .114f * rgba.z;
greyImage[absolute_image_position_x + absolute_image_position_y] = channelSum;

}

void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage,
                            uchar4 * const d_rgbaImage,
                            unsigned char* const d_greyImage,
                            size_t numRows,
                            size_t numCols)
{
  //You must fill in the correct sizes for the blockSize and gridSize
  //currently only one block with one thread is being launched
  const dim3 blockSize(numCols/32, numCols/32 , 1);  //TODO
  const dim3 gridSize(numRows/12, numRows/12 , 1);  //TODO
  rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage,
                                             d_greyImage,
                                             numRows,
                                             numCols);

  cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
}


i see a line of dots in the first pixel line.

error i am getting is

libdc1394 error: Failed to initialize libdc1394
Difference at pos 51 exceeds tolerance of 5
Reference: 255
GPU : 0
my input/output images Can anyone help me with this??? thanks in advance.

12条回答
啃猪蹄的小仙女
2楼-- · 2020-05-24 07:22

You are running following number of block and grids:

  const dim3 blockSize(numCols/32, numCols/32 , 1);  //TODO
  const dim3 gridSize(numRows/12, numRows/12 , 1);  //TODO

yet you are not using any threads in your kernel code!

 int absolute_image_position_x = blockIdx.x;  
 int absolute_image_position_y = blockIdx.y;

think this way, the width of an image can be divide into absolute_image_position_x parts of column and the height of an image can be divide into absolute_image_position_y parts of row. Now the box each of the cross section it creates you need to change/redraw all the pixels in terms of greyImage, parallely. Enough spoiler for an assignment :)

查看更多
我欲成王,谁敢阻挡
3楼-- · 2020-05-24 07:24

the calculation of absolute x & y image positions is perfect. but when u need to access that particular pixel in the coloured image , shouldn't you u use the following code??

uchar4 rgba = rgbaImage[absolute_image_position_x + (absolute_image_position_y * numCols)];

I thought so, when comparing it to a code you'd write to execute the same problem in serial code. Please let me know :)

查看更多
等我变得足够好
4楼-- · 2020-05-24 07:27

same code with with ability to handle non-standard input size images

int idx=blockDim.x*blockIdx.x+threadIdx.x;
int idy=blockDim.y*blockIdx.y+threadIdx.y;

uchar4 rgbcell=rgbaImage[idx*numCols+idy];

   greyImage[idx*numCols+idy]=0.299*rgbcell.x+0.587*rgbcell.y+0.114*rgbcell.z;


  }

  void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage, uchar4 * const d_rgbaImage,
                        unsigned char* const d_greyImage, size_t numRows, size_t numCols)
 {
 //You must fill in the correct sizes for the blockSize and gridSize
 //currently only one block with one thread is being launched

int totalpixels=numRows*numCols;
int factors[]={2,4,8,16,24,32};
vector<int> numbers(factors,factors+sizeof(factors)/sizeof(int));
int factor=1;

   while(!numbers.empty())
  {
 if(totalpixels%numbers.back()==0)
 {
     factor=numbers.back();
     break;
 }
   else
   {
  numbers.pop_back();
   }
 }



 const dim3 blockSize(factor, factor, 1);  //TODO
 const dim3 gridSize(numRows/factor+1, numCols/factor+1,1);  //TODO
 rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage,    numRows, numCols);
查看更多
甜甜的少女心
5楼-- · 2020-05-24 07:29

I recently joined this course and tried your solution but it don't work so, i tried my own. You are almost correct. The correct solution is this:

__global__`
void rgba_to_greyscale(const uchar4* const rgbaImage,
               unsigned char* const greyImage,
               int numRows, int numCols)
{`

int pos_x = (blockIdx.x * blockDim.x) + threadIdx.x;
int pos_y = (blockIdx.y * blockDim.y) + threadIdx.y;
if(pos_x >= numCols || pos_y >= numRows)
    return;

uchar4 rgba = rgbaImage[pos_x + pos_y * numCols];
greyImage[pos_x + pos_y * numCols] = (.299f * rgba.x + .587f * rgba.y + .114f * rgba.z); 

}

The rest is same as your code.

查看更多
虎瘦雄心在
6楼-- · 2020-05-24 07:32

You still should have a problem with run time - the conversion will not give a proper result.

The lines:

  1. uchar4 rgba = rgbaImage[absolute_image_position_x + absolute_image_position_y];
  2. greyImage[absolute_image_position_x + absolute_image_position_y] = channelSum;

should be changed to:

  1. uchar4 rgba = rgbaImage[absolute_image_position_x + absolute_image_position_y*numCols];
  2. greyImage[absolute_image_position_x + absolute_image_position_y*numCols] = channelSum;
查看更多
倾城 Initia
7楼-- · 2020-05-24 07:33
__global__
void rgba_to_greyscale(const uchar4* const rgbaImage,
                       unsigned char* const greyImage,
                       int numRows, int numCols)
{
    int rgba_x = blockIdx.x * blockDim.x + threadIdx.x;
    int rgba_y = blockIdx.y * blockDim.y + threadIdx.y;
    int pixel_pos = rgba_x+rgba_y*numCols;

    uchar4 rgba = rgbaImage[pixel_pos];
    unsigned char gray = (unsigned char)(0.299f * rgba.x + 0.587f * rgba.y + 0.114f * rgba.z);
    greyImage[pixel_pos] = gray;
}

void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage, uchar4 * const d_rgbaImage,
                            unsigned char* const d_greyImage, size_t numRows, size_t numCols)
{
    //You must fill in the correct sizes for the blockSize and gridSize
    //currently only one block with one thread is being launched
    const dim3 blockSize(24, 24, 1);  //TODO
    const dim3 gridSize( numCols/24+1, numRows/24+1, 1);  //TODO
    rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows, numCols);

    cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
}
查看更多
登录 后发表回答