setting up a CUDA 2D “unsigned char” texture for l

2019-03-16 18:59发布

I have a linear array of unsigned chars representing a 2D array. I would like to place it into a CUDA 2D texture and perform (floating point) linear interpolation on it, i.e., have the texture call fetch the 4 nearest unsigned char neighbors, internally convert them to float, interpolate between them, and return the resulting floating point value.

I am having some difficulty setting up the texture and binding it to a texture reference. I have been through the CUDA reference manual & appendices, but I'm just not having any luck.

Below is runnable code to set up and bind 1) a floating point texture and 2) an unsigned char texture. The floating point code runs just fine. However, if you uncomment the two commented unsigned char lines toward the bottom, an "invalid argument" error is thrown.

#include <cstdio>
#include <cuda_runtime.h>

typedef unsigned char uchar;

// Define (global) texture references; must use "cudaReadModeNormalizedFloat"
// for ordinal textures
texture<float, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefFloat;
texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> texRefUChar;

// Define size of (row major) textures
size_t const WIDTH  = 1000;
size_t const HEIGHT = 1000;
size_t const TOT_PIX = WIDTH*HEIGHT;

int main(void)
{
   // Set texel formats
   cudaChannelFormatDesc descFloat = cudaCreateChannelDesc<float>();
   cudaChannelFormatDesc descUChar = cudaCreateChannelDesc<uchar>();

   // Choose to perform texture 2D linear interpolation
   texRefFloat.filterMode = cudaFilterModeLinear;
   texRefUChar.filterMode = cudaFilterModeLinear;

   // Allocate texture device memory
   float * d_buffFloat; cudaMalloc(&d_buffFloat, sizeof(float)*TOT_PIX);
   uchar * d_buffUChar; cudaMalloc(&d_buffUChar, sizeof(uchar)*TOT_PIX);

   // Bind texture references to textures
   cudaError_t errFloat = cudaSuccess;
   cudaError_t errUChar = cudaSuccess;

   errFloat = cudaBindTexture2D(0, texRefFloat, d_buffFloat, descFloat,
                  WIDTH, HEIGHT, sizeof(float)*WIDTH);
   // Uncomment the following two lines for an error
   //errUChar = cudaBindTexture2D(0, texRefUChar, d_buffUChar, descUChar,
   //               WIDTH, HEIGHT, sizeof(uchar)*WIDTH);

   // Check for errors during binding
   if (errFloat != cudaSuccess)
   {
      printf("Error binding float texture reference: %s\n",
          cudaGetErrorString(errFloat));
      exit(-1);
   }

   if (errUChar != cudaSuccess)
   {
      printf("Error binding unsigned char texture reference: %s\n",
          cudaGetErrorString(errUChar));
      exit(-1);
   }

   return 0;
}

Any help/insight would be most appreciated!

Aaron

1条回答
Ridiculous、
2楼-- · 2019-03-16 19:08

Each row of a texture must be properly aligned. This cannot be guaranteed in general if you bind the texture to a plain array (as opposed to a CUDA array). To bind plain memory to a 2D texture, you would want to allocate the memory with cudaMallocPitch(). This sets the row pitch such that it is suitable for binding to a texture. Note that it is not good practice to pass 0 as the first argument to a texture binding API call. This argument is for CUDA to return an offset to the app. If the offset is non-zero you will need to add it to the texture coordinate during texture access.

Here is a quick example that shows how to read interpolated values from a texture whose elements are unsigned char.

#include <stdlib.h>
#include <stdio.h>

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;

__global__ void kernel (int m, int n, float shift_x, float shift_y) 
{
    float val;
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            val = tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
            printf ("%.2f  ", val);
        }
        printf ("\n");
    }
}

int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width  = #columns
    size_t pitch, tex_ofs;
    unsigned char arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
    unsigned char *arr_d = 0;

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        return EXIT_FAILURE;
    }
    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in x-direction\n");
    kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}

The output of this program is as follows:

reading array straight
0.04  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.98  0.99  0.99
reading array shifted in x-direction
0.05  0.05  0.05
0.08  0.09  0.09
0.12  0.13  0.13
0.99  0.99  0.99
reading array shifted in y-direction
0.06  0.07  0.07
0.10  0.11  0.11
0.55  0.56  0.56
0.98  0.99  0.99
查看更多
登录 后发表回答