Passing struct to GPU with OpenCL that contains an array of floats

2020-02-29 10:20发布

问题:

I currently have some data that I would like to pass to my GPU and then multiply by 2.

I have created a struct which can be seen here:

// Host-side description of a pattern set that is meant to be sent to the GPU.
// NOTE(review): the host code further down accesses fields named
// nodeInput/nodeOutput, which differ from nInput/nOutput declared here -
// confirm which spelling is current.
struct GPUPatternData
{
    // Input count, output count, number of patterns, and "offest" (sic,
    // presumably "offset").
    cl_int nInput,nOutput,patternCount, offest;
    // Pointer to the float data; its length is only known at run time.
    cl_float* patterns;
};

This struct should contain an array of floats. I will not know the size of the array of floats until run time, as it is specified by the user.

The kernel code:

// Kernel-side declaration of the pattern struct, using plain int/float types.
typedef struct GPUPatternDataContatiner
{

    // Input count, output count, number of patterns, and "offest" (sic).
    int nodeInput,nodeOutput,patternCount, offest;
    // NOTE(review): a pointer member inside a struct that is handed to a
    // kernel - presumably it still holds a host address on the device; verify.
    float* patterns;
} GPUPatternData; 
// Kernel: every work-item whose global id is below gpd->patternCount is meant
// to store twice the matching input pattern value into the output.
// NOTE(review): gpd and output are pointers, yet the assignment reaches
// .patterns with '.' instead of '->' - confirm this actually compiles.
__kernel void patternDataAddition(__global GPUPatternData* gpd,__global GPUPatternData* output)
{
    // One work-item per element, identified by its id in dimension 0.
    int index = get_global_id(0);
    // Skip work-items that fall beyond the pattern count.
    if(index < gpd->patternCount)
    {
        output.patterns[index] = gpd.patterns[index]*2;
    }
}

Here is the Host code:

// Input pattern set: its dimensions are read from ptSet.
GPUPattern::GPUPatternData gpd;    
gpd.nodeInput = ptSet->getInputCount();
gpd.nodeOutput = ptSet->getOutputCount();
// "offest" (sic) is the sum of the input and output counts.
gpd.offest = gpd.nodeInput+gpd.nodeOutput;
gpd.patternCount = ptSet->getCount();
// Allocates patternCount * offest floats with new[].
gpd.patterns = new cl_float [gpd.patternCount*gpd.offest];

// Result container: counts come from ptSet again, while offest and the
// allocation size are computed from gpd's fields.
GPUPattern::GPUPatternData gridC;
gridC.nodeInput = ptSet->getInputCount();
gridC.nodeOutput = ptSet->getOutputCount();
gridC.offest = gpd.nodeInput+gpd.nodeOutput;
gridC.patternCount = ptSet->getCount();
gridC.patterns = new cl_float [gpd.patternCount*gpd.offest];

All the data is allocated, then initialized with values, and then passed to the GPU:

// elements is the pattern count only (not patternCount * offest).
int elements = gpd.patternCount;
// Size of the struct object itself; not used again in the code shown.
size_t ofsdf = sizeof(gridC);
// Size of the struct plus one float per pattern.
size_t dataSize = sizeof(GPUPattern::GPUPatternData)+ (sizeof(cl_float)*elements);

// Device buffer for the input, created without initial host data (NULL).
cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
//Copy the buffer to the device
// NOTE(review): the source address is &gpd (the struct object), but dataSize is
// larger than sizeof(gpd) whenever elements > 0, so this copy reads past the
// struct; the floats behind gpd.patterns are not what gets copied.
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)&gpd,0,NULL,NULL);

//This buffer is being written to only
cl_mem bufferC = clCreateBuffer(gpu.context,CL_MEM_WRITE_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
// Same pattern as above, with &gridC as the source.
err = clEnqueueWriteBuffer(queue,bufferC,CL_TRUE,0,dataSize,(void*)&gridC,0,NULL,NULL);

Everything builds, which I verify by watching the error code, which stays at 0:

// Create the program object from the kernel source string.
cl_program program = clCreateProgramWithSource(gpu.context,1, (const char**) &kernelSource,NULL,&err);

////Build program
// NOTE(review): in the code shown, this err value is not inspected before
// clCreateKernel overwrites it below.
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

// Fetch up to 2048 bytes of build log; the log is not printed in the code shown.
char build[2048];
clGetProgramBuildInfo(program, gpu.device, CL_PROGRAM_BUILD_LOG, 2048, build, NULL);

////Create kernel
cl_kernel kernal = clCreateKernel(program, "patternDataAddition",&err);

////Set kernel arguments
// Argument 0 is the input buffer, argument 1 the output buffer; the two
// status codes are OR-ed together into err.
err  = clSetKernelArg(kernal,  0, sizeof(cl_mem), &bufferA);
err |= clSetKernelArg(kernal,  1, sizeof(cl_mem), &bufferC);

It is then kicked off

// Fixed launch dimensions: global size 1024, local size 512.
// NOTE(review): a local size of 512 may exceed what the device or kernel
// supports - confirm against the device limits.
size_t globalWorkSize = 1024;
size_t localWorkSize = 512;

// Launch the kernel over a 1-dimensional range.
err = clEnqueueNDRangeKernel(queue, kernal, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); 

// Wait for the queued work to complete.
clFinish(queue);

It's at this point that it all goes wrong:

// Read dataSize bytes from bufferC back into the memory at &gridC.
// NOTE(review): &gridC is the struct object, while dataSize is larger than
// sizeof(gridC) whenever elements > 0 - confirm the destination is big enough.
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize, &gridC, 0, NULL, NULL);
clFinish(queue);

The error in this case is -5 (CL_OUT_OF_RESOURCES).

Also if I change the line:

// The read call as originally written (dataSize bytes from bufferC).
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize, &gridC, 0, NULL, NULL);

to:

// Same read, but requesting 1000 times dataSize bytes, although bufferC was
// created with a size of dataSize.
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize*1000, &gridC, 0, NULL, NULL);

I get the error -30 (CL_INVALID_VALUE).

So my question is: why am I getting these errors when reading back the buffer? Also, I am not sure whether I can use a pointer to my float array at all, since it could be giving me the wrong sizeof() for dataSize and therefore the wrong buffer size.

回答1:

You cannot pass a struct that contains pointers into OpenCL

http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf (Section 6.9)

You can either correct it as Eric Bainville pointed out or, if you are not very restricted on memory, you can do something like:

// Fixed-capacity variant: the floats are stored inline in the struct instead
// of behind a pointer. MAX_SIZE is not defined in the code shown and has to be
// supplied as a compile-time constant.
struct GPUPatternData
{
    cl_int nInput,nOutput,patternCount, offest;
    cl_float patterns[MAX_SIZE];
};

EDIT: OK if memory is an issue. Since you only use the patterns and patternCount you can copy the patterns from the struct and pass them to the kernel separately.

// Host-side struct, kept with its pointer member; only the floats behind
// `patterns` are transferred to the device, as a separate buffer.
struct GPUPatternData
{
    cl_int nInput,nOutput,patternCount, offest;
    // Pointer declarator belongs before the member name.
    cl_float* patterns;
};

Copy the patterns from gpd to the GPU and allocate space on the GPU for gridC's patterns. Then you can pass the buffers separately:

// Kernel: writes twice each of the first gpd_patternCount input floats into
// the output buffer, one element per work-item.
__kernel void patternDataAddition(int gpd_patternCount,
    __global const float * gpd_Patterns,
    __global float * gridC_Patterns) {

    const int gid = get_global_id(0);

    // Work-items at or beyond the pattern count have nothing to do.
    if(gid >= gpd_patternCount)
    {
        return;
    }

    gridC_Patterns[gid] = gpd_Patterns[gid]*2;
}

When the kernel has finished, copy the data back into gridC.patterns directly.


One more :

You don't have to change your CPU struct. It stays the same. However this part

// Transfer code as posted in the question: the buffer is sized for the struct
// plus the floats, and the struct object itself (&gpd) is the copy source.
size_t dataSize = sizeof(GPUPattern::GPUPatternData)+ (sizeof(cl_float)*elements);

cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
//Copy the buffer to the device
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)&gpd,0,NULL,NULL);

should be changed to something like

// Transfer only the float payload; the struct itself stays on the host.
// NOTE(review): elements is the pattern count, whereas the question's host
// code allocates patternCount*offest floats - enlarge dataSize if every float
// is needed on the device.
size_t dataSize = (sizeof(cl_float)*elements);  // HERE
float* gpd_dataPointer = gpd.patterns;    // HERE

cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);

// Now use the gpd_dataPointer. Pass the pointer itself (the address of the
// floats), not the address of the pointer variable, otherwise the bytes of
// the pointer and whatever follows it on the stack would be uploaded.
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)gpd_dataPointer,0,NULL,NULL);
openCLErrorCheck(&err);

Same thing goes for the gridC

And when you copy back, copy it to gridC_dataPointer, i.e. gridC.patterns.

And then continue using the struct as if nothing happened.



回答2:

The problem is probably with the pointer inside your struct.

In this case, I would suggest to pass nInput,nOutput,patternCount,offset as kernel args, and the patterns as a buffer of float:

__kernel void patternDataAddition(int nInput,int nOutput,
    int patternCount,int offset,
    __global const float * inPatterns,
    __global float * outPatterns)


回答3:

I know this is no longer current, but I solved this problem another way: your code that allocates memory for the struct together with its data stays the same, but the struct should be changed to:

// Variant in which the float data is meant to follow the header fields
// directly in the same allocation, instead of living behind a pointer.
typedef struct GPUPatternDataContatiner
{
    int nodeInput, nodeOutput, patternCount, offest;
    // NOTE(review): a zero-length array is not standard C and relies on
    // compiler support - confirm it is accepted by the target OpenCL compiler.
    float patterns[0];
} GPUPatternData;

Using this "feature", I have created vectors for OpenCL.



标签: c arrays opencl