Compile and build .cl file using NVIDIA's nvcc

Is it possible to compile .cl file using NVIDIA's nvcc compiler?? I am trying to set up visual studio 2010 to code Opencl under CUDA platform. But when I select CUDA C/C++ Compiler to compile and build .cl file, it gives me errors like nvcc does not exist. What is the issue?

标签： opencl gpgpu nvidia gpu-programming

1条回答

来，给爷笑一个

2楼-- · 2019-05-19 12:45

You should be able to use nvcc to compile OpenCL codes. Normally, I would suggest using a filename extension of .c for a C-compliant code, and .cpp for a C++ compliant code(*), however nvcc has filename extension override options (-x ...) so that we can modify the behavior. Here is a worked example using CUDA 8.0.61, RHEL 7, Tesla K20x:

$ cat t4.cpp
#include <CL/opencl.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
#include <stdlib.h>

const char source[] =
"__kernel void test_rotate(__global ulong *d_count, ulong loops, ulong patt)"
"{"
"  ulong n = patt;"
"  for (ulong i = 0; i<loops; i++)"
"    n &= (107 << (patt+(i%7)));"
"  d_count[0] = n + loops;"
"}"
;

int main(int argc, char *argv[])
{
  cl_platform_id platform;
  cl_device_id device;
  cl_context context;
  cl_command_queue queue1, queue2;
  cl_program program;
  cl_mem mem1, mem2;
  cl_kernel kernel;

  bool two_kernels = false;
  unsigned long long loops = 1000;
  if (argc > 1) loops *= atoi(argv[1]);
  if (argc > 2) two_kernels = true;
  if (two_kernels) printf("running two kernels\n");
  else printf("running one kernel\n");
  printf("running  %lu loops\n", loops);
  unsigned long long pattern = 1;
  clGetPlatformIDs(1, &platform, NULL);
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
  context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
  queue1 = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, NULL);
  queue2 = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, NULL);

  const char *sources[1] = {source};
  program = clCreateProgramWithSource(context, 1, sources, NULL, NULL);
  clBuildProgram(program, 1, &device, NULL, NULL, NULL);
  mem1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 1*sizeof(cl_ulong), NULL, NULL);
  mem2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 1*sizeof(cl_ulong), NULL, NULL);
  kernel = clCreateKernel(program, "test_rotate", NULL);
  const size_t work_size[1] = {1};
  clSetKernelArg(kernel, 0, sizeof(mem1), &mem1);
  clSetKernelArg(kernel, 1, sizeof(loops), &loops);
  clSetKernelArg(kernel, 2, sizeof(pattern), &pattern);

  clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, work_size, work_size, 0, NULL, NULL);
  if (two_kernels){
    clSetKernelArg(kernel, 0, sizeof(mem2), &mem2);
    clSetKernelArg(kernel, 1, sizeof(loops), &loops);
    clSetKernelArg(kernel, 2, sizeof(pattern), &pattern);

    clEnqueueNDRangeKernel(queue2, kernel, 1, NULL, work_size, work_size, 0, NULL, NULL);
    }
  cl_ulong *buf1 = (cl_ulong *)clEnqueueMapBuffer(queue1, mem1, true, CL_MAP_READ, 0, 1*sizeof(cl_ulong), 0, NULL, NULL, NULL);
  cl_ulong *buf2 = (cl_ulong *)clEnqueueMapBuffer(queue2, mem2, true, CL_MAP_READ, 0, 1*sizeof(cl_ulong), 0, NULL, NULL, NULL);
  printf("result1: %lu\n", buf1[0]);
  printf("result2: %lu\n", buf2[0]);
  clEnqueueUnmapMemObject(queue1, mem1, buf1, 0, NULL, NULL);
  clEnqueueUnmapMemObject(queue2, mem2, buf2, 0, NULL, NULL);
  return 0;
}
$ nvcc -arch=sm_35 -o t4 t4.cpp -lOpenCL
$ ./t4
running one kernel
running  1000 loops
result1: 1000
result2: 0
$ cp t4.cpp t4.cl
$ nvcc -arch=sm_35 -x cu -o t4 t4.cl -lOpenCL
$ ./t4
running one kernel
running  1000 loops
result1: 1000
result2: 0
$

Note that the code here doesn't do anything sensible or significant, so I'd prefer to avoid questions. It's just for demonstration of compilation of a C++ compliant OpenCL code.

(*)(Because such files could also be readily processed by an ordinary host compiler, e.g. gnu compilers, with appropriate switches for include and link options.)

0人赞添加讨论(0) 举报

Compile and build .cl file using NVIDIA's nvcc

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间