CUDA 5.0 separate compilation of library with cmake

2020-02-03 07:49发布

问题:

The build time of my CUDA library is increasing, so I thought that the separate compilation introduced in CUDA 5.0 might help me. I couldn't figure out how to achieve separate compilation with cmake. I looked into the NVCC documentation and found how to compile device objects (using the -dc option) and how to link them (using the -dlink option). My attempts to get it running using cmake failed. I'm using cmake 2.8.10.2 and the head of the trunk of FindCUDA.cmake. However, I couldn't find out how to specify which files should be compiled and how to link them into a library. In particular, the syntax of function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file_var cuda_target options object_files source_files) is unclear to me, because I don't know what output_file_var and cuda_target are. Here are the non-working results of my attempts:

# Non-working attempt at separable compilation, kept as posted.
# Each cuda_compile() call builds one .cu file with -dc (relocatable device
# code); per the FindCUDA documentation the first argument names the variable
# that receives the generated object file(s).
cuda_compile(DEVICEMANAGER_O devicemanager.cu OPTIONS -dc)
cuda_compile(BLUB_O blub.cu OPTIONS -dc)
# NOTE(review): the object arguments below are passed as bare names rather
# than ${...} expansions, and DEVICEMANGER_O does not match the
# DEVICEMANAGER_O variable set above -- both likely contribute to the failure.
CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS(TEST_O gpuacceleration
                                          ""  DEVICEMANGER_O BLUB_O)
set(LIB_TYPE SHARED)
# The library target itself is disabled; the commented-out lines show the
# variants that were tried as its inputs.
#cuda_add_library(gpuacceleration ${LIB_TYPE} 
  #${gpuacc_SRCS} 
  #devicemanager.cu
  # blub.cu
  #DEVICEMANAGER_O
#  TEST_O
#)

Does anyone know how to compile and link a cuda library using cmake? Thanks in advance.

EDIT: After a friend consulted the developer of the FindCUDA.cmake, a bug got fixed in the example provided with FindCUDA.cmake (https://gforge.sci.utah.edu/gf/project/findcuda/scmsvn/?action=browse&path=%2Fcheckout%2Ftrunk%2FFindCuda.html). I'm now able to build the example. In my project I can build the library as needed using the following (cmake 2.8.10 required):

# Let FindCUDA compile the sources with separable compilation enabled and
# bundle them into a single library whose kind is taken from LIB_TYPE.
set(CUDA_SEPARABLE_COMPILATION ON)
set(LIB_TYPE SHARED)
cuda_add_library(
  gpuacceleration ${LIB_TYPE}
  blub.cu
  blab.cu
)

BUT: I cannot link against this library. When I built the lib without separate compilation, I was able to link against it. Now I am getting the following error:

 undefined reference to `__cudaRegisterLinkedBinary_53_tmpxft_00005ab4_00000000_6_blub_cpp1_ii_d07d5695'

for every file with a function used in the interface. Seems strange since it builds without any warning etc. Any ideas how to get this working?

EDIT: I finally figured out how to do this. See @PHD's and my answer for details.

回答1:

I finally got it running ;)

In addition to the answer of @PHD and my comment on it, I changed my CMakeLists.txt to use set(BUILD_SHARED_LIBS OFF), since shared libs are not supported for separate compilation according to the nvcc manual v5.0, page 40.

In addition to that use the latest rev (1223) from the repository instead of rev 1221. I contacted the developer and he fixed some issue blocking this. This revision doesn't set the nvcc -arch=sm_xx flag correctly, so I added this manually for my project and informed the developer of FindCUDA.cmake. So this might get fixed in the future.

Don't forget to get cmake > 2.8.10 for this to work.

Hope this helps anyone but me ;)

Here is my CMakeLists.txt:

#Required for CUDA-Check
cmake_minimum_required(VERSION 2.8.10)

project(gpulib)

# Search the project's own CMake/cuda directory for modules first
# (presumably where the updated FindCUDA.cmake lives -- confirm).
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH})
# ============================================
# === Target
# ============================================
# Every .cu file below the current source directory becomes part of the library.
file(GLOB_RECURSE gpuacc_SRCS "*.cu")
include_directories(.)

# ---------------------------------------
# Find Cuda
find_package(CUDA REQUIRED)

set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)

# Build a static library: shared libraries were reported as unsupported for
# separable compilation with nvcc 5.0.
set(BUILD_SHARED_LIBS OFF)

set(CUDA_SEPARABLE_COMPILATION ON)
# Enable and adjust if the FindCUDA revision in use does not pass -arch itself.
#list(APPEND CUDA_NVCC_FLAGS -arch=sm_20)

set(LIB_NAME "gpuacceleration")
cuda_add_library(${LIB_NAME}
  ${gpuacc_SRCS}
  OPTIONS -DSTUFF="blah blah"
  RELEASE -DNDEBUG
  DEBUG -g -DDEBUG
)

set(PUBLIC_HEADERS "myheader1.h;myheader2.h")

install(FILES ${PUBLIC_HEADERS} DESTINATION include)
# Install the library through its target instead of a hard-coded
# "${CMAKE_BINARY_DIR}/src/lib<name>.a" path, so the rule keeps working
# regardless of the build-tree layout and the platform's library naming.
install(TARGETS ${LIB_NAME}
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
)

EDIT: this is not working! The problem is that there are undefined references to all cuda functions (eg. cudaMalloc) when linking the generated library when building a executable in the main project.

Still working on it



回答2:

EDIT (2016-03-15): Yes, it is confirmed as a bug in FindCUDA: https://cmake.org/Bug/view.php?id=15157


TL;DR: This seems to be a bug in FindCUDA, which makes objects lose info on external definitions before the final linking.

The problem is that, even if separable compilation is enabled, a linking step is still performed for all the targets individually before the final linking.

For instance, I have module.cu with:

#include "module.h"
#include <cstdio>

// Host-side source values and the device __constant__ array they are copied to.
double arr[10] = {1,2,3,4,5,6,7,8,9,10};
__constant__ double carr[10];

// Uploads all of `arr` (ten doubles) into the device symbol `carr`.
void init_carr()
{
  cudaMemcpyToSymbol(carr, arr, sizeof(arr));
}

// Each thread prints the carr element selected by its threadIdx.x.
__global__ void pkernel()
{
  printf("(pkernel) carr[%d]=%g\n", threadIdx.x, carr[threadIdx.x]);
}

// Announces itself on the host, then launches pkernel with 1 block of 10 threads.
void print_carr()
{
  printf("in print_carr\n");
  pkernel<<<1, 10>>>();
}

and module.h with:

// Data defined in module.cu: the host array and its device __constant__ copy.
extern double arr[10];
extern __constant__ double carr[10];

// Host entry points implemented in module.cu.
void init_carr();
void print_carr();

and finally main.cu with:

#include "module.h"

#include <cstdio>

// Prints carr[threadIdx.x] from this translation unit; carr is only declared
// here (extern, via module.h), so the reference must be resolved at device
// link time.
__global__ void kernel() {
  printf("(kernel) carr[%d]=%g\n",threadIdx.x,carr[threadIdx.x]);
}


// Prints carr from both translation units before and after init_carr(),
// synchronizing with the device after every step.
int main(int argc, char *argv[]) {
  // Host-side array declared in module.h.
  printf("arr: %g %g %g ..\n",arr[0],arr[1],arr[2]);

  // First round: runs before init_carr() has been called.
  kernel<<<1,10>>>();
  cudaDeviceSynchronize();
  print_carr();
  cudaDeviceSynchronize();
  // Copy arr into carr on the device.
  init_carr();
  cudaDeviceSynchronize();
  // Second round: same two printers, now after the copy.
  kernel<<<1,10>>>();
  cudaDeviceSynchronize();
  print_carr();
  cudaDeviceSynchronize();

  return 0;
}

This then works fine with the following Makefile:

# Builds the `extern` demo program: every .cu file is compiled to an object
# with relocatable device code, module.o is archived into a static library,
# and nvcc links main.o with that library in one final step.
# Recipe lines must start with a TAB character, not spaces.
NVCC=nvcc
NVCCFLAGS=-arch=sm_20
LIB=libmodule.a
OBJS=module.o main.o
PROG=extern

# Final link, driven by nvcc.
$(PROG): main.o $(LIB)
	$(NVCC) $(NVCCFLAGS) -o $@ $^

# -dc: compile with relocatable device code; $< is the single .cu source.
%.o: %.cu
	$(NVCC) $(NVCCFLAGS) -dc -c -o $@ $<

$(LIB): module.o
	ar cr $@ $^

# clean produces no file, so mark it phony to keep it runnable even if a
# file named "clean" exists.
.PHONY: clean
clean:
	$(RM) $(PROG) $(OBJS) $(LIB)

But then I try to use the following CMakeLists.txt:

# Minimal reproducer: a CUDA library plus an executable that links against it,
# both built by FindCUDA with separable compilation switched on.
cmake_minimum_required(VERSION 2.8.8)

project(extern)

find_package(CUDA REQUIRED)
set(CUDA_SEPARABLE_COMPILATION ON)

site_name(HOSTNAME)

# Target architecture passed to every nvcc invocation.
list(APPEND CUDA_NVCC_FLAGS -arch=sm_20)

cuda_add_library(module module.cu)

cuda_add_executable(extern main.cu)
target_link_libraries(extern module)

When compiling with this, the following happens:

$ cmake ..
-- The C compiler identification is GNU 4.9.2
...
$ make VERBOSE=1
...
[ 25%] Building NVCC (Device) object CMakeFiles/module.dir//./module_generated_module.cu.o
...
-- Generating <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o
/usr/local/cuda/bin/nvcc <...>/module.cu -dc -o <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -arch=sm_20 -DNVCC -I/usr/local/cuda/include
[ 50%] Building NVCC intermediate link file CMakeFiles/module.dir/./module_intermediate_link.o
/usr/local/cuda/bin/nvcc -arch=sm_20 -m64 -ccbin "/usr/bin/cc" -dlink <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o -o <...>/build/CMakeFiles/module.dir/./module_intermediate_link.o
...
/usr/bin/ar cr libmodule.a  CMakeFiles/module.dir/./module_generated_module.cu.o CMakeFiles/module.dir/./module_intermediate_link.o
/usr/bin/ranlib libmodule.a
...
[ 50%] Built target module
[ 75%] Building NVCC (Device) object CMakeFiles/extern.dir//./extern_generated_main.cu.o
...
-- Generating <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o
/usr/local/cuda/bin/nvcc <...>/main.cu -dc -o <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -arch=sm_20 -DNVCC -I/usr/local/cuda/include -I/usr/local/cuda/include
...
[100%] Building NVCC intermediate link file CMakeFiles/extern.dir/./extern_intermediate_link.o
/usr/local/cuda/bin/nvcc -arch=sm_20 -m64 -ccbin "/usr/bin/cc" -dlink <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o -o <...>/build/CMakeFiles/extern.dir/./extern_intermediate_link.o
nvlink error   : Undefined reference to 'carr' in '<...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o'

Clearly, the problem is the nvcc -dlink obj.o -o obj_intermediate_link.o lines. At that point, I guess, the info on external definitions is lost. So the question is: is it possible to make CMake/FindCUDA not do this extra linking step?

Otherwise, I would argue that this is a bug. Do you agree? I can file a bug report with CMake.



回答3:

Tested it with nvcc version:

nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2012 NVIDIA
Corporation Built on Fri_Sep_21_17:28:58_PDT_2012 Cuda compilation
tools, release 5.0, V0.2.1221

and svn revision:

URL: https://gforge.sci.utah.edu/svn/findcuda/trunk
Repository Root: https://gforge.sci.utah.edu/svn/findcuda
Repository UUID: 81322f20-870f-0410-845c-a4cd4664c908
Revision: 1221
Node Kind: directory
Schedule: normal
Last Changed Rev: 1221
Last Changed Date: 2013-01-28 22:31:07 +0100 (Mon, 28 Jan 2013)

This example includes the following classes:

  • lib.h / lib.cu
  • kernel.h / kernel.cu

kernel.cu contains a simple CUDA kernel and a class with a public method to call the CUDA kernel. The class lib contains an instance of the class kernel and a method calling the public method of class kernel.

Following CMakeLists.txt works with this configuration:

# Builds kernel.cu and lib.cu into the test_lib library with FindCUDA and
# installs the library together with its two public headers.
cmake_minimum_required(VERSION 2.6.2)

project(Cuda-project)

# Search the project's own CMake/cuda directory for modules first
# (presumably where the FindCUDA.cmake checkout lives -- confirm).
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake/cuda" ${CMAKE_MODULE_PATH})

find_package(CUDA QUIET REQUIRED)

set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)

# No library kind is given to cuda_add_library below, so this decides it.
set(BUILD_SHARED_LIBS ON)

list(APPEND CUDA_NVCC_FLAGS -DBLAH="he he" -DTEST1="this is a test")

cuda_add_library(test_lib
  kernel.cu
  lib.cu
  # SHARED
  # STATIC
  OPTIONS -DSTUFF="blah blah"
  RELEASE --use_fast_math -DNDEBUG
  DEBUG -g -DDEBUG
  )


install(FILES lib.h kernel.h
  DESTINATION include)
# Install the library through its target instead of the hard-coded
# "${CMAKE_BINARY_DIR}/libtest_lib.so" path, so the rule does not depend on
# the build-tree layout, the platform's naming, or the library kind.
install(TARGETS test_lib
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  RUNTIME DESTINATION bin)


回答4:

I couldn't make it work using CUDA_ADD_EXECUTABLE, so I created a function that makes a custom target to do so.

# add_cuda_exe_lib(<name> <files> <libraries> <is_lib>)
#
# Creates a custom target <name> (part of ALL) that compiles every source in
# <files> with `nvcc -dc` and then runs nvcc once more over all resulting
# objects.
#
#   name      - target name and base name of the output file.
#   files     - ;-separated list of sources, relative to the current source
#               directory.
#   libraries - ;-separated list of library names; each is passed to nvcc as
#               -l<name>, after the always-present -lcudadevrt.
#   is_lib    - when true, the final nvcc call gets -dlink and writes
#               <name>.so; otherwise it writes <name>.
#
# Both steps use CUDA_NVCC_EXECUTABLE and CUDA_NVCC_FLAGS, so FindCUDA (or an
# equivalent definition of these variables) is expected to be in effect.
function(add_cuda_exe_lib name files libraries is_lib)
    set(obj_list)
    foreach(file ${files})
        set(src "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
        set(obj "${CMAKE_CURRENT_BINARY_DIR}/${file}.o")
        add_custom_command(
            OUTPUT "${obj}"
            # Absolute path, so the dependency does not rely on how a
            # relative DEPENDS entry gets resolved.
            DEPENDS "${src}"
            COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --blue "Building NVCC Device object ${src}"
            COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} -dc "${src}" -o "${obj}"
            COMMENT "Building ${src}"
            VERBATIM
        )

        list(APPEND obj_list "${obj}")
    endforeach()

    set(lib_list "-lcudadevrt")
    foreach(library_name ${libraries})
        list(APPEND lib_list "-l${library_name}")
    endforeach()

    set(flags ${CUDA_NVCC_FLAGS})
    if(is_lib)
        # NOTE(review): per the nvcc documentation -dlink emits a device-link
        # object, not a loadable shared library, despite the .so suffix used
        # here -- confirm this is what consumers expect.
        list(APPEND flags "-dlink")
        set(obj_name "${CMAKE_CURRENT_BINARY_DIR}/${name}.so")
        set(link_msg "Linking CUDA device library ${name}")
    else()
        set(obj_name "${CMAKE_CURRENT_BINARY_DIR}/${name}")
        set(link_msg "Linking CXX executable ${name}")
    endif()

    # A custom target is always considered out of date, so this final nvcc
    # step runs on every build; only the per-file objects are incremental.
    add_custom_target(${name} ALL
        COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --red "${link_msg}"
        COMMAND ${CUDA_NVCC_EXECUTABLE} ${flags} ${obj_list} ${lib_list} -o "${obj_name}"
        DEPENDS ${obj_list}
        COMMENT "Linking ${name}"
        # Same escaping rules as the compile step above, so flags containing
        # quotes or spaces reach nvcc unchanged on every platform.
        VERBATIM
    )
endfunction()

# add_cuda_exe(<name> <files> <libraries>)
# Executable flavour of add_cuda_exe_lib: forwards its arguments with
# is_lib set to OFF.
function(add_cuda_exe name files libraries)
    add_cuda_exe_lib(
        ${name}
        "${files}"
        "${libraries}"
        OFF
    )
endfunction()

# add_cuda_lib(<name> <files> <libraries>)
# Library flavour of add_cuda_exe_lib: forwards its arguments with
# is_lib set to ON.
function(add_cuda_lib name files libraries)
    add_cuda_exe_lib(
        ${name}
        "${files}"
        "${libraries}"
        ON
    )
endfunction()

Now, to generate a lib, just use:

add_cuda_lib(testar "devicemanager.cu;blub.cu" "")

Or this to generate an executable:

add_cuda_exe(testar "devicemanager.cu;blub.cu" "")

The last param is a list of libs to be attached.

I hope it helps.