I am using the following command line for clang:
clang -Dcl_clang_storage_class_specifiers -isystem $LIBCLC/generic/include -include clc/clc.h -target nvptx--nvidiacl -x cl some_kernel.cl -emit-llvm -S -o some_kernel.ll
the result is:
; ModuleID = 'kernel.cl'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx--nvidiacl"
; Function Attrs: noinline nounwind
define void @vector_add(float addrspace(1)* nocapture %vec1, float addrspace(1)* nocapture %vec2, float addrspace(1)* nocapture %vec3) #0 {
%1 = tail call i32 @get_global_id(i32 0) #2
%2 = getelementptr inbounds float addrspace(1)* %vec1, i32 1
%3 = load float addrspace(1)* %2, align 4, !tbaa !2
%4 = getelementptr inbounds float addrspace(1)* %vec2, i32 2
%5 = load float addrspace(1)* %4, align 4, !tbaa !2
%6 = fadd float %3, %5
%7 = fmul float %6, 5.000000e+00
%8 = getelementptr inbounds float addrspace(1)* %vec3, i32 %1
store float %7, float addrspace(1)* %8, align 4, !tbaa !2
ret void
}
declare i32 @get_global_id(i32) #1
attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!opencl.kernels = !{!0}
!nvvm.annotations = !{!1}
!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @vector_add}
!1 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @vector_add, metadata !"kernel", i32 1}
!2 = metadata !{metadata !"float", metadata !3}
!3 = metadata !{metadata !"omnipotent char", metadata !4}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
As you may see in the parameter list it generates a nocapture ABI.
But when I try to do this with this code:
/*Create a diagnostics*/
mCompilerInst.createDiagnostics();
/*Creating NVIDIA-CL*/
clang::TargetOptions TO;
TO.Triple = "nvptx--nvidiacl";
TargetInfo* feTarget =
clang::TargetInfo::CreateTargetInfo(mCompilerInst.getDiagnostics(), &TO);
mCompilerInst.setTarget(feTarget);
/*Commandline Parameters*/
std::vector<const char*> args;
args.push_back("-Dcl_clang_storage_class_specifiers");
args.push_back("-include");
args.push_back("clc/clc.h");
args.push_back("-xcl");
args.push_back("-S");
/* Create the compiler invocation */
clang::CompilerInvocation::CreateFromArgs(mCompilerInst.getInvocation(), &args[0], &args[0] + args.size(), mCompilerInst.getDiagnostics());
CodeGenerator* llvmCodeGen = clang::CreateLLVMCodeGen(
mCompilerInst.getDiagnostics(), "kernel.cl", mCompilerInst.getCodeGenOpts(),mCompilerInst.getTargetOpts(),ctx);
ParseAST(mCompilerInst.getPreprocessor(),llvmCodeGen,mCompilerInst.getASTContext());
mCompilerInst.getDiagnosticClient().EndSourceFile();
if(mCompilerInst.getDiagnostics().hasErrorOccurred()) {
llvm::outs() << "CAN'T COMPILE THE CODE";
}
/*GOING FOR THE LLVM BYTE_CODE*/
llvm::Module* module = llvmCodeGen->GetModule();
std::ofstream myfile;
myfile.open ("result",(std::ios::binary|std::ios::out));
llvm::AssemblyAnnotationWriter writer;
llvm::raw_os_ostream raw_stream(myfile);
module->print(raw_stream, &writer);
myfile.flush();
myfile.close();
what is being outputted is:
; ModuleID = 'kernel.cl'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx--nvidiacl"
; Function Attrs: noinline nounwind
define void @mat_ex(float addrspace(1)* %vec1, float addrspace(1)* %vec2, float addrspace(1)* %vec3) #0 {
%1 = alloca float addrspace(1)*, align 4
%2 = alloca float addrspace(1)*, align 4
%3 = alloca float addrspace(1)*, align 4
%den = alloca i32, align 4
%offset = alloca i32, align 4
%i = alloca i32, align 4
store float addrspace(1)* %vec1, float addrspace(1)** %1, align 4, !tbaa !2
store float addrspace(1)* %vec2, float addrspace(1)** %2, align 4, !tbaa !2
store float addrspace(1)* %vec3, float addrspace(1)** %3, align 4, !tbaa !2
%4 = call i32 @get_global_id(i32 0)
store i32 %4, i32* %i, align 4, !tbaa !5
store i32 4, i32* %den, align 4, !tbaa !5
%5 = load i32* %den, align 4, !tbaa !5
%6 = sdiv i32 2, %5
%7 = mul nsw i32 %6, 4
%8 = add nsw i32 5, %7
store i32 %8, i32* %offset, align 4, !tbaa !5
%9 = load i32* %i, align 4, !tbaa !5
%10 = load float addrspace(1)** %1, align 4, !tbaa !2
%11 = getelementptr inbounds float addrspace(1)* %10, i32 %9
%12 = load float addrspace(1)* %11, align 4, !tbaa !6
%13 = load i32* %i, align 4, !tbaa !5
%14 = load float addrspace(1)** %2, align 4, !tbaa !2
%15 = getelementptr inbounds float addrspace(1)* %14, i32 %13
%16 = load float addrspace(1)* %15, align 4, !tbaa !6
%17 = fadd float %12, %16
%18 = load i32* %offset, align 4, !tbaa !5
%19 = sitofp i32 %18 to float
%20 = fmul float %17, %19
%21 = load i32* %i, align 4, !tbaa !5
%22 = load float addrspace(1)** %3, align 4, !tbaa !2
%23 = getelementptr inbounds float addrspace(1)* %22, i32 %21
store float %20, float addrspace(1)* %23, align 4, !tbaa !6
ret void
}
declare i32 @get_global_id(i32) #1
attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!opencl.kernels = !{!0}
!nvvm.annotations = !{!1}
!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mat_ex}
!1 = metadata !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*)* @mat_ex, metadata !"kernel", i32 1}
!2 = metadata !{metadata !"any pointer", metadata !3}
!3 = metadata !{metadata !"omnipotent char", metadata !4}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
!5 = metadata !{metadata !"int", metadata !3}
!6 = metadata !{metadata !"float", metadata !3}
basically it mismatches, and I can't figure out why. Why are there are no "nocaptures"? I think the extra code is generated due to missing "nocapture".