I need to implement a library that compiles C code to eBPF bytecode using LLVM/Clang as the backend. The source code will be read from memory, and I need to get the resulting assembly code in memory too.
So far, I have been able to compile to LLVM IR using the following code:
#include <string>
#include <vector>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/TargetInfo.h>
#include <llvm/Support/TargetSelect.h>
using namespace std;
using namespace clang;
using namespace llvm;
// Reproduction case: feeds a small in-memory source to a Clang CompilerInstance
// and runs a code-generation action on it.
// With EmitAssemblyAction this version segfaults inside
// llvm::TargetPassConfig::addPassesToHandleExceptions (observed with LLVM 3.7.1).
int main() {
// Name under which the in-memory source is presented to Clang, and the source itself
constexpr auto testCodeFileName = "test.cpp";
constexpr auto testCode = "int test() { return 2+2; }";
// Prepare compilation arguments: the input file name is the only argument
vector<const char *> args;
args.push_back(testCodeFileName);
// Prepare DiagnosticEngine; the printer writes to errs()
DiagnosticOptions DiagOpts;
TextDiagnosticPrinter *textDiagPrinter =
new clang::TextDiagnosticPrinter(errs(),
&DiagOpts);
// NOTE(review): pDiagIDs is default-constructed and never assigned before it is
// passed on — confirm DiagnosticsEngine is meant to receive it in that state.
IntrusiveRefCntPtr<clang::DiagnosticIDs> pDiagIDs;
// NOTE(review): the engine is allocated with new and never deleted in this function.
DiagnosticsEngine *pDiagnosticsEngine =
new DiagnosticsEngine(pDiagIDs,
&DiagOpts,
textDiagPrinter);
// Initialize CompilerInvocation from the argument list (return value is not checked)
CompilerInvocation *CI = new CompilerInvocation();
CompilerInvocation::CreateFromArgs(*CI, &args[0], &args[0] + args.size(), *pDiagnosticsEngine);
// Map code filename to a memoryBuffer
StringRef testCodeData(testCode);
unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(testCodeData);
// Only the raw pointer is handed over here; `buffer` still owns the memory at this point
CI->getPreprocessorOpts().addRemappedFile(testCodeFileName, buffer.get());
// Create and initialize CompilerInstance
CompilerInstance Clang;
Clang.setInvocation(CI);
// Called without arguments; pDiagnosticsEngine above is only passed to
// CreateFromArgs and CreateTargetInfo.
Clang.createDiagnostics();
// Set target (I guess I can initialize only the BPF target, but I don't know how)
InitializeAllTargets();
const std::shared_ptr<clang::TargetOptions> targetOptions = std::make_shared<clang::TargetOptions>();
targetOptions->Triple = string("bpf");
TargetInfo *pTargetInfo = TargetInfo::CreateTargetInfo(*pDiagnosticsEngine,targetOptions);
Clang.setTarget(pTargetInfo);
// Create and execute action
// Alternative that stops at LLVM IR:
// CodeGenAction *compilerAction = new EmitLLVMOnlyAction();
// NOTE(review): the action is allocated with new and never deleted in this function.
CodeGenAction *compilerAction = new EmitAssemblyAction();
Clang.ExecuteAction(*compilerAction);
// Drop ownership without deleting the buffer — presumably Clang has freed the
// remapped buffer itself by now; TODO confirm.
buffer.release();
}
To compile I use the following CMakeLists.txt:
cmake_minimum_required(VERSION 3.3.2)

# The compiler has to be chosen before project(): project() is what detects and
# caches the toolchain, so setting CMAKE_CXX_COMPILER afterwards comes too late.
set(CMAKE_CXX_COMPILER "clang++")
project(clang_backend CXX)

# Ask llvm-config for the compile flags and for the LLVM libraries to link against.
execute_process(COMMAND llvm-config --cxxflags OUTPUT_VARIABLE LLVM_CONFIG OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND llvm-config --libs OUTPUT_VARIABLE LLVM_LIBS OUTPUT_STRIP_TRAILING_WHITESPACE)
set(CMAKE_CXX_FLAGS ${LLVM_CONFIG})

# Clang libraries used by the program; they are linked before the LLVM libraries.
set(CLANG_LIBS clang clangFrontend clangDriver clangSerialization clangParse
    clangCodeGen clangSema clangAnalysis clangEdit clangAST clangLex
    clangBasic )

add_executable(clang_backend main.cpp)
target_link_libraries(clang_backend ${CLANG_LIBS})
target_link_libraries(clang_backend ${LLVM_LIBS})
If I understood correctly, I should be able to generate assembly code if I change the compiler action to EmitAssemblyAction(), but I'm probably not initializing something as I'm getting a segmentation fault in llvm::TargetPassConfig::addPassesToHandleExceptions (this=this@entry=0x6d8d30) at /tmp/llvm-3.7.1.src/lib/CodeGen/Passes.cpp:419
The code at this line is:
switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
Does anyone have an example or knows what I'm missing?
So, if you compile LLVM with asserts on, the error is much clearer, and it will actually tell you what you need to do:
x: .../src/llvm/lib/CodeGen/LLVMTargetMachine.cpp:63:
void llvm::LLVMTargetMachine::initAsmInfo():
Assertion `TmpAsmInfo && "MCAsmInfo not initialized. "
"Make sure you include the correct TargetSelect.h"
"and that InitializeAllTargetMCs() is being invoked!"' failed.
(I added some line-breaks to that, since it printed as a single long line).
After adding the required InitializeAllTargetMCs() at the beginning of main, I got another error. Looking at the object file generation of my compiler, I "guessed" that it was a problem with another InitializeAll* call. A little bit of testing, and it turns out that you also need InitializeAllAsmPrinters() - which makes sense given that you want to produce assembly code.
I'm not entirely sure how to "see" the results from your code, but adding those two calls to the beginning of main makes it run to completion rather than assert, exit with an error or crash - which is typically a good step in the right direction.
So this is what main looks like in "my" code:
int main() {
constexpr auto testCodeFileName = "test.cpp";
constexpr auto testCode = "int test() { return 2+2; }";
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
// Prepare compilation arguments
vector<const char *> args;
args.push_back(testCodeFileName);
// Prepare DiagnosticEngine
DiagnosticOptions DiagOpts;
TextDiagnosticPrinter *textDiagPrinter =
new clang::TextDiagnosticPrinter(errs(),
&DiagOpts);
IntrusiveRefCntPtr<clang::DiagnosticIDs> pDiagIDs;
DiagnosticsEngine *pDiagnosticsEngine =
new DiagnosticsEngine(pDiagIDs,
&DiagOpts,
textDiagPrinter);
// Initialize CompilerInvocation
CompilerInvocation *CI = new CompilerInvocation();
CompilerInvocation::CreateFromArgs(*CI, &args[0], &args[0] + args.size(), *pDiagnosticsEngine);
// Map code filename to a memoryBuffer
StringRef testCodeData(testCode);
unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(testCodeData);
CI->getPreprocessorOpts().addRemappedFile(testCodeFileName, buffer.get());
// Create and initialize CompilerInstance
CompilerInstance Clang;
Clang.setInvocation(CI);
Clang.createDiagnostics();
// Set target (I guess I can initialize only the BPF target, but I don't know how)
InitializeAllTargets();
const std::shared_ptr<clang::TargetOptions> targetOptions = std::make_shared<clang::TargetOptions>();
targetOptions->Triple = string("bpf");
TargetInfo *pTargetInfo = TargetInfo::CreateTargetInfo(*pDiagnosticsEngine,targetOptions);
Clang.setTarget(pTargetInfo);
// Create and execute action
// CodeGenAction *compilerAction = new EmitLLVMOnlyAction();
CodeGenAction *compilerAction = new EmitAssemblyAction();
Clang.ExecuteAction(*compilerAction);
buffer.release();
}
I strongly suggest that, if you want to develop with Clang & LLVM, you build a debug version of Clang & LLVM - this will help both in tracking down "why" and in catching problems early, where they are more obvious. Use -DCMAKE_BUILD_TYPE=Debug with cmake to get that flavour.
My complete script for getting LLVM & Clang to build:
# Configure a Debug build of LLVM & Clang, compiled with clang/clang++ and
# installed under /usr/local/llvm-debug.
export CC=clang
export CXX=clang++
# Only the X86 backend is enabled here; add BPF (e.g. -DLLVM_TARGETS_TO_BUILD="X86;BPF")
# if the eBPF backend is needed.
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/usr/local/llvm-debug -DLLVM_TARGETS_TO_BUILD=X86 ../llvm
[I was using a late pre-release of 3.8 to test this, but I very much doubt that it's much different from 3.7.1 in this respect]
If anyone is facing a similar problem: I have been able to compile from/to memory by sending the code through stdin and getting the output from stdout.
I don't know if there is some other way to achieve this, maybe using the clang::Driver, but from reading the Clang/LLVM source code I have found that the action I need to execute to get an object is EmitObjAction(), and it seems that this action always produces a .o file if the input is not received from stdin.
Therefore, I replaced stdin/stdout with pipes before executing the action; this way I avoid generating a file.
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <cstdio>
#include <unistd.h>
#include <fcntl.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/TargetInfo.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/IR/Module.h>
using namespace std;
using namespace clang;
using namespace llvm;
int main(int argc, char *argv[])
{
// code to compile for the eBPF virtual machine
constexpr auto testCode = "int main() { return get_nbs(); }";
// Send code through a pipe to stdin
int codeInPipe[2];
pipe2(codeInPipe, O_NONBLOCK);
write(codeInPipe[1], (void *) testCode, strlen(testCode));
close(codeInPipe[1]); // We need to close the pipe to send an EOF
dup2(codeInPipe[0], STDIN_FILENO);
// Prepare reception of code through stdout
int codeOutPipe[2];
pipe(codeOutPipe);
dup2(codeOutPipe[1], STDOUT_FILENO);
// Initialize various LLVM/Clang components
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllTargets();
// Prepare compilation arguments
vector<const char *> args;
args.push_back("--target=bpf"); // Target is bpf assembly
args.push_back("-xc"); // Code is in c language
args.push_back("-"); // Read code from stdin
CompilerInvocation *CI = createInvocationFromCommandLine(makeArrayRef(args) , NULL);
// Create CompilerInstance
CompilerInstance Clang;
Clang.setInvocation(CI);
// Initialize CompilerInstace
Clang.createDiagnostics();
// Create and execute action
CodeGenAction *compilerAction;
compilerAction = new EmitObjAction();
Clang.ExecuteAction(*compilerAction);
// Get compiled object (be carefull with buffer size)
close(codeInPipe[0]);
char objBuffer[2048];
read(codeOutPipe[0], objBuffer, 2048);
return 0;
}