LLVM pass for generating object code is giving segmentation fault - c++

I'm writing a C-subset compiler using the LLVM C++ API, and using a pass for generating object code as well. But the object code generation step is giving segfault when the source files include an if-else block. Below are the codes I'm using
Compiler code for generating the if-else block
llvm::Value *Conditional::generateCode(CodeKit &kit) {
auto *cond = condition->generateCode(kit);
if (cond == nullptr) {
return nullptr;
}
llvm::Function *func = kit.builder.GetInsertBlock()->getParent();
auto *ifBlock = llvm::BasicBlock::Create(kit.context, "if", func);
auto *elseBlock = llvm::BasicBlock::Create(kit.context, "else");
auto *mergeBlock = llvm::BasicBlock::Create(kit.context, "ifelsemerge");
kit.builder.CreateCondBr(cond, ifBlock, elseBlock);
kit.builder.SetInsertPoint(ifBlock);
if (ifstmt != nullptr) {
kit.symbolTable.enterScope();
ifstmt->generateCode(kit);
kit.symbolTable.exitScope();
}
kit.builder.CreateBr(mergeBlock);
ifBlock = kit.builder.GetInsertBlock();
func->getBasicBlockList().push_back(elseBlock);
kit.builder.SetInsertPoint(elseBlock);
if (elsestmt != nullptr) {
kit.symbolTable.enterScope();
elsestmt->generateCode(kit);
kit.symbolTable.exitScope();
}
kit.builder.CreateBr(mergeBlock);
elseBlock = kit.builder.GetInsertBlock();
func->getBasicBlockList().push_back(mergeBlock);
kit.builder.SetInsertPoint(mergeBlock);
return nullptr;
Code for generating object code from the llvm module.
void emitCode(CodeKit &kit) {
kit.module.print(llvm::outs(), nullptr);
auto irFile = "output.bc";
error_code ec;
llvm::raw_fd_ostream irFileStream(irFile, ec, llvm::sys::fs::F_None);
llvm::WriteBitcodeToFile(kit.module, irFileStream);
irFileStream.flush();
auto targetTriple = llvm::sys::getDefaultTargetTriple();
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllAsmPrinters();
string error;
auto target = llvm::TargetRegistry::lookupTarget(targetTriple, error);
auto cpu = "generic";
auto features = "";
llvm::TargetOptions opt;
auto rm = llvm::Optional<llvm::Reloc::Model>();
auto targetMachine =
target->createTargetMachine(targetTriple, cpu, features, opt, rm);
kit.module.setDataLayout(targetMachine->createDataLayout());
kit.module.setTargetTriple(targetTriple);
auto objectFile = "output.o";
llvm::raw_fd_ostream objectFileStream(objectFile, ec,
llvm::sys::fs::OF_None);
llvm::legacy::PassManager pass;
auto fileType = llvm::CGFT_ObjectFile;
targetMachine->addPassesToEmitFile(pass, objectFileStream, nullptr,
fileType);
pass.run(kit.module);
objectFileStream.flush();
}
Source file on which I'm testing at the moment
int factorial(int n) {
if (n <= 0)
return 1;
else
return n * factorial(n - 1);
}
Generated IR code
; ModuleID = 'bootleg c compiler'
source_filename = "bootleg c compiler"
define i32 #factorial(i32 %n) {
entry:
%n1 = alloca i32
store i32 %n, i32* %n1
%n2 = load i32, i32* %n1
%"Less or Equal" = icmp sle i32 %n2, 0
br i1 %"Less or Equal", label %if, label %else
if: ; preds = %entry
ret i32 1
br label %ifelsemerge
else: ; preds = %entry
%n3 = load i32, i32* %n1
%n4 = load i32, i32* %n1
%Subtract = sub i32 %n4, 1
%call = call i32 #factorial(i32 %Subtract)
%Multiply = mul i32 %n3, %call
ret i32 %Multiply
br label %ifelsemerge
ifelsemerge: ; preds = %else, %if
}
GDB stack trace of the segfault
#0 0x00007ffff4386bc0 in llvm::Instruction::getNumSuccessors() const () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#1 0x00007ffff4fd643c in llvm::BranchProbabilityInfo::computePostDominatedByUnreachable(llvm::Function const&, llvm::PostDominatorTree*) ()
from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#2 0x00007ffff4fdb175 in llvm::BranchProbabilityInfo::calculate(llvm::Function const&, llvm::LoopInfo const&, llvm::TargetLibraryInfo const*) ()
from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#3 0x00007ffff4fdb9c4 in llvm::BranchProbabilityInfoWrapperPass::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#4 0x00007ffff43a7d76 in llvm::FPPassManager::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#5 0x00007ffff43a7ff3 in llvm::FPPassManager::runOnModule(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#6 0x00007ffff43a84a0 in llvm::legacy::PassManagerImpl::run(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#7 0x0000555555565b73 in emitCode (kit=...) at c.ast.cpp:88
#8 0x0000555555579957 in generateCode (ast=...) at cc.cpp:25
#9 0x0000555555579a83 in main (argc=2, argv=0x7fffffffdfe8) at cc.cpp:41

ret i32 1
br label %ifelsemerge
is invalid IR, you must have exactly one terminator instruction (e.g. br or ret) in a BasicBlock. You can avoid this by checking for a terminator instruction before inserting the branch,
if (ifBlock->size() == 0 || !ifBlock->back().isTerminator()) {
kit.builder.CreateBr(mergeBlock);
}
// ...
if (elseBlock->size() == 0 || !elseBlock->back().isTerminator()) {
kit.builder.CreateBr(mergeBlock);
}
Note that empty blocks are also invalid so in case both branches of the conditional exit the function, you probably shouldn't generate mergeBlock. AFAIK if you have no branches to an empty block it's fine to leave it in.
Usually you can catch these kinds of bugs in your front-end by adding a verification pass or using llvm::verifyFunction or llvm::verifyModule. Also running llc with the generated IR code will give a more detailed error message.

Related

LLVM beginner: instrument, string type and metadata

I am a beginner in LLVM, and I wanna get the true value of a given varibale name and line number by using LLVM pass. I have several problems.
correctly get the metadata;
get the type of String;
get dynamic value.
For example, I wanna get the value of b after line 7:
#include <iostream>
int main() {
int b;
int a;
std::cin >> a;
b = a; // line 7
return 0;
}
IR of the main function:
; Function Attrs: mustprogress noinline norecurse optnone uwtable
define dso_local i32 #main() #4 !dbg !857 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 0, i32* %1, align 4
call void #llvm.dbg.declare(metadata i32* %2, metadata !858, metadata !DIExpression()), !dbg !859
call void #llvm.dbg.declare(metadata i32* %3, metadata !860, metadata !DIExpression()), !dbg !861
%4 = call nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* #_ZNSirsERi(%"class.std::basic_istream"* nonnull align 8 dereferenceable(16) #_ZSt3cin, i32* nonnull align 4 dereferenceable(4) %3), !dbg !862
%5 = load i32, i32* %3, align 4, !dbg !863
store i32 %5, i32* %2, align 4, !dbg !864
ret i32 0, !dbg !865
}
; METADATA
!857 = distinct !DISubprogram(name: "main", scope: !8, file: !8, line: 11, type: !539, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !9)
!858 = !DILocalVariable(name: "b", scope: !857, file: !8, line: 12, type: !20)
!859 = !DILocation(line: 12, column: 9, scope: !857)
!860 = !DILocalVariable(name: "a", scope: !857, file: !8, line: 13, type: !20)
!861 = !DILocation(line: 13, column: 9, scope: !857)
!862 = !DILocation(line: 14, column: 14, scope: !857)
!863 = !DILocation(line: 15, column: 9, scope: !857)
!864 = !DILocation(line: 15, column: 7, scope: !857)
!865 = !DILocation(line: 16, column: 5, scope: !857)
I need to read dynamic value, so I instrument logvar function:
#include <iostream>
#include <string>
extern "C" void logvar(int i, std::string name) {
std::cout << "Num: " << i << "; Name: " << name << std::endl;
}
And now, my pass.cpp is as follows:
virtual bool runOnFunction(Function &F) {
// Get the function to call from our runtime library.
LLVMContext &Ctx = F.getContext();
std::vector<Type*> paramTypes = {
Type::getInt32Ty(Ctx),
// Here I need a string type to match logvar function,
// but I don't know how to write it.
// Maybe a pointerType?
};
Type *retType = Type::getVoidTy(Ctx);
FunctionType *logFuncType = FunctionType::get(retType, paramTypes, false);
FunctionCallee logFunc =
F.getParent()->getOrInsertFunction("logvar", logFuncType);
for (auto &B : F) {
for (auto &I : B) {
if (auto *op = dyn_cast<LoadInst>(&I)) { // timo_schalachter
int number;
auto alloca = dyn_cast<AllocaInst>(op->getOperand(0));
for (auto user: alloca->users()) {
if (auto store = dyn_cast<StoreInst>(user)) {
auto constant_int = dyn_cast<ConstantInt>(store->getOperand(0));
number = constant_int->getSExtValue();
errs() << number << "\n";
}
}
}
if (auto *op = dyn_cast<StoreInst>(&I)) {
errs() << *op << ".StoreInst\n";
Value *val = op->getValueOperand();
if (auto constant_int = dyn_cast<ConstantInt>(val)) {
int number = constant_int->getSExtValue();
errs() << number << ".\n\n";
} else if (auto constant_fp = dyn_cast<ConstantFP>(val)) {
float number = constant_fp->???;
// I cannot find the function to get the value of float point here.
} // and how to deal with constant string?
// metadata:
// store i32 %4, i32* %2, align 4, !dbg !863
Value *arg1 = op->getOperand(0); // %4 = xxx
Value *arg2 = op->getOperand(1); // %2 = xxx
unsigned mk = op->getContext().getMDKindID("dbg");
MDNode *mdn = op->getMetadata(mk);
if (mdn) {
Metadata *mds = mdn->getOperand(0);
StringRef str;
if (MDString::classof(mds)) {
str = (cast<MDString>(*mds)).getString();
errs() << str;
}
} else {
errs() << "no dbg!\n";
// when I run this code, it always says: "no dbg!"
// I don't know why...
}
// instrumentation
IRBuilder<> builder(op);
builder.SetInsertPoint(&B, ++builder.GetInsertPoint());
Value* args[] = {arg1, ???};
// I cannot find the name of variable.
// I think it should be str in metadata.
// Still the problem: how to write STRING's type in LLVM?
builder.CreateCall(logFunc, args);
}
}
}
return false;
}
Problems are in code. I cannot make myself understood clearly by using English.
I think String maybe i8*. You can read this link https://llvm.org/docs/LangRef.html#function-type to get function type.

Why can't I get topological sort over the control-flow graph (CFG)?

Final goal: Trying to generate CFG related information(such as topological sort) using LLVM.
Status: I'm pretty new to LLVM and kind of lost - any kind of information or blogs to help me get started towards my final goal is great!
My Question: After reading Eli's code and blog post, I get the .ll file first and run the code but I got no result.
Here is the .ll file example:
; ModuleID = 'CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp'
source_filename = "CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad" = type { i8* }
%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B" = type { i8* }
; Function Attrs: noinline optnone uwtable
define void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv() #0 {
%1 = alloca i8*, align 8
%2 = alloca [100 x i8], align 16
%3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad", align 8
%4 = bitcast [100 x i8]* %2 to i8*
call void #llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
%5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
store i8* %5, i8** %1, align 8
%6 = load i8*, i8** %1, align 8
call void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3, i8* %6)
call void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3) #4
ret void
}
; Function Attrs: argmemonly nounwind
declare void #llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
declare void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*, i8*) unnamed_addr #2
; Function Attrs: nounwind
declare void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*) unnamed_addr #3
; Function Attrs: noinline optnone uwtable
define void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv() #0 {
call void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv()
ret void
}
; Function Attrs: noinline optnone uwtable
define internal void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv() #0 {
%1 = alloca i8*, align 8
%2 = alloca [100 x i8], align 16
%3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B", align 8
%4 = bitcast [100 x i8]* %2 to i8*
call void #llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
%5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
store i8* %5, i8** %1, align 8
%6 = load i8*, i8** %1, align 8
call void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3, i8* %6)
call void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3) #4
ret void
}
declare void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*, i8*) unnamed_addr #2
; Function Attrs: nounwind
declare void #_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*) unnamed_addr #3
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (tags/RELEASE_500/final 375507)"}
and here is the .cpp file which generate sort information:
//------------------------------------------------------------------------------
// bb_toposort_sccs LLVM sample. Demonstrates:
//
// * How to implement DFS & topological sort over the control-flow graph (CFG)
// of a function.
// * How to use po_iterator for post-order iteration over basic blocks.
// * How to use scc_iterator for post-order iteration over strongly-connected
// components in the graph of basic blocks.
//
// Eli Bendersky (eliben#gmail.com)
// This code is in the public domain
//------------------------------------------------------------------------------
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Pass.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
using namespace llvm;
// Runs a topological sort on the basic blocks of the given function. Uses
// the simple recursive DFS from "Introduction to algorithms", with 3-coloring
// of vertices. The coloring enables detecting cycles in the graph with a simple
// test.
class TopoSorter {
public:
void runToposort(const Function &F) {
outs() << "Topological sort of " << F.getName() << ":\n";
// Initialize the color map by marking all the vertices white.
for (Function::const_iterator I = F.begin(), IE = F.end(); I != IE; ++I) {
ColorMap[&*I] = TopoSorter::WHITE;
}
// The BB graph has a single entry vertex from which the other BBs should
// be discoverable - the function entry block.
bool success = recursiveDFSToposort(&F.getEntryBlock());
if (success) {
// Now we have all the BBs inside SortedBBs in reverse topological order.
for (BBVector::const_reverse_iterator RI = SortedBBs.rbegin(),
RE = SortedBBs.rend();
RI != RE; ++RI) {
outs() << " " << (*RI)->getName() << "\n";
}
} else {
outs() << " Sorting failed\n";
}
}
private:
enum Color { WHITE, GREY, BLACK };
// Color marks per vertex (BB).
typedef DenseMap<const BasicBlock *, Color> BBColorMap;
// Collects vertices (BBs) in "finish" order. The first finished vertex is
// first, and so on.
typedef SmallVector<const BasicBlock *, 32> BBVector;
BBColorMap ColorMap;
BBVector SortedBBs;
// Helper function to recursively run topological sort from a given BB.
// Returns true if the sort succeeded and false otherwise; topological sort
// may fail if, for example, the graph is not a DAG (detected a cycle).
bool recursiveDFSToposort(const BasicBlock *BB) {
ColorMap[BB] = TopoSorter::GREY;
// For demonstration, using the lowest-level APIs here. A BB's successors
// are determined by looking at its terminator instruction.
const TerminatorInst *TInst = BB->getTerminator();
for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; ++I) {
BasicBlock *Succ = TInst->getSuccessor(I);
Color SuccColor = ColorMap[Succ];
if (SuccColor == TopoSorter::WHITE) {
if (!recursiveDFSToposort(Succ))
return false;
} else if (SuccColor == TopoSorter::GREY) {
// This detects a cycle because grey vertices are all ancestors of the
// currently explored vertex (in other words, they're "on the stack").
outs() << " Detected cycle: edge from " << BB->getName() << " to "
<< Succ->getName() << "\n";
return false;
}
}
// This BB is finished (fully explored), so we can add it to the vector.
ColorMap[BB] = TopoSorter::BLACK;
SortedBBs.push_back(BB);
return true;
}
};
class AnalyzeBBGraph : public FunctionPass {
public:
AnalyzeBBGraph(const std::string &AnalysisKind)
: FunctionPass(ID), AnalysisKind(AnalysisKind) {}
virtual bool runOnFunction(Function &F) {
if (AnalysisKind == "-topo") {
TopoSorter TS;
TS.runToposort(F);
} else if (AnalysisKind == "-po") {
// Use LLVM's post-order iterator to produce a reverse topological sort.
// Note that this doesn't detect cycles so if the graph is not a DAG, the
// result is not a true topological sort.
outs() << "Basic blocks of " << F.getName() << " in post-order:\n";
for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
IE = po_end(&F.getEntryBlock());
I != IE; ++I) {
outs() << " " << (*I)->getName() << "\n";
}
} else if (AnalysisKind == "-scc") {
// Use LLVM's Strongly Connected Components (SCCs) iterator to produce
// a reverse topological sort of SCCs.
outs() << "SCCs for " << F.getName() << " in post-order:\n";
for (scc_iterator<Function *> I = scc_begin(&F), IE = scc_end(&F);
I != IE; ++I) {
// Obtain the vector of BBs in this SCC and print it out.
const std::vector<BasicBlock *> &SCCBBs = *I;
outs() << " SCC: ";
for (std::vector<BasicBlock *>::const_iterator BBI = SCCBBs.begin(),
BBIE = SCCBBs.end();
BBI != BBIE; ++BBI) {
outs() << (*BBI)->getName() << " ";
}
outs() << "\n";
}
} else {
outs() << "Unknown analysis kind: " << AnalysisKind << "\n";
}
return false;
}
// The address of this member is used to uniquely identify the class. This is
// used by LLVM's own RTTI mechanism.
static char ID;
private:
std::string AnalysisKind;
};
char AnalyzeBBGraph::ID = 0;
int main(int argc, char **argv) {
if (argc < 3) {
// Using very basic command-line argument parsing here...
errs() << "Usage: " << argv[0] << " -[topo|po|scc] <IR file>\n";
return 1;
}
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
LLVMContext Context;
std::unique_ptr<Module> Mod(parseIRFile(argv[2], Err, Context));
if (!Mod) {
Err.print(argv[0], errs());
return 1;
}
// Create a pass manager and fill it with the passes we want to run.
legacy::PassManager PM;
PM.add(new AnalyzeBBGraph(std::string(argv[1])));
PM.run(*Mod);
return 0;
}
here is my result looks like when I try to get topo sort:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv:
I tried something else and I found out all the "getName()" print is nothing,
Is there something wrong with my code or somrthing wrong with my llvm IR ?
for(BasicBlock &BB : F){
outs() << "BB:" <<BB.getName() <<" has" <<BB.size() <<"instructions.\n";
for(Instruction &I : BB){
outs() << "details: "<<I <<"name: "<<I.getName() <<"\n";
for(Use &U : I.operands()){
Value *v = U.get();
outs() <<"value:"<< v<<"name:" << v->getName() <<"\n";
}
}
}
Any thoughts are appreciated!
You might want to run the instruction namer pass after you obtain your IR (the .ll file) in order to assign names to the basic blocks and the registers, because currently, they do not have any names apart from their numeric assignment (e.g. %1, etc). You can run this with:
opt -instnamer foo.bc -o foo-named.bc
The 3 functions definitions have only one basic block in them, so you are not going to get anything interesting, but it is a good start.
Another point that you might want to address now before moving on, is the optimization level. All your functions are annotated with optnone that will disable any kind of optimization from other LLVM passes. A more flexible way to get unoptimized IR, but one that will allow further optimizations to take effect on it is to extracted using:
clang -emit-llvm -O1 -Xclang -disable-llvm-passes foo.c

LLVM IR - Can someone explain this behavior?

I'm trying to build a compiler for my language at the moment. In my language, I want to have implicit pointer usage for objects/structs just like in Java. In the program below, I am testing out this feature. However, the program does not run as I had expected. I do not expect you guys to read through my entire compiler code because that would be a waste of time. Instead I was hoping I could explain what I intended for the program to do and you guys could spot in the llvm ir what went wrong. That way, I can adjust the compiler to generate proper llvm ir.
Flow:
[Function] Main - [Return: Int] {
-> Allocates space for structure of one i32
-> Calls createObj function and stores the returning value inside previous allocated space
-> Returns the i32 of the structure
}
[Function] createObj - [Return: struct { i32 }] {
-> Allocates space for structure of one i32
-> Calls Object function on this space (pointer really)
-> Returns this space (pointer really)
}
[Function] Object - [Return: void] {
-> Stores the i32 value of 5 inside of the struct pointer argument
}
The program is that main keeps returning some random number instead of 5. One such number is 159383856. I'm guessing that this is the decimal representation of a pointer address, but I'm not sure why it is printing out the pointer address.
; ModuleID = 'main'
%Object = type { i32 }
define i32 #main() {
entry:
%0 = call %Object* #createObj()
%o = alloca %Object*
store %Object* %0, %Object** %o
%1 = load %Object** %o
%2 = getelementptr inbounds %Object* %1, i32 0, i32 0
%3 = load i32* %2
ret i32 %3
}
define %Object* #createObj() {
entry:
%0 = alloca %Object
call void #-Object(%Object* %0)
%o = alloca %Object*
store %Object* %0, %Object** %o
%1 = load %Object** %o
ret %Object* %1
}
define void #-Object(%Object* %this) {
entry:
%0 = getelementptr inbounds %Object* %this, i32 0, i32 0
store i32 5, i32* %0
ret void
}
This llvm ir is generated from this syntax.
func () > main > (int) {
Object o = createObj();
return o.id;
}
// Create an object and returns it
func () > createObj > (Object) {
Object o = make Object < ();
return o;
}
// Object decl
tmpl Object {
int id; // Property
// This is run every time an object is created.
constructor < () {
this.id = 5;
}
}
It seems like in createObj you're returning a pointer to a stack variable which will no longer be valid after function return.
If you're doing implicit object pointers like Java at minimum you're going to need a call to a heap allocation like malloc which I don't think you have.

llvm: How to get the label of Basic Blocks

I have written a pass to detect and print the label of basicblocks in a function, for I want to use splitBasicBlock() further. I wrote that like this:
virtual bool runOnModule(Module &M)
{
for(Module::iterator F = M.begin(), E = M.end(); F!= E; ++F)
{
errs()<<"Function:"<<F->getName()<<"\n";
//for(Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (iplist<BasicBlock>::iterator iter = F->getBasicBlockList().begin();
iter != F->getBasicBlockList().end();
iter++)
{
BasicBlock* currBB = iter;
errs() << "BasicBlock: " << currBB->getName() << "\n";
}
}
return true;
}
IR file looks like this:
; <label>:63 ; preds = %43
%64 = load i32* %j, align 4
%65 = sext i32 %64 to i64
%66 = load i8** %tempdst, align 8
%67 = getelementptr inbounds i8* %66, i64 %65
store i8 -1, i8* %67, align 1
br label %73
; <label>:68 ; preds = %43
%69 = load i32* %j, align 4
%70 = sext i32 %69 to i64
%71 = load i8** %tempdst, align 8
%72 = getelementptr inbounds i8* %71, i64 %70
store i8 0, i8* %72, align 1
br label %73
; <label>:73 ; preds = %68, %63
br label %74
However, I got nothing about the label:
Function:main
BasicBlock:
BasicBlock:
BasicBlock:
What's wrong with these "unnamed" basic block? What should I do?
While BasicBlocks may be with no name (as indicated by hasName() method) one may print unique BasicBlock identifier by using currBB->printAsOperand(errs(), false) instead of streaming into errs() the value of currBB->getName(). For unnamed BasicBlock this would provide the numerical basic block representation, such as %68 .
Values in LLVM IR are not required to have a name; and indeed, those basic blocks don't have names, which is why you get an empty string from currBB->getName().
The reason that they have names in the LLVM IR printout is because when you print to the textual format of LLVM IR (as it appears in .ll files), you have to assign a name to them to make them referable, so the printer assigns sequential numeric names to basic blocks (and other values). Those numeric names are only created by the printer, though, and don't actually exist in the module.
While compiling source code to bitcode using clang use the below flag
-fno-discard-value-names
You will get the name of basic block as a unique string
I think the behavior of LLVM now is different.
I use similar lines of code and can get the label's name on LLVM-4.0
for (auto &funct : m) {
for (auto &basic_block : funct) {
StringRef bbName(basic_block.getName());
errs() << "BasicBlock: " << bbName << "\n";
}
}
As ElazarR said, currBB->printAsOperand(errs(), false) will print such ID in the error stream, but it is possible to store it in a string as well if this is more interesting to your logic.
In the LLVM CFG generation pass -dot-cfg, they always name the basic block using the BB's name (if any) or its representation as a string. This logic is present in the CFGPrinter.h header (http://llvm.org/doxygen/CFGPrinter_8h_source.html#l00063):
static std::string getSimpleNodeLabel(const BasicBlock *Node,
const Function *) {
if (!Node->getName().empty())
return Node->getName().str();
std::string Str;
raw_string_ostream OS(Str);
Node->printAsOperand(OS, false);
return OS.str();
}
You can use this logic to always return a valid name for the basic block.

llvm exceptions; catch handler not handling, cleanup not called

I i'm trying to create a exception handler inside JIT llvm code. the current documentation regarding exception handling in LLVM is very handwavy at the moment, so i've been trying to reuse most of the snippets i get from http://llvm.org/demo in order to get a working example, but i'm not sure if those are up to date with llvm 2.9 (the version i am using).
This is what the module looks after Module::dump();
; ModuleID = 'testModule'
declare i32 #myfunc()
define i32 #test_function_that_invokes_another() {
entryBlock:
%0 = alloca i8*
%1 = alloca i32
%someName = invoke i32 #myfunc()
to label %exitBlock unwind label %unwindBlock
exitBlock: ; preds = %entryBlock
ret i32 1
unwindBlock: ; preds = %entryBlock
%2 = call i8* #llvm.eh.exception()
store i8* %2, i8** %0
%3 = call i32 (i8*, i8*, ...)* #llvm.eh.selector(i8* %2, i8* bitcast (i32 (...)* #__gxx_personality_v0 to i8*), i8* null)
store i32 1, i32* %1
%4 = load i8** %0
%5 = call i32 (...)* #__cxa_begin_catch(i8* %4) nounwind
%cleanup_call = call i32 #myCleanup()
%6 = call i32 (...)* #__cxa_end_catch()
ret i32 1
}
declare i32 #__gxx_personality_v0(...)
declare i32 #__cxa_begin_catch(...)
declare i32 #__cxa_end_catch(...)
declare i8* #llvm.eh.exception() nounwind readonly
declare i32 #llvm.eh.selector(i8*, i8*, ...) nounwind
declare i32 #myCleanup()
and this is what happens when i try to execute the function:
inside JIT calling C/C++ call
terminate called after throwing an instance of 'int'
Aborted
this shows that the function that throws gets called, it throws, but i never land in the cleanup call. (my cleanup call should have said 'inside JIT calling C/C++ Cleanup')
The function that invokes and (attempts) to catch a thrown exception is:
const inline llvm::FunctionType* getTestFunctionSignature(llvm::LLVMContext& context) {
return llvm::TypeBuilder< unsigned int(), false > ::get(context);
}
llvm::Function* createFunctionThatInvokesAnother( llvm::LLVMContext& ctx, llvm::Module* mod , llvm::Function* another ) {
llvm::Function* result = llvm::Function::Create(getTestFunctionSignature(ctx),
llvm::GlobalValue::ExternalLinkage,
"test_function_that_invokes_another",
mod);
llvm::BasicBlock* entry_block = llvm::BasicBlock::Create(ctx, "entryBlock", result);
llvm::BasicBlock* exit_block = llvm::BasicBlock::Create(ctx, "exitBlock", result);
llvm::BasicBlock* unwind_block = llvm::BasicBlock::Create(ctx, "unwindBlock", result);
llvm::IRBuilder<> builder(entry_block);
llvm::ConstantInt* ci = llvm::ConstantInt::get( mod->getContext() , llvm::APInt( 32 , llvm::StringRef("1"), 10));
llvm::PointerType* pty3 = llvm::PointerType::get(llvm::IntegerType::get(mod->getContext(), 8), 0);
llvm::AllocaInst* ptr_24 = new llvm::AllocaInst(pty3, "", entry_block);
llvm::AllocaInst* ptr_25 = new llvm::AllocaInst(llvm::IntegerType::get(mod->getContext(), 32), "", entry_block);
llvm::Twine name("someName");
builder.CreateInvoke( another , exit_block , unwind_block , "someName" );
builder.SetInsertPoint( exit_block );
builder.CreateRet(ci);
builder.SetInsertPoint( unwind_block );
llvm::Function* func___gxx_personality_v0 = func__gxx_personality_v0(mod);
llvm::Function* func___cxa_begin_catch = func__cxa_begin_catch(mod);
llvm::Function* func___cxa_end_catch = func__cxa_end_catch(mod);
llvm::Function* func_eh_ex = func_llvm_eh_exception(mod);
llvm::Function* func_eh_sel = func__llvm_eh_selector(mod);
llvm::Constant* const_ptr_17 = llvm::ConstantExpr::getCast(llvm::Instruction::BitCast, func___gxx_personality_v0, pty3);
llvm::ConstantPointerNull* const_ptr_18 = llvm::ConstantPointerNull::get(pty3);
llvm::CallInst* get_ex = llvm::CallInst::Create(func_eh_ex, "", unwind_block);
get_ex->setCallingConv(llvm::CallingConv::C);
get_ex->setTailCall(false);
new llvm::StoreInst(get_ex, ptr_24, false, unwind_block);
std::vector<llvm::Value*> int32_37_params;
int32_37_params.push_back(get_ex);
int32_37_params.push_back(const_ptr_17);
int32_37_params.push_back(const_ptr_18);
llvm::CallInst* eh_sel = llvm::CallInst::Create(func_eh_sel, int32_37_params.begin(), int32_37_params.end(), "", unwind_block);
eh_sel->setCallingConv(llvm::CallingConv::C);
eh_sel->setTailCall(false);
new llvm::StoreInst(ci, ptr_25, false, unwind_block);
llvm::LoadInst* ptr_29 = new llvm::LoadInst(ptr_24, "", false, unwind_block);
llvm::CallInst* ptr_30 = llvm::CallInst::Create(func___cxa_begin_catch, ptr_29, "", unwind_block);
ptr_30->setCallingConv(llvm::CallingConv::C);
ptr_30->setTailCall(false);
llvm::AttrListPtr ptr_30_PAL;
{
llvm::SmallVector<llvm::AttributeWithIndex, 4 > Attrs;
llvm::AttributeWithIndex PAWI;
PAWI.Index = 4294967295U;
PAWI.Attrs = 0 | llvm::Attribute::NoUnwind;
Attrs.push_back(PAWI);
ptr_30_PAL = llvm::AttrListPtr::get(Attrs.begin(), Attrs.end());
}
ptr_30->setAttributes(ptr_30_PAL);
llvm::Function* cleanup = call_myCleanup( mod );
builder.CreateCall( cleanup , "cleanup_call");
llvm::CallInst* end_catch = llvm::CallInst::Create(func___cxa_end_catch, "", unwind_block);
builder.CreateRet(ci);
//createCatchHandler( mod , unwind_block );
return result;
}
This gets called like the usual business:
testMain() {
llvm::LLVMContext ctx;
llvm::InitializeNativeTarget();
llvm::StringRef idRef("testModule");
llvm::Module* module = new llvm::Module(idRef, ctx);
std::string jitErrorString;
llvm::ExecutionEngine* execEngine = executionEngine( module , jitErrorString );
llvm::FunctionPassManager* OurFPM = new llvm::FunctionPassManager(module);
llvm::Function *thr = call_my_func_that_throws( module );
llvm::Function* result = createFunctionThatInvokesAnother(ctx, module ,thr);
std::string errorInfo;
llvm::verifyModule(* module, llvm::PrintMessageAction, & errorInfo);
module->dump();
void *fptr = execEngine->getPointerToFunction(result);
unsigned int (*fp)() = (unsigned int (*)())fptr;
try {
unsigned int value = fp();
} catch (...) {
std::cout << " handled a throw from JIT function" << std::endl;
}
}
where my function that throws is:
int myfunc() {
std::cout << " inside JIT calling C/C++ call" << std::endl;
throw 0;
};
llvm::Function* call_my_func_that_throws (llvm::Module* mod) {
std::vector< const llvm::Type* > FuncTy_ex_args;
llvm::FunctionType* FuncTy_ex = llvm::FunctionType::get( llvm::IntegerType::get( mod->getContext() , 32) , FuncTy_ex_args , false);
llvm::Function* result = llvm::Function::Create(FuncTy_ex, llvm::GlobalValue::ExternalLinkage, "myfunc", mod);
result->setCallingConv( llvm::CallingConv::C );
llvm::AttrListPtr PAL;
result->setAttributes( PAL );
llvm::sys::DynamicLibrary::AddSymbol( "myfunc" , (void*) &myfunc );
return result;
}
and my cleanup function is defined in a similar way:
int myCleanup() {
std::cout << " inside JIT calling C/C++ Cleanup" << std::endl;
return 18;
};
llvm::Function* call_myCleanup (llvm::Module* mod) {
std::vector< const llvm::Type* > FuncTy_ex_args;
llvm::FunctionType* FuncTy_ex = llvm::FunctionType::get( llvm::IntegerType::get( mod->getContext() , 32) , FuncTy_ex_args , false);
llvm::Function* result = llvm::Function::Create(FuncTy_ex, llvm::GlobalValue::ExternalLinkage, "myCleanup", mod);
result->setCallingConv( llvm::CallingConv::C );
llvm::AttrListPtr PAL;
result->setAttributes( PAL );
llvm::sys::DynamicLibrary::AddSymbol( "myCleanup" , (void*) &myCleanup );
return result;
}
I've also read this document regarding recent exception handling changes in LLVM, but is not clear how those changes translate to actual, you know, code
Right now the EH code is undergoing a large amount of revision. The demo, if I recall correctly, is not version 2.9, but current development sources - meaning trying to do something with 2.9 is going to be a world of hurt if you try that way.
That said, the EH representation is much better now and numerous patches have gone in to improve the documentation just this week. If you are trying to write a language that uses exceptions via llvm I highly suggest you migrate your code to current development sources.
All of that said, I'm not sure how well exception handling works in the JIT at all right now. It's nominally supported, but you may need to debug the unwind tables that are put into memory to make sure they're correct.