Executing LLVM code results with Segmentation fault - llvm

I have the following code:
#.str_specifier = constant [4 x i8] c"%s\0A\00"
#.int_specifier = constant [4 x i8] c"%d\0A\00"
#.string_var1 = constant [2 x i8] c"f\00"
#.string_var2 = constant [6 x i8] c"Error\00"
; >>> Start Program
declare i32 #printf(i8*, ...)
declare void #exit(i32)
define void #print(i8*) {
call i32 (i8*, ...) #printf(i8* getelementptr ([4 x i8], [4 x i8]* #.str_specifier, i32 0, i32 0), i8* %0)
ret void
}
define void #printi(i32) {
call i32 (i8*, ...) #printf(i8* getelementptr ([4 x i8], [4 x i8]* #.int_specifier, i32 0, i32 0), i32 %0)
ret void
}
declare i8* #malloc(i32)
declare void #free(i8*)
declare void #llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
define void #main()
{ ; >>> Adding function scope
%funcArgs1 = alloca [50 x i32]
; >>> Adding function arguments allocation
; >>> Function body of main
call void #print(i8* getelementptr ([2 x i8], [2 x i8]* #.string_var1, i32 0, i32 0))
%register1 = call i8* #malloc(i32 48)
%register2 = bitcast i8* %register1 to i32*
%register3 = getelementptr inbounds [50 x i32], [50 x i32]* %funcArgs1, i32 0, i32 0
%register4 = ptrtoint i32* %register2 to i32
store i32 %register4, i32* %register3
%register5 = getelementptr inbounds i32, i32* %register2, i32 0
%register6 = add i32 0, 12
store i32 %register6, i32* %register5
%register7 = getelementptr inbounds i32, i32* %register2, i32 1
%register8 = add i32 0, 2
store i32 %register8, i32* %register7
%register9 = getelementptr inbounds i32, i32* %register2, i32 2
store i32 0, i32* %register9
%register10 = getelementptr inbounds i32, i32* %register2, i32 3
store i32 0, i32* %register10
%register11 = getelementptr inbounds i32, i32* %register2, i32 4
store i32 0, i32* %register11
%register12 = getelementptr inbounds i32, i32* %register2, i32 5
store i32 0, i32* %register12
%register13 = getelementptr inbounds i32, i32* %register2, i32 6
store i32 0, i32* %register13
%register14 = getelementptr inbounds i32, i32* %register2, i32 7
store i32 0, i32* %register14
%register15 = getelementptr inbounds i32, i32* %register2, i32 8
store i32 0, i32* %register15
%register16 = getelementptr inbounds i32, i32* %register2, i32 9
store i32 0, i32* %register16
%register17 = getelementptr inbounds i32, i32* %register2, i32 10
store i32 0, i32* %register17
%register18 = getelementptr inbounds i32, i32* %register2, i32 11
store i32 0, i32* %register18
%register19 = load i32, i32* %register3 ; Get variable x
%register20 = add i32 0, 2
%register21 = inttoptr i32 %register20 to i32*
%register22 = getelementptr inbounds i32, i32* %register21, i32 1
%register23 = load i32, i32* %register22
%register24 = getelementptr inbounds i32, i32* %register21, i32 0
%register25 = load i32, i32* %register24
%register26 = add i32 %register23, %register25
%register27 = sub i32 %register26, 4
%register28 = icmp sgt i32 %register20, %register27
br i1 %register28, label %label1, label %label_cont1
label_cont1:
br label %label2
label1:
call void #print(i8* getelementptr ([6 x i8], [6 x i8]* #.string_var2, i32 0, i32 0))
call void #exit(i32 1)
%register200 = add i32 0, 2
br label %label2
label2:
ret void
} ; >>> Closing function scope
For some reason, when I run it, it fails with Segmentation fault (core dumped) without printing an understandable error. The strange thing is that if I comment out the commands in label1 and keep only the branch:
;call void #print(i8* getelementptr ([6 x i8], [6 x i8]* #.string_var2, i32 0, i32 0))
;call void #exit(i32 1)
;%register200 = add i32 0, 2
br label %label2
It does not result in a segmentation fault. If I leave at least one of those commands uncommented (for example the print or the add), it fails. Why does this happen?
EDIT: I think I'm getting the same result here. (Here with comments)
I understand that "Segmentation fault" means that I tried to access memory that
I do not have access to, but why can't I even create a new register with some value?
EDIT2: It looks like br i1 %register28, label %label1, label %label_cont1 is the real reason.
Edit3: The actual full code I'm trying to figure out can be found here. The problem is that changing it to alloca i32 results in Error being printed (instead of 1). It also contains the C code I'm trying to translate to LLVM.

The segfault originates from this line
%register21 = inttoptr i32 %register20 to i32*
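After the cast, register21 supposedly points to some memory location. But which memory location? register20 is just the constant 2 (add i32 0, 2), so the pointer's value is the address 2, which does not exist as valid memory: it was not obtained from an alloca instruction or a malloc call.
Therefore every later instruction that dereferences this pointer (the loads into register23 and register25) reads from an invalid address, which is what triggers the segmentation fault.
I've altered the inttoptr line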

Related

LLVM getelementptr indices use/meaning

I just started learning LLVM and I am wondering why we have two indices in getelementptr? what are the first and second indices (0 and 0) used for?
#tmp = global [18 x i8] c"Hello world!: %d\0A\00"
declare i32 #printf(i8* %0, ...)
define i32 #fact(i32 %x) {
0:
%1 = icmp sle i32 %x, 0
br i1 %1, label %2, label %3
2:
ret i32 1
3:
%4 = sub i32 %x, 1
%5 = call i32 #fact(i32 %4)
%6 = mul i32 %x, %5
ret i32 %6
}
define i32 #main() {
entry:
%0 = getelementptr [18 x i8], [18 x i8]* #tmp, i32 0, i32 0 ; <---- HERE
%1 = call i32 #fact(i32 23)
%2 = call i32 (i8*, ...) #printf(i8* %0, i32 %1)
ret i32 1
}
enter code here

LLVM IR weird behavior on memory alignment of struct?

I get weird behavior regarding the memory alignment of a struct (LLVM 10); it doesn't match what I have learned about memory alignment.
For the C++ code below:
struct CC {
char c1 = 'a';
double d1 = 2.0;
int i1 = 12;
bool b1 = true;
int i2 = 13;
bool b2 = true;
} cc1;
int main() {
CC cc2;
}
And it will generate IR like:
%struct.CC = type <{ i8, [7 x i8], double, i32, i8, [3 x i8], i32, i8, [3 x i8] }>
#cc1 = global { i8, double, i32, i8, i32, i8 } { i8 97, double 2.000000e+00, i32 12, i8 1, i32 13, i8 1 }, align 8
define linkonce_odr void #_ZN2CCC2Ev(%struct.CC*) unnamed_addr #1 align 2 {
%2 = alloca %struct.CC*, align 8
store %struct.CC* %0, %struct.CC** %2, align 8
%3 = load %struct.CC*, %struct.CC** %2, align 8
%4 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 0
store i8 97, i8* %4, align 8
%5 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 2
store double 2.000000e+00, double* %5, align 8
%6 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 3
store i32 12, i32* %6, align 8
%7 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 4
store i8 1, i8* %7, align 4
%8 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 6
store i32 13, i32* %8, align 8
%9 = getelementptr inbounds %struct.CC, %struct.CC* %3, i32 0, i32 7
store i8 1, i8* %9, align 4
ret void
}
My Questions:
Does %struct.CC have to add extra members ([7 x i8], [3 x i8]) for alignment? Is there another way to align a struct type?
Why doesn't #cc1 use %struct.CC?
Why doesn't #cc1 add extra members for alignment?
Why is the store to i1 aligned to 8 rather than 4? What happens if it is aligned to 4?
Why is the store to i2 aligned to 8 rather than 4?
That is a lot of questions; I would be very grateful if anyone could answer some of them.
The answer to almost all of these questions is the same: the platform C/C++ ABI. LLVM (or rather the clang frontend) does whatever is necessary to lay out the struct as prescribed by the ABI. To do so, it may insert explicit padding so that every struct member ends up properly aligned.

I want to change the index of GEP by the result of previous instruction

I'm writing a LLVM IR pass that changes the index operand of GetElementPtr instruction at runtime.
I succeeded replacing the GEP index with constant integers. For example,
the code below will replace every last index of GEP instructions with 0.
// For each instruction in the function
for(inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I){
// Find GEP instruction
if(auto *GI = dyn_cast<GetElementPtrInst>(&*I)){
GI->setOperand(GI->getNumIndices(), ConstantInt::get(Type::getInt32Ty(I->getContext()), 0));
}
}
the result IR is like this.
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 0
The problem is, when I try to change the index by the result of Instruction on runtime, it fails.
Modified pass:
for(inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I){
// Find GEP instruction
if(auto *GI = dyn_cast<GetElementPtrInst>(&*I)){
IRBuilder<> Builder(I);
Instruction* X = Builder.CreateCall(...)
GI->setOperand(GI->getNumIndices(), X);
}
}
Result of the modified pass:
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, void <badref>
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, void <badref>
GEP indexes must be integers
%7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, void <badref>
GEP indexes must be integers
%9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, void <badref>
I also tried to get the constant integer value of the returned value by
I->setOperand(I->getNumIndices(), ConstantInt::get(Type::getInt32Ty(I->getContext()), cast<ConstantInt>(X)->getZExtValue()));
but also doesn't work.
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 784505880
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 784506264
Invalid indices for GEP pointer type!
%7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 784505880
Invalid indices for GEP pointer type!
%9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 784506264
I think the reason is that it is impossible to set a GEP index from a runtime result. What should I do, then, to change every GEP index at runtime?
Do I need to replace the GEP instruction with some address arithmetic and memory access instructions?
Note the error message: GEP indexes must be integers. If the call is to a function that returns int, then it can work. It doesn't always work — you can call foo() and use the result to get the foo()'th element of an array, but when you're retrieving a struct field, you have to have a constant.
In your second case, you're asking for the 784505880th field of the struct. That's either a bug or an amazingly wide struct ;)
AFAIK, using setOperand() directly is unsafe. Instead, get a pointer to the operand you want to change and call GI->replaceUsesOfWith(oldOp, newOp).

Create local string using LLVM

I'm trying to create a local variable using LLVM to store strings, but my code is currently throwing a syntax error.
lli: test2.ll:8:23: error: constant expression type mismatch
%1 = load [6 x i8]* c"hello\00"
My IR code that allocates and store the string:
#.string = private constant [4 x i8] c"%s\0A\00"
define void #main() {
entry:
%a = alloca [255 x i8]
%0 = bitcast [255 x i8]* %a to i8*
%1 = load [6 x i8]* c"hello\00"
%2 = bitcast [6 x i8]* %1 to i8*
%3 = tail call i8* #strncpy(i8* %0, i8* %2, i64 255) nounwind
%4 = getelementptr inbounds [6 x i8]* %a, i32 0, i32 0
%5 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([4 x i8]* #.string, i32 0, i32 0), i8* %4)
ret void
}
declare i32 #printf(i8*, ...)
declare i8* #strncpy(i8*, i8* nocapture, i64) nounwind
Using llc I could see that the way LLVM implements this is by allocating the string as a global variable, but I want it to be local (inside a basic block). The code below works, but I don't want to create this variable "#.str"...
#str = global [1024 x i8] zeroinitializer, align 16
#.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
#.string = private constant [4 x i8] c"%s\0A\00"
define i32 #main() nounwind uwtable {
%1 = tail call i8* #strncpy(i8* getelementptr inbounds ([1024 x i8]* #str, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* #.str, i64 0, i64 0), i64 1024) nounwind
%2 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([4 x i8]* #.string, i32 0, i32 0), i8* %1)
ret i32 0
}
declare i8* #strncpy(i8*, i8* nocapture, i64) nounwind
declare i32 #printf(i8*, ...) #2
Thanks
I figured out by myself after messing more with my previous code.
Below is the code, so people who had the same problem as I had can check
#.string = private constant [4 x i8] c"%s\0A\00"
define void #main() {
entry:
%a = alloca [6 x i8]
store [6 x i8] [i8 104,i8 101,i8 108,i8 108, i8 111, i8 0], [6 x i8]* %a
%0 = bitcast [6 x i8]* %a to i8*
%1 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([4 x i8]* #.string, i32 0, i32 0), i8* %0)
ret void
}
declare i32 #printf(i8*, ...)
Basically, I had to store each of the characters individually in the array and then bitcast it to i8* so I could use the printf function. I couldn't use the c" ... " syntax, which is the one shown on the LLVM webpage http://llvm.org/docs/LangRef.html#id669 . It seems to be a special case in the IR language specification, and such constants are required to be in the global scope.
UPDATE: I was working on the same code again and I found out that the best way is to store a single constant instead of each of the i8 symbols. So line 6 will be replaced by:
store [6 x i8] c"hello\00", [6 x i8]* %0
It is easier to generate code using llvm and it's more readable!

LLVM intrinsic functions

When building a project with LLVM, some function calls will be replaced by intrinsic functions. Is the replacement done by the front-end (e.g. clang) or by the LLVM back-end?
Discussions on the Internet indicate that intrinsic function replacement is related to the optimization options. Does that mean that if no optimization option is given, no intrinsic replacement will happen? Or are there some default intrinsic replacements that cannot be disabled?
If there is a way to disable all intrinsic functions, how do I do that?
It depends. Intrinsics written in code are emitted directly by the front-end. Intrinsics like llvm.memset are introduced into the code during optimization at the IR level (neither the front-end nor the back-end performs these optimizations).
Here is a (quite stupid) example:
int main(int argc, char** argv)
{
int a[8];
for (int i = 0; i != 8; ++i)
a[i] = 0;
for (int i = 7; i >= 0; --i)
a[i] = a[i+1] + argc;
return a[0];
}
Compiled with clang 3.5 (clang -S -emit-llvm) you will get the following IR without any intrinsics:
; Function Attrs: nounwind uwtable
define i32 #main(i32 %argc, i8** %argv) #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i8**, align 8
%a = alloca [8 x i32], align 16
%i = alloca i32, align 4
%i1 = alloca i32, align 4
store i32 0, i32* %1
store i32 %argc, i32* %2, align 4
store i8** %argv, i8*** %3, align 8
store i32 0, i32* %i, align 4
br label %4
; <label>:4 ; preds = %11, %0
%5 = load i32* %i, align 4
%6 = icmp ne i32 %5, 8
br i1 %6, label %7, label %14
; <label>:7 ; preds = %4
%8 = load i32* %i, align 4
%9 = sext i32 %8 to i64
%10 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %9
store i32 0, i32* %10, align 4
br label %11
; <label>:11 ; preds = %7
%12 = load i32* %i, align 4
%13 = add nsw i32 %12, 1
store i32 %13, i32* %i, align 4
br label %4
; <label>:14 ; preds = %4
store i32 7, i32* %i1, align 4
br label %15
; <label>:15 ; preds = %29, %14
%16 = load i32* %i1, align 4
%17 = icmp sge i32 %16, 0
br i1 %17, label %18, label %32
; <label>:18 ; preds = %15
%19 = load i32* %i1, align 4
%20 = add nsw i32 %19, 1
%21 = sext i32 %20 to i64
%22 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %21
%23 = load i32* %22, align 4
%24 = load i32* %2, align 4
%25 = add nsw i32 %23, %24
%26 = load i32* %i1, align 4
%27 = sext i32 %26 to i64
%28 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %27
store i32 %25, i32* %28, align 4
br label %29
; <label>:29 ; preds = %18
%30 = load i32* %i1, align 4
%31 = add nsw i32 %30, -1
store i32 %31, i32* %i1, align 4
br label %15
; <label>:32 ; preds = %15
%33 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
%34 = load i32* %33, align 4
ret i32 %34
}
Compiled again with clang -emit-llvm -O1 you will see this:
; Function Attrs: nounwind readnone uwtable
define i32 #main(i32 %argc, i8** nocapture readnone %argv) #0 {
.preheader:
%a = alloca [8 x i32], align 16
%a6 = bitcast [8 x i32]* %a to i8*
call void #llvm.memset.p0i8.i64(i8* %a6, i8 0, i64 32, i32 4, i1 false)
br label %0
; <label>:0 ; preds = %.preheader, %0
%indvars.iv = phi i64 [ 7, %.preheader ], [ %indvars.iv.next, %0 ]
%1 = add nsw i64 %indvars.iv, 1
%2 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 %1
%3 = load i32* %2, align 4, !tbaa !1
%4 = add nsw i32 %3, %argc
%5 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 %indvars.iv
store i32 %4, i32* %5, align 4, !tbaa !1
%indvars.iv.next = add nsw i64 %indvars.iv, -1
%6 = trunc i64 %indvars.iv to i32
%7 = icmp sgt i32 %6, 0
br i1 %7, label %0, label %8
; <label>:8 ; preds = %0
%9 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 0
%10 = load i32* %9, align 16, !tbaa !1
ret i32 %10
}
The initialization loop was replaced by the llvm.memset intrinsic. The back-end is free to handle the intrinsic as it wants, but commonly llvm.memset is lowered to a libc library call.
To answer your first question: Yes, if you don't optimize your code, then you will not get intrinsics in your IR.
To prevent intrinsics from being introduced into your code, all you have to do is find the optimization pass that introduces them and not run it on your IR. Here is a related question on how to find out which passes are run on the IR: Where to find the optimization sequence for clang -OX?
for -O1 we get:
prune-eh -inline-cost -always-inline -functionattrs -sroa -domtree
-early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -instcombine -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa -loop-rotate -licm -loop-unswitch -instcombine -scalar-evolution -lcssa -indvars -loop-idiom -loop-deletion -loop-unroll -memdep -memcpyopt -sccp -instcombine -lazy-value-info -jump-threading -correlated-propagation -domtree -memdep -dse -adce -simplifycfg -instcombine -barrier -domtree -loops -loop-simplify -lcssa -branch-prob -block-freq -scalar-evolution -loop-vectorize -instcombine -simplifycfg -strip-dead-prototypes -verify
A wild guess: instcombine is introducing the llvm.memset. I ran opt with the passes above, minus instcombine, on the unoptimized IR and got this:
; Function Attrs: nounwind readnone uwtable
define i32 #main(i32 %argc, i8** %argv) #0 {
%a = alloca [8 x i32], align 16
%1 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 8
%2 = load i32* %1, align 4
%3 = add nsw i32 %2, %argc
%4 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 7
store i32 %3, i32* %4, align 4
%5 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 7
%6 = load i32* %5, align 4
%7 = add nsw i32 %6, %argc
%8 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 6
store i32 %7, i32* %8, align 4
%9 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 6
%10 = load i32* %9, align 4
%11 = add nsw i32 %10, %argc
%12 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 5
store i32 %11, i32* %12, align 4
%13 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 5
%14 = load i32* %13, align 4
%15 = add nsw i32 %14, %argc
%16 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 4
store i32 %15, i32* %16, align 4
%17 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 4
%18 = load i32* %17, align 4
%19 = add nsw i32 %18, %argc
%20 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 3
store i32 %19, i32* %20, align 4
%21 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 3
%22 = load i32* %21, align 4
%23 = add nsw i32 %22, %argc
%24 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 2
store i32 %23, i32* %24, align 4
%25 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 2
%26 = load i32* %25, align 4
%27 = add nsw i32 %26, %argc
%28 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 1
store i32 %27, i32* %28, align 4
%29 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 1
%30 = load i32* %29, align 4
%31 = add nsw i32 %30, %argc
%32 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
store i32 %31, i32* %32, align 4
%33 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
%34 = load i32* %33, align 4
ret i32 %34
}
No intrinsics. So, to prevent (at least the memset) intrinsics in your code, don't run instcombine on your IR. However, instcombine is a mighty opt pass that really shortens the code.
Now you have two options:
1. don't use opt passes that introduce intrinsics
2. write your own LLVM opt pass that transforms intrinsics back to whatever they could be replaced with, and run it after optimization and before the back-end starts working
I hope this helps you somehow. Cheers!