I have the following code:
; Format strings used by the print helpers, followed by the program's string literals.
; Global identifiers are written with the '@' sigil, as LLVM IR requires.
@.str_specifier = constant [4 x i8] c"%s\0A\00"
@.int_specifier = constant [4 x i8] c"%d\0A\00"
@.string_var1 = constant [2 x i8] c"f\00"
@.string_var2 = constant [6 x i8] c"Error\00"
; >>> Start Program
declare i32 @printf(i8*, ...)
declare void @exit(i32)
; Prints the C string passed in %0 using the "%s\n" format.
define void @print(i8*) {
call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @.str_specifier, i32 0, i32 0), i8* %0)
ret void
}
; Prints the integer passed in %0 using the "%d\n" format.
define void @printi(i32) {
call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @.int_specifier, i32 0, i32 0), i32 %0)
ret void
}
declare i8* @malloc(i32)
declare void @free(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
; Prints "f", fills a 12-slot heap block, then runs a range check that prints
; "Error" and exits with status 1 when the check fails.
define void @main()
{ ; >>> Adding function scope
%funcArgs1 = alloca [50 x i32]
; >>> Adding function arguments allocation
; >>> Function body of main
call void @print(i8* getelementptr ([2 x i8], [2 x i8]* @.string_var1, i32 0, i32 0))
; Allocate 48 bytes (twelve i32 slots) and save the block's address, as an i32,
; in slot 0 of %funcArgs1.
%register1 = call i8* @malloc(i32 48)
%register2 = bitcast i8* %register1 to i32*
%register3 = getelementptr inbounds [50 x i32], [50 x i32]* %funcArgs1, i32 0, i32 0
; NOTE(review): no target datalayout is declared here; where pointers are wider
; than 32 bits this ptrtoint drops the upper bits of the address - confirm.
%register4 = ptrtoint i32* %register2 to i32
store i32 %register4, i32* %register3
; Initialise the block: slot 0 = 12, slot 1 = 2, slots 2..11 = 0.
%register5 = getelementptr inbounds i32, i32* %register2, i32 0
%register6 = add i32 0, 12
store i32 %register6, i32* %register5
%register7 = getelementptr inbounds i32, i32* %register2, i32 1
%register8 = add i32 0, 2
store i32 %register8, i32* %register7
%register9 = getelementptr inbounds i32, i32* %register2, i32 2
store i32 0, i32* %register9
%register10 = getelementptr inbounds i32, i32* %register2, i32 3
store i32 0, i32* %register10
%register11 = getelementptr inbounds i32, i32* %register2, i32 4
store i32 0, i32* %register11
%register12 = getelementptr inbounds i32, i32* %register2, i32 5
store i32 0, i32* %register12
%register13 = getelementptr inbounds i32, i32* %register2, i32 6
store i32 0, i32* %register13
%register14 = getelementptr inbounds i32, i32* %register2, i32 7
store i32 0, i32* %register14
%register15 = getelementptr inbounds i32, i32* %register2, i32 8
store i32 0, i32* %register15
%register16 = getelementptr inbounds i32, i32* %register2, i32 9
store i32 0, i32* %register16
%register17 = getelementptr inbounds i32, i32* %register2, i32 10
store i32 0, i32* %register17
%register18 = getelementptr inbounds i32, i32* %register2, i32 11
store i32 0, i32* %register18
%register19 = load i32, i32* %register3 ; Get variable x
; NOTE(review): %register19 (the saved block address) is never used. The pointer
; below is built from the constant 2 in %register20, so the two loads that follow
; read from addresses 2 and 6 rather than from the malloc'ed block. This is kept
; as posted because it is the line the surrounding discussion is about.
%register20 = add i32 0, 2
%register21 = inttoptr i32 %register20 to i32*
%register22 = getelementptr inbounds i32, i32* %register21, i32 1
%register23 = load i32, i32* %register22
%register24 = getelementptr inbounds i32, i32* %register21, i32 0
%register25 = load i32, i32* %register24
; Take the "Error" path when %register20 > (slot1 + slot0 - 4), signed compare.
%register26 = add i32 %register23, %register25
%register27 = sub i32 %register26, 4
%register28 = icmp sgt i32 %register20, %register27
br i1 %register28, label %label1, label %label_cont1
label_cont1:
br label %label2
label1:
call void @print(i8* getelementptr ([6 x i8], [6 x i8]* @.string_var2, i32 0, i32 0))
call void @exit(i32 1)
%register200 = add i32 0, 2
br label %label2
label2:
ret void
} ; >>> Closing function scope
For some reason, when I run it, it fails with Segmentation fault (core dumped) without printing an understandable error. The strange thing is that if I comment out the commands in label1 and keep only the branch:
;call void @print(i8* getelementptr ([6 x i8], [6 x i8]* @.string_var2, i32 0, i32 0))
;call void @exit(i32 1)
;%register200 = add i32 0, 2
br label %label2
It does not result in a segmentation fault. If I leave at least one of those commands uncommented (for example the print or the add), it fails. Why does this happen?
EDIT: I think I'm getting the same result here. (Here with comments)
I understand that "Segmentation fault" means that I tried to access memory that
I do not have access to, but why can't I even create a new register with some value?
EDIT2: It looks like br i1 %register28, label %label1, label %label_cont1 is the real reason.
Edit3: The actual full code I'm trying to figure can be found here. The problem is that changing it to alloca i32 will result with Error (instead of printing 1). It also contains the C code I'm trying to copy to LLVM.
The segfault originates from this line
%register21 = inttoptr i32 %register20 to i32*
After the cast, register21 supposedly points to some memory location. But which memory location? Its value (2, taken from %register20) is not a valid address: it was not obtained from an alloca instruction or a malloc call.
Therefore every instruction that dereferences this pointer, or a pointer derived from it, accesses invalid memory.
I've altered the inttoptr line
I'm writing a LLVM IR pass that changes the index operand of GetElementPtr instruction at runtime.
I succeeded replacing the GEP index with constant integers. For example,
the code below will replace every last index of GEP instructions with 0.
// For each instruction in the function
for(inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I){
// Find GEP instruction
if(auto *GI = dyn_cast<GetElementPtrInst>(&*I)){
GI->setOperand(GI->getNumIndices(), ConstantInt::get(Type::getInt32Ty(I->getContext()), 0));
}
}
the result IR is like this.
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 0
The problem is that when I try to set the index to the result of an instruction computed at runtime, it fails.
Modified pass:
// Same walk as the constant-index version, but the last GEP index is replaced
// with the result of a call emitted by an IRBuilder.
for(inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I){
// Find GEP instruction
if(auto *GI = dyn_cast<GetElementPtrInst>(&*I)){
// Builder is constructed from the iterator I itself.
IRBuilder<> Builder(I);
// The call arguments are elided ("...") in this snippet.
// NOTE(review): the dump below prints the new operand as `void <badref>`, so
// the callee presumably returns void rather than an integer - confirm.
Instruction* X = Builder.CreateCall(...)
// Install the call result as the operand at position getNumIndices().
GI->setOperand(GI->getNumIndices(), X);
}
}
Result of the modified pass:
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, void <badref>
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, void <badref>
GEP indexes must be integers
%7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, void <badref>
GEP indexes must be integers
%9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, void <badref>
I also tried to get the constant integer value of the returned value by
I->setOperand(I->getNumIndices(), ConstantInt::get(Type::getInt32Ty(I->getContext()), cast<ConstantInt>(X)->getZExtValue()));
but also doesn't work.
Original: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 0
Replace: %7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 784505880
Original: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 1
Replace: %9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 784506264
Invalid indices for GEP pointer type!
%7 = getelementptr inbounds %struct.A, %struct.A* %6, i32 0, i32 784505880
Invalid indices for GEP pointer type!
%9 = getelementptr inbounds %struct.A, %struct.A* %8, i32 0, i32 784506264
I think the reason is that it is impossible to set a GEP index from a runtime result. Then what should I do to change every index of a GEP at runtime?
Do I need to replace the GEP instruction with some address arithmetic and memory access instructions?
Note the error message: GEP indexes must be integers. If the call is to a function that returns int, then it can work. It doesn't always work — you can call foo() and use the result to get the foo()'th element of an array, but when you're retrieving a struct field, you have to have a constant.
In your second case, you're asking for the 784505880th field of the struct. That's either a bug or an amazingly wide struct ;)
AFAIK, using setOperand() directly is unsafe. Instead, get a pointer to the operand you want to change and call GI->replaceUsesOfWith(oldOp, newOp).
I have a simple C program.
int
1.main(int argc, char **argv) {
2. unsigned buffer[4] = { 0, 0, 0, 0 };
3. return buffer[argc];
4. }
And the IR code is as below
; Function Attrs: norecurse nounwind readnone uwtable
; Zero-fills a 4-element i32 buffer and returns buffer[argc].
; Global identifiers use the '@' sigil; '#0' and '#3' are attribute-group references.
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 !dbg !6 {
%buffer = alloca [4 x i32], align 16
tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !14, metadata !21), !dbg !22
tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !15, metadata !21), !dbg !23
%1 = bitcast [4 x i32]* %buffer to i8*, !dbg !24
call void @llvm.lifetime.start(i64 16, i8* %1) #3, !dbg !24
tail call void @llvm.dbg.declare(metadata [4 x i32]* %buffer, metadata !16, metadata !21), !dbg !25
; Zero all 16 bytes of %buffer.
call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 16, i32 16, i1 false), !dbg !26
; %argc is sign-extended to i64 and used directly as the array index; no bounds
; check is performed before the load.
%2 = sext i32 %argc to i64, !dbg !28
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %buffer, i64 0, i64 %2, !dbg !28
%4 = load i32, i32* %3, align 4, !dbg !28, !tbaa !29
call void @llvm.lifetime.end(i64 16, i8* %1) #3, !dbg !33
ret i32 %4, !dbg !34
}
I want to check whether the index accessed at line 3 is a valid index. For this comparison I need to extract the value stored in argc. Below is the piece of code I have written to obtain the value of argc:
auto gep = llvm::dyn_cast<llvm::GetElementPtrInst>(inst);
auto operand2 = gep->getOperand(2);
outs() << "operand 2 "<<*operand2<<"\n";
auto newOperand =operand2.getOperand(0);
outs()<<"New operand "<<*newOperand<<"\n";
Output :-
operand 2 %2 = sext i32 %argc to i64, !dbg !28
New operand i32 %argc
How can I get the value of %argc?
The return value of getOperand function is Value* object of the argc variable (variable newOperand in your code). You can pass that value to any new instruction that you might want to inject (for example CreateICmpEQ) in the IR to compare value of argc with some constant value.
I am writing LLVM IR code which involves vector operations. I did an integer vector comparison with the 'icmp' instruction, which resulted in a vector of bools, say <8 x i1>. My problem is that I want to convert these 8 bits to the corresponding integer value without traversing the vector (extracting elements from the vector). I tried 'bitcast <8 x i1> to i8', which seems to convert only the first bit of the vector to i8; correct me if I am wrong. Can someone suggest a way to do this?
; Fills the first four elements of two 8 x i32 arrays, compares the arrays as
; <8 x i32> vectors and returns the <8 x i1> result bitcast to i8.
; The function name uses the '@' sigil; '#0' is an attribute-group reference.
define i8 @main() #0 {
entry:
%A = alloca [8 x i32], align 16
%B = alloca [8 x i32], align 16
; A[0..3] = 90, 91, 92, 93
%arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %A, i64 0, i64 0
store i32 90, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32]* %A, i64 0, i64 1
store i32 91, i32* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds [8 x i32], [8 x i32]* %A, i64 0, i64 2
store i32 92, i32* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32]* %A, i64 0, i64 3
store i32 93, i32* %arrayidx3, align 4
; B[0..3] = 90, 1, 92, 93 (differs from A only at index 1)
%arrayidx4 = getelementptr inbounds [8 x i32], [8 x i32]* %B, i64 0, i64 0
store i32 90, i32* %arrayidx4, align 4
%arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %B, i64 0, i64 1
store i32 1, i32* %arrayidx5, align 4
%arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %B, i64 0, i64 2
store i32 92, i32* %arrayidx6, align 8
%arrayidx7 = getelementptr inbounds [8 x i32], [8 x i32]* %B, i64 0, i64 3
store i32 93, i32* %arrayidx7, align 4
br label %vector.body
vector.body:
; NOTE(review): elements 4..7 of %A and %B are never stored, so the upper four
; lanes of both loaded vectors hold uninitialised stack contents.
%0 = bitcast [8 x i32]* %A to <8 x i32>*
%1 = bitcast [8 x i32]* %B to <8 x i32>*
%2 = load <8 x i32>, <8 x i32>* %0
%3 = load <8 x i32>, <8 x i32>* %1
%4 = icmp eq <8 x i32> %2, %3
%5 = bitcast <8 x i1> %4 to i8
ret i8 %5;
}
I am using 'lli' to run this code without any flags. The output is expected to be 11, but I am getting 1 or 0.
Thank you so much in advance.
As far as I understand, you can't do that without calling a platform-specific intrinsic. I noticed this when I was unable to write target-independent code in C++.
For example, the code below:
// v8i: compiler vector-extension type, 32 bytes wide (initialised below with 8 ints).
typedef int v8i __attribute__((vector_size(32)));
// Demonstrates the portable attempt: compare two vectors element-wise, then
// reinterpret the comparison result as a single char and print it.
int main() {
v8i a = { 1, 2, 3, 4, 5, 6, 7, 8};
v8i b = { 0, 2, 3, 4, 5, 6, 7, 0};
// The comparison result is itself an 8-lane v8i, not an 8-bit mask.
v8i cmp = (a == b);
// Reads only the first byte of cmp's storage, not one bit per lane.
char res = *(char*)&cmp;
printf("%d\n", res);
return 0;
}
produces LLVM IR which is quite close to what you wrote (with the appropriate bitcast).
Unfortunately it didn't work as expected.
That's because <8 x i1> doesn't exist on the processor. For example, in x86 AVX2, _mm256_cmpeq_epi32 yields a __m256i.
Bitcasting that to a char will just take the first 8 bits of that register.
I wrote instead intel AVX2 specific code, and found the appropriate instruction : intel intrinsic guide
So this code does what you need:
#include <cstdio>
#include <cstdlib>
#include <immintrin.h>

// Compares two vectors of eight 32-bit integers lane by lane and packs the
// eight results into the low bits of an int: bit i is set when lane i of `a`
// equals lane i of `b`. Needs an AVX2-capable target (e.g. build with -mavx2).
int main() {
  // _mm256_set_epi32 lists lanes from highest to lowest, so lane 0 is the
  // last argument.
  const __m256i a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
  const __m256i b = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 0);

  // Each lane of eq is all-ones when equal, zero otherwise; movemask gathers
  // the top bit of every 32-bit lane into one mask.
  const __m256i eq = _mm256_cmpeq_epi32(a, b);
  const int res = _mm256_movemask_ps(_mm256_castsi256_ps(eq));
  printf("res = %d\n", res);

  // _mm256_extract_epi32 takes its lane index as an immediate, so it cannot be
  // driven by a loop variable; spill the vectors to plain arrays instead.
  int lanes_a[8];
  int lanes_b[8];
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(lanes_a), a);
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(lanes_b), b);

  for (int i = 0; i < 8; ++i) {
    // Bit i of the mask belongs to lane i, so shift right and test bit 0.
    printf("%d %d -> %d\n", lanes_a[i], lanes_b[i], (res >> i) & 1);
  }
  return 0;
}
In terms of LLVM IR, it turns out you need a couple of additional casts (a sext, then a bitcast to a float vector) and a call to the intrinsic
@llvm.x86.avx.movmsk.ps.256
rewriting by hand the llvm-IR code leads to :
; Compares two <8 x i32> vectors and prints the per-lane equality mask produced
; by the AVX movmsk intrinsic. Global identifiers use the '@' sigil; '#0', '#1'
; and '#2' refer to the attribute groups defined after this block.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@formatString = private constant [4 x i8] c"%d\0A\00"
define i32 @main() #0 {
%a = alloca <8 x i32>, align 32
%b = alloca <8 x i32>, align 32
store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %a, align 32
store <8 x i32> <i32 0, i32 2, i32 3, i32 0, i32 5, i32 0, i32 7, i32 0>, <8 x i32>* %b, align 32
%1 = load <8 x i32>, <8 x i32>* %a, align 32
%2 = load <8 x i32>, <8 x i32>* %b, align 32
; Lanes 1, 2, 4 and 6 hold equal values in %a and %b.
%3 = icmp eq <8 x i32> %1, %2
; Widen each i1 to a full 32-bit lane, then view the lanes as floats so they
; match the intrinsic's <8 x float> parameter.
%4 = sext <8 x i1> %3 to <8 x i32>
%5 = bitcast <8 x i32> %4 to <8 x float>
%res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %5)
%6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @formatString, i32 0, i32 0), i32 %res)
ret i32 0
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) #1
declare i32 @printf(i8*, ...) #2
attributes #0 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-fma4,-hle,-pku,-prfchw,-rdseed,-rtm,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-fma4,-hle,-pku,-prfchw,-rdseed,-rtm,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
The generated assembly (by llc) looks quite optimal:
vmovaps .LCPI0_0(%rip), %ymm0 # ymm0 = [1,2,3,4,5,6,7,8]
vmovaps %ymm0, 32(%rsp)
vmovdqa .LCPI0_1(%rip), %ymm0 # ymm0 = [0,2,3,0,5,0,7,0]
vmovdqa %ymm0, (%rsp)
vpcmpeqd 32(%rsp), %ymm0, %ymm0
vmovmskps %ymm0, %esi
I found this way working.
; Compares two constant <8 x i32> vectors and returns the eight i1 results
; packed into one byte by bitcasting to <1 x i8> and extracting element 0.
; The function name uses the '@' sigil; '#0' is an attribute-group reference.
define i8 @main() #0 {
entry:
; Both operands are the same constants, so every lane compares equal.
%0 = icmp eq <8 x i32> <i32 90,i32 91,i32 92,i32 93, i32 94,i32 95,i32 96,i32 97>, <i32 90,i32 91,i32 92,i32 93, i32 94,i32 95,i32 96,i32 97>
%1 = bitcast <8 x i1> %0 to <1 x i8>
%2 = extractelement <1 x i8> %1, i32 0
ret i8 %2
}
This is similar to the code I posted in the question. I checked the result with "echo $?" and I am getting the expected result.