LLVM IR get a pointer from a llvalue - llvm

I'm working with the LLVM IR in Ocaml to build a toy language and, now my problem is to convert the variable into the reference to this variable.
In other words, my simple program is this
int main(){
int i;
i = 2;
int *p;
p = &i;
print(*p);
return 0;
}
and my problem is to get the pointer of the variable i in the instruction p = &i;, my actual
IR generated is
define i32 #main() {
entry:
%i = alloca i32
store i32 2, i32* %i
%p = alloca i32*
%0 = getelementptr i32, i32* %i, i32 0
store i32* %0, i32** %p
%1 = load i32*, i32** %p
%2 = load i32, i32* %1
call void #print(i32 %2)
ret i32 0
}
I don't like this line %0 = getelementptr i32, i32* %i, i32 0, and I think that I'm only lucky that my code work as expected.
To summarize my question is, What is the good practice to make this memory operation with a variable like C language? In particular, I need to to the following
i = 2;
int *p;
p = &i;
And also
int *p;
p = &i;
*p = *p + 2;
I'm missing something because when I try to compile code like that *p = *p + 2; I receive some core dump.
I noted also that clang for my first example doesn't use getelementptr, but generate some code like that
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 #main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32*, align 8
store i32 0, i32* %1, align 4
store i32 2, i32* %2, align 4
store i32* %2, i32** %3, align 8
%4 = load i32*, i32** %3, align 8
%5 = load i32, i32* %4, align 4
%6 = call i32 (i32, ...) bitcast (i32 (...)* #print to i32 (i32, ...)*)(i32 %5)
ret i32 0
}
In my grammar, the *p is a pointer and I convert it into llvm IR into an llvm pointer type.

Related

How do I eliminate LLVM function calls and replace them with basic instructions?

My Problem
I am new to LLVM and C++.
I am currently creating an LLVM backend compiler and need to replace LLVM function calls with the instructions in its definition.
Is there already an existing pass that accomplishes this?
Examples
For example, I have the following C code, compiled to LLVM IR with clang-14 -S -emit-llvm.
int add(int a, int b) {
return a + b;
}
int main() {
int a = 10;
int b = 20;
int c = add(a, b);
return c;
}
Then, I get a LLVM IR code below.
define dso_local i32 #add(i32 noundef %a, i32 noundef %b) #0 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
%0 = load i32, i32* %a.addr, align 4
%1 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 #main() #0 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 10, i32* %a, align 4
store i32 20, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%call = call i32 #add(i32 noundef %0, i32 noundef %1)
store i32 %call, i32* %c, align 4
%2 = load i32, i32* %c, align 4
ret i32 %2
}
I want to replace the function call #add with instructions in it's definition from the code above using opt command, and emit the following new code.
define dso_local i32 #main() #0 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 10, i32* %a, align 4
store i32 20, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add nsw i32 %0, %1
store i32 %add, i32* %c, align 4
%2 = load i32, i32* %c, align 4
ret i32 %2
}
I searched the following sites for such a path, but could not find one suitable.
https://llvm.org/docs/Passes.html#loops-natural-loop-information

LLVM: Instruction does not dominate all uses - No control flow

I implemented a function pass which iterates over basic block instructions and tracks all instructions that have a type of IntegerTy.
Here is the snippet of the pass that does it:
if (!I->isTerminator()){
Type::TypeID datatype = I->getType()->getTypeID();
if (datatype == llvm::Type::IntegerTyID) {
IRBuilder<> IRB(I);
Value* v_value = IRB.CreateZExt(I, IRB.getInt64Ty());
Value *args[] = {v_value};
IRB.CreateCall(NNT_log_int, args);
}
}
However the IRB.CreateZExt(I, IRB.getInt64Ty()); command seems to create a Instruction does not dominate all uses! problem.
I understand the nature of the issue (here and here there are similar problems).
My point of confusion that I apply this pass to a toy program with no if statements or any other control flow statements, yet I still encounter this problem.
The error message:
Instruction does not dominate all uses!
%2 = load i32, i32* %y, align 4
%1 = zext i32 %2 to i64
Instruction does not dominate all uses!
%4 = load i32, i32* %y, align 4
%3 = zext i32 %4 to i64
Note the fact that the inserted zext instructions name a constant with a counter number less than the previous instruction - I think this is the problem but I have no idea why my pass does this!!!
Here is the IR of my toy program before the application of the pass:
; Function Attrs: noinline nounwind optnone uwtable
define i32 #_Z3fooi(i32 %x) #4 {
entry:
%x.addr = alloca i32, align 4
%y = alloca i32, align 4
%z = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 0, i32* %y, align 4
%0 = load i32, i32* %x.addr, align 4
%add = add nsw i32 %0, 3
store i32 %add, i32* %y, align 4
%1 = load i32, i32* %y, align 4
store i32 %1, i32* %x.addr, align 4
%2 = load i32, i32* %y, align 4
ret i32 %2
}
; Function Attrs: noinline nounwind optnone uwtable
define i32 #_Z3bari(i32 %panos) #4 {
entry:
%panos.addr = alloca i32, align 4
%y = alloca i32, align 4
store i32 %panos, i32* %panos.addr, align 4
%0 = load i32, i32* %panos.addr, align 4
%add = add nsw i32 %0, 2
store i32 %add, i32* %y, align 4
%1 = load i32, i32* %y, align 4
ret i32 %1
}
Also, note that that the problematic instructions are before a terminator - Again I think that this is related.
Any ideas will be highly appreciated !
Your zext instruction uses I, but you're inserting it before I. When you create the IRBuilder, you should pass in the instruction after I as the insert point. For example like this:
IRBuilder<> IRB(I->getNextNode());

LLVM API optimisation run

I am trying to perform -O2 optimisation with LLVM IR obtained by calling CLANG API. Unfortunately, optimisation works only with IR created with manual calls. I have the following function:
int mult_add(int x, int y){
if(x > 2){
return y + 1 + 2;
} else {
return y - 1 + 2;
}
}
And with these calls:
clang -S -emit-llvm main.cpp
opt main.ll -o opt.ll -S -O2
I get the correct result:
define i32 #_Z8mult_addii(i32, i32) local_unnamed_addr #0 {
%3 = icmp sgt i32 %0, 2
%.sink = select i1 %3, i32 3, i32 1
%4 = add nsw i32 %.sink, %1
ret i32 %4
}
Unfortunately, when I do it through LLVM API with legacy::PassManager and legacy::FunctionPassManager optimisation simply does not work and got long ugly code:
define i32 #_Z8mult_addii(i32, i32) #0 {
%3 = alloca i32, align 4
%4 = alloca i32, align 4
%5 = alloca i32, align 4
store i32 %0, i32* %4, align 4
store i32 %1, i32* %5, align 4
%6 = load i32, i32* %4, align 4
%7 = icmp sgt i32 %6, 2
br i1 %7, label %8, label %12
; <label>:8: ; preds = %2
%9 = load i32, i32* %5, align 4
%10 = add nsw i32 %9, 1
%11 = add nsw i32 %10, 2
store i32 %11, i32* %3, align 4
br label %16
; <label>:12: ; preds = %2
%13 = load i32, i32* %5, align 4
%14 = sub nsw i32 %13, 1
%15 = add nsw i32 %14, 2
store i32 %15, i32* %3, align 4
br label %16
; <label>:16: ; preds = %12, %8
%17 = load i32, i32* %3, align 4
ret i32 %17
}
Seems like CLANG creates IR in some unoptimisable state? Because running the passes on a manual created IR works fine.
By the way, PMBuilder.populateModulePassManager is called, here is the code:
legacy::PassManager Passes;
legacy::FunctionPassManager FPasses(M2.get());
AddOptimizationPasses(Passes, FPasses, &(TheJIT->getTargetMachine()), 2, 0);
Passes.add(createPrintModulePass(outs()));
Passes.run(*M2);
And AddOptimizationPasses is stolen and simplified from opt utility:
static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
legacy::FunctionPassManager &FPM,
TargetMachine *TM, unsigned OptLevel,
unsigned SizeLevel) {
FPM.add(createVerifierPass());
PassManagerBuilder Builder;
Builder.OptLevel = OptLevel;
Builder.SizeLevel = SizeLevel;
Builder.Inliner = createFunctionInliningPass(50);
Builder.DisableUnitAtATime = true;//!UnitAtATime;
Builder.DisableUnrollLoops = false;
if (TM)
TM->adjustPassManager(Builder);
//Builder.populateFunctionPassManager(FPM);
Builder.populateModulePassManager(MPM);
}
By the way, initialisation is following:
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
Unfortunately, it does not work.
Did you forget to populate the pass manager?
PassManagerBase& PM = ...; // create the pass manager.
PassManagerBuilder PMBuilder;
PMBuilder.OptLevel = 2;
PMBuilder.DisableUnrollLoops = false;
PMBuilder.Inliner = createFunctionInliningPass(50);
PMBuilder.populateModulePassManager(PM);
Module& = ...; // your IR module here
PM.run(M);
Note that a "FunctionPassManager" may not do what you need. You're likely looking for legacy::PassManager instead (which can hold any type of pass).

LoadInst and StoreInst Values and addresses LLVM

I have a file print.c, which has two functions:
void printLoad(...) {
// print address and value of memory location from which value
printf("address=... value=...", ...);
}
void printStore(...) {
// print address and value of memory location from which value
}
I have an LLVM pass which iterates over the instructions and adds CallInst instruction either printLoad or printStore (depending on the instruction type) after the current one (load/store inst).
In order to call this printStore or printLoad I need to add appropriate arguments to CallInst::Create function, which are the address and the value of the memory location.
This is an example of what I want to achieve:
define void #mains() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 0, i32* %1, align 4
store i32 5, i32* %1, align 4
store i32 2, i32* %2, align 4
store i32 4, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = add nsw i32 %3, 5
store i32 %4, i32* %1, align 4
ret void
}
The output should be:
store instruction:
address=... // address of %1
value=0
...
...
...
load instruction:
address=... // address of %2
value=4
store instruction:
address=... // address of %1
value=9
Progress so far:
I am able to get the addresses of the operands using getPointerOperand() on LoadInst/StoreInst.
I can also get the value of StoreInst in the first 4 store instructions by casting the operand to ConstantInt, but I don't know how to extract the value in the last StoreInst. Is it even possible?
EDITED:
Using
void printLoad(int32_t p)
and
Constant *hookLoadFunc = M.getOrInsertFunction("printLoad", Type::getVoidTy(M.getContext()), Type::getInt32Ty(M.getContext()));
.
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 0, i32* %1, align 4
call void #printStore(i32 0)
store i32 0, i32* %2, align 4
call void #printStore(i32 0)
store i32 5, i32* %2, align 4
call void #printStore(i32 5)
store i32 2, i32* %3, align 4
call void #printStore(i32 2)
store i32 4, i32* %3, align 4
call void #printStore(i32 4)
%4 = load i32, i32* %3, align 4
%5 = add nsw i32 %4, 5
store i32 %5, i32* %2, align 4
call void #printStore(i32 %5)
ret i32 0
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
call void #printStore(i32 %0)
%3 = load i32, i32* %2, align 4
%4 = call i32 (i8*, ...) #printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* #.str, i32 0, i32 0), i32 %3)
ret void
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
call void #printStore(i32 %0)
%3 = load i32, i32* %2, align 4
%4 = call i32 (i8*, ...) #printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* #.str.1, i32 0, i32 0), i32 %3)
ret void
This causes Segmentation fault: 11 when run.
SOLVED:
Figured out that I had infinity loop (due to recursion). printStore actually uses load/store instructions, thus creating another call to printStore and so on.
Assuming that you have an llvm::Function that represents printLoad() and printStore():
llvm::Function * print_load = ....
llvm::Function * print_store = ...
You can emit a CallInst for each LoadInst and StoreInst.
For LoadInst:
LoadInst * some_load = ...
Value * address_of_load = some_load->getOperand(0);
Value * print_load_arguments[] = { address_of_load, some_load };
// Insert a CallInst just after the load.
CallInst::Create(print_load, print_load_arguments )->insertAfter( some_load );
Remember that in llvm the value loaded by the LoadInst is the same thing as the LoadInst itself.
For StoreInst:
StoreInst * some_store = ...
Value * value_to_store = some_store->getOperand(0);
Value * address_of_store = some_store->getOperand(1);
Value * print_store_arguments[] = { address_of_store, value_to_store };
// Insert a CallInst just after the store.
CallInst::Create(print_store, print_store_arguments)->insertAfter(some_store);
This will work if all the types match. Otherwise, you have to insert BitCast instructions just before calling printStore() or printLoad().

How Clang generates code for function parameters?

In a function , I want to know how the parameters is passed into the function body, so that to track the flow of the parameters. I tried a simple code, and find there seems to be an alloc-store pattern for every parameter, I wonder whether it is true or not?
A demo code is
int add(int x, int y){
return x+y;
}
The llvm ir it generated is:
; Function Attrs: nounwind uwtable
define i32 #add(i32 %x, i32 %y) #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 %x, i32* %1, align 4
store i32 %y, i32* %2, align 4
%3 = load i32, i32* %1, align 4
%4 = load i32, i32* %2, align 4
%5 = add nsw i32 %3, %4
ret i32 %5
}
In the example we can see that,
For every parameters, the Clang use a alloc instruction to define a
local variable
Following the alloc instruction, store instructions is used to
assign values?
My questions are:
Is all the function LLVM IR are generated in this alloc and store patterns? Or what exactly LLVM do with the parameters?
The order of parameters is determined by the convention it used?
I think this pattern holds for code that has no compile-time optimizations; however, if you instead compile the code with -O3 (or anything that applies the mem2reg optimization), this pattern is optimized out:
(clang -emit-llvm -S -O0 add.c)
define i32 #add(i32 %x, i32 %y) #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 %x, i32* %1, align 4
store i32 %y, i32* %2, align 4
%3 = load i32, i32* %1, align 4
%4 = load i32, i32* %2, align 4
%5 = add nsw i32 %3, %4
ret i32 %5
}
(opt -mem2reg add.ll -o add_m.ll)
define i32 #add(i32 %x, i32 %y) #0 {
%1 = add nsw i32 %y, %x
ret i32 %1
}
So if you are controlling all of the code that you are analyzing, then you can rely on this pattern. I would instead recommend that you use the LLVM APIs to get the function arguments. The following code iterates through the arguments to a function F and prints them after casting to values.
for (auto AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
{
Value* v = &*AI;
errs() << *v << "\n";
}
The values in the above sample are usable in the same way as any other value in the IR.