Why are i and j not reported as aliases? - llvm

here is the code:
int main() {
int i = 0;
int &j = i;
j = 10;
return i;
}
and its IR at the -O0 optimization level:
; Function Attrs: noinline norecurse nounwind optnone uwtable mustprogress
define dso_local i32 @main() #0 !dbg !7 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
%j = alloca i32*, align 8
store i32 0, i32* %retval, align 4
call void @llvm.dbg.declare(metadata i32* %i, metadata !11, metadata !DIExpression()), !dbg !12
store i32 0, i32* %i, align 4, !dbg !12
call void @llvm.dbg.declare(metadata i32** %j, metadata !13, metadata !DIExpression()), !dbg !15
store i32* %i, i32** %j, align 8, !dbg !15
%0 = load i32*, i32** %j, align 8, !dbg !16
store i32 10, i32* %0, align 4, !dbg !17
%1 = load i32, i32* %i, align 4, !dbg !18
ret i32 %1, !dbg !19
}
and use command:
opt -disable-basic-aa --cfl-steens-aa -aa-eval -print-all-alias-modref-info e0.ll
get result:
NoAlias: i32* %i, i32* %retval
NoAlias: i32* %retval, i32** %j
NoAlias: i32* %i, i32** %j
NoAlias: i32* %0, i32* %retval
MayAlias: i32* %0, i32* %i
NoAlias: i32* %0, i32** %j
Does anyone know why the result is 'MayAlias'?

Related

How do I eliminate LLVM function calls and replace them with basic instructions?

My Problem
I am new to LLVM and C++.
I am currently creating an LLVM backend compiler and need to replace LLVM function calls with the instructions in its definition.
Is there already an existing pass that accomplishes this?
Examples
For example, I have the following C code, compiled to LLVM IR with clang-14 -S -emit-llvm.
int add(int a, int b) {
return a + b;
}
int main() {
int a = 10;
int b = 20;
int c = add(a, b);
return c;
}
Then, I get the LLVM IR code below.
define dso_local i32 @add(i32 noundef %a, i32 noundef %b) #0 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
%0 = load i32, i32* %a.addr, align 4
%1 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 10, i32* %a, align 4
store i32 20, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%call = call i32 @add(i32 noundef %0, i32 noundef %1)
store i32 %call, i32* %c, align 4
%2 = load i32, i32* %c, align 4
ret i32 %2
}
I want to replace the function call @add in the code above with the instructions in its definition, using the opt command, and emit the following new code.
define dso_local i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 10, i32* %a, align 4
store i32 20, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add nsw i32 %0, %1
store i32 %add, i32* %c, align 4
%2 = load i32, i32* %c, align 4
ret i32 %2
}
I searched the following site for such a pass, but could not find a suitable one.
https://llvm.org/docs/Passes.html#loops-natural-loop-information

LLVM: Instruction does not dominate all uses - No control flow

I implemented a function pass which iterates over basic block instructions and tracks all instructions that have a type of IntegerTy.
Here is the snippet of the pass that does it:
if (!I->isTerminator()){
Type::TypeID datatype = I->getType()->getTypeID();
if (datatype == llvm::Type::IntegerTyID) {
IRBuilder<> IRB(I);
Value* v_value = IRB.CreateZExt(I, IRB.getInt64Ty());
Value *args[] = {v_value};
IRB.CreateCall(NNT_log_int, args);
}
}
However, the IRB.CreateZExt(I, IRB.getInt64Ty()); command seems to create an "Instruction does not dominate all uses!" problem.
I understand the nature of the issue (here and here there are similar problems).
My point of confusion is that I apply this pass to a toy program with no if statements or any other control flow statements, yet I still encounter this problem.
The error message:
Instruction does not dominate all uses!
%2 = load i32, i32* %y, align 4
%1 = zext i32 %2 to i64
Instruction does not dominate all uses!
%4 = load i32, i32* %y, align 4
%3 = zext i32 %4 to i64
Note the fact that the inserted zext instructions name a constant with a counter number less than the previous instruction - I think this is the problem but I have no idea why my pass does this!!!
Here is the IR of my toy program before the application of the pass:
; Function Attrs: noinline nounwind optnone uwtable
define i32 @_Z3fooi(i32 %x) #4 {
entry:
%x.addr = alloca i32, align 4
%y = alloca i32, align 4
%z = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 0, i32* %y, align 4
%0 = load i32, i32* %x.addr, align 4
%add = add nsw i32 %0, 3
store i32 %add, i32* %y, align 4
%1 = load i32, i32* %y, align 4
store i32 %1, i32* %x.addr, align 4
%2 = load i32, i32* %y, align 4
ret i32 %2
}
; Function Attrs: noinline nounwind optnone uwtable
define i32 @_Z3bari(i32 %panos) #4 {
entry:
%panos.addr = alloca i32, align 4
%y = alloca i32, align 4
store i32 %panos, i32* %panos.addr, align 4
%0 = load i32, i32* %panos.addr, align 4
%add = add nsw i32 %0, 2
store i32 %add, i32* %y, align 4
%1 = load i32, i32* %y, align 4
ret i32 %1
}
Also, note that the problematic instructions are before a terminator - again, I think that this is related.
Any ideas will be highly appreciated !
Your zext instruction uses I, but you're inserting it before I. When you create the IRBuilder, you should pass in the instruction after I as the insert point. For example like this:
IRBuilder<> IRB(I->getNextNode());

Why is this block of LLVM instructions generated?

The DataFlowSanitizer pass on LLVM 3.8.0, 64 bit (Ubuntu 16.04.2) generates the following IR from source:
The source:
test.c
#include <sanitizer/dfsan_interface.h>
int main(void) {
int i = 1;
dfsan_label i_label = dfsan_create_label("i", 0);
dfsan_set_label(i_label, &i, sizeof(i));
return 0;
}
The commands to generate the IR:
clang -c -emit-llvm -fsanitize=dataflow test.c -o test.bc
llvm-dis test.bc
The disassembly:
test.ll
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
%0 = alloca i16
%retval = alloca i32, align 4
%i = alloca i32, align 4
%1 = alloca i16
%i_label = alloca i16, align 2
store i16 0, i16* %0
store i32 0, i32* %retval, align 4
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%2 = ptrtoint i32* %i to i64
%3 = and i64 %2, -123145302310913
%4 = mul i64 %3, 2
%5 = inttoptr i64 %4 to i16*
%6 = bitcast i16* %5 to i64*
store i64 0, i64* %6, align 2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
store i32 1, i32* %i, align 4
%call = call zeroext i16 @dfsan_create_label(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8* null)
store i16 0, i16* %1
store i16 %call, i16* %i_label, align 2
%7 = load i16, i16* %1
%8 = load i16, i16* %i_label, align 2
%9 = bitcast i32* %i to i8*
call void @dfsan_set_label(i16 zeroext %8, i8* %9, i64 4)
ret i32 0
}
I don't understand why the block of instructions I separated out is being generated. Looking at Transforms/Instrumentation/DataFlowSanitizer.cpp, I can't find the code that inserts the instrumentation above. Can anyone explain this behavior?

How to get better results from LLVM's MemoryDependenceAnalysis pass?

I am trying to use the results of LLVM's built-in MemoryDependenceAnalysis (MDA) in a custom LLVM pass that I'm working on. Given some instruction which reads from memory (a load, for example), I'd like MDA to tell me all of the previous instructions which may have Defined or Clobbered it. If my understanding of the MDA documentation is correct, MDA should be able to give me this info. However, I'm having a hard time getting the precision that I need out of it. Here's the relevant snippet of a simple test program I have been toying around with:
%1 = alloca i32, align 4
%result = alloca i32, align 4
%x = alloca i32, align 4
%xp = alloca i32*, align 8
store i32 0, i32* %1
store i32 5, i32* %result, align 4, !dbg !14
store i32 7, i32* %x, align 4, !dbg !16
store i32* %x, i32** %xp, align 8, !dbg !19
%2 = load i32* %x, align 4, !dbg !20
%3 = icmp eq i32 %2, 4, !dbg !20
br i1 %3, label %4, label %7, !dbg !22, !dataware.bbuid !23
; <label>:4 ; preds = %0
%5 = load i32** %xp, align 8, !dbg !24
%6 = load i32* %5, align 4, !dbg !26
store i32 %6, i32* %result, align 4, !dbg !27
br label %8, !dbg !28, !dataware.bbuid !29
; <label>:7 ; preds = %0
store i32 42, i32* %result, align 4, !dbg !30
br label %8, !dataware.bbuid !32
The command I'm using to run the analysis:
opt-3.6 -enable-tbaa -tbaa -basicaa -libcall-aa -scev-aa -globalsmodref-aa -domtree -memdep -print-memdeps -gvn -analyze test.bc
For some instructions, I am getting results as expected. For example, it tells me that %5 = load i32** %xp is dependent on store i32* %x, i32** %xp. However, it doesn't determine that %6 = load i32* %5 is dependent on store i32 7, i32* %x. Here's a snippet of the output:
Def in block %0 from: store i32* %x, i32** %xp, align 8, !dbg !19
%5 = load i32** %xp, align 8, !dbg !24
Unknown in block %4
%6 = load i32* %5, align 4, !dbg !26
The latter case (which it apparently doesn't know how to reason about) seems like it should be easy enough to detect, even with sub-optimal alias analysis. How do I go about investigating why the results of MDA are sub-optimal? And do you have suggestions for getting more precise results? Are there some additional analysis passes I can add to cause MDA to work better? I'm using opt 3.6.0 - maybe MDA has been improved since this release?
Thanks.
I got the same results as you obtained. You might use a simpler pointer analysis result to infer that %5 and %x may alias, which in turn implies that the value stored to %x is potentially the one loaded through %5.
opt -basicaa -aa-eval -print-all-alias-modref-info test.bc -disable-output
MayAlias: i32* %5, i32* %x

How to execute llvm code

I have C code that calculates the factorial of an int, "factorial.c". I compile it to human-readable LLVM code, "factorial.ll", and then modify the compiled LLVM code.
The objective is to execute the modified LLVM code and to see its output. How can I do this?
It will depend on how your outputted LLVM is assembled and what libraries it links against, but for example executing the following factorial.ll with the shell command lli
$ lli factorial.ll
Factorial of 10 = 3628800
This will execute the main function with the JIT and use the standard printf to output the result to stdout.
@.str = private unnamed_addr constant [22 x i8] c"Factorial of %d = %d\0A\00", align 1
declare i32 @printf(i8*, ...)
define i32 @factorial(i32 %n) nounwind uwtable {
entry:
%n.addr = alloca i32, align 4
store i32 %n, i32* %n.addr, align 4
%0 = load i32* %n.addr, align 4
%cmp = icmp sle i32 %0, 1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
br label %cond.end
cond.false: ; preds = %entry
%1 = load i32* %n.addr, align 4
%2 = load i32* %n.addr, align 4
%sub = sub nsw i32 %2, 1
%call = call i32 @factorial(i32 %sub)
%mul = mul nsw i32 %1, %call
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ 1, %cond.true ], [ %mul, %cond.false ]
ret i32 %cond
}
define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
store i32 0, i32* %retval
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%call = call i32 @factorial(i32 10)
%call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 10, i32 %call)
ret i32 0
}