I'm racking my brain over a problem with a micro compiler written with LLVM and its OCaml bindings.
During the code generation phase of my program, I get error: Invalid record from llc when I try to compile a program that contains a function of void type. However, it works well when all functions have int type (my language supports only int).
This is a program for which llc reports error: Invalid record
; ModuleID = 'MicrocC-module'
source_filename = "MicrocC-module"

; External runtime helpers, declared but not defined in this module.
declare i32 @print(i32)
declare i32 @getint()

; Spills its four i32 parameters to stack slots, then passes each one to @print
; in order. Returns nothing.
define void @printem(i32 %0, i32 %1, i32 %2, i32 %3) {
entry:
  %a = alloca i32
  store i32 %0, i32* %a
  %b = alloca i32
  store i32 %1, i32* %b
  %c = alloca i32
  store i32 %2, i32* %c
  %d = alloca i32
  store i32 %3, i32* %d
  %acc_var = load i32, i32* %a
  %print = call i32 @print(i32 %acc_var)
  %acc_var1 = load i32, i32* %b
  %print2 = call i32 @print(i32 %acc_var1)
  %acc_var3 = load i32, i32* %c
  %print4 = call i32 @print(i32 %acc_var3)
  %acc_var5 = load i32, i32* %d
  %print6 = call i32 @print(i32 %acc_var5)
  ret void
}

; Calls @printem with four constants and returns 0.
define i32 @main() {
entry:
  ; Deliberately left as generated: the call returns void, yet its result is
  ; still given the name %printem. This is the instruction under discussion.
  %printem = call void @printem(i32 42, i32 17, i32 192, i32 8)
  ret i32 0
}
And this is the program in which all functions have int type; it works without error:
; ModuleID = 'MicrocC-module'
source_filename = "MicrocC-module"

; External runtime helpers, declared but not defined in this module.
declare i32 @print(i32)
declare i32 @getint()

; Spills both parameters to stack slots, reloads them and returns their sum.
define i32 @add(i32 %0, i32 %1) {
entry:
  %a = alloca i32
  store i32 %0, i32* %a
  %b = alloca i32
  store i32 %1, i32* %b
  %acc_var = load i32, i32* %a
  %acc_var1 = load i32, i32* %b
  %tmp = add i32 %acc_var, %acc_var1
  ret i32 %tmp
}

; Stores add(39, 3) in the local %a, prints it via @print and returns 0.
define i32 @main() {
entry:
  %a = alloca i32
  %add = call i32 @add(i32 39, i32 3)
  store i32 %add, i32* %a
  %acc_var = load i32, i32* %a
  %print = call i32 @print(i32 %acc_var)
  ret i32 0
}
My LLVM version is 10.
I found the answer, and the solution is really simple, but I want to post it because people who are new to LLVM may run into the same problem.
As in every language, it does not make sense to store the result of a void function. In fact, the main function of my failing program contains the following code
%printem = call void @printem(i32 42, i32 17, i32 192, i32 8)
When a function returns void, the call-building function in the OCaml binding (and, I think, also in C++) needs an empty string as the name of the result (in C++ I think it is null).
So the correct call is without the name %printem.
In addition, I don't know why I only received the error: Invalid record message during the OCaml build, but after I downloaded the latest version of LLVM from GitHub and ran its compiler (llc) directly, it gave me a very good description of the error:
./llc: error: ./llc: /media/vincent/VincentHDD/SandBoxDev/test_llvm.ll:35:3: error: instructions returning void cannot have a name
%printem = call void @printem.1(i32 42, i32 17, i32 192, i32 8)
^
Related
My Problem
I am new to LLVM and C++.
I am currently creating an LLVM backend compiler and need to replace LLVM function calls with the instructions in its definition.
Is there already an existing pass that accomplishes this?
Examples
For example, I have the following C code, compiled to LLVM IR with clang-14 -S -emit-llvm.
/* Returns the sum of its two arguments. */
int add(int a, int b) {
return a + b;
}
/* Adds the locals a (10) and b (20) through add() and returns the result. */
int main() {
int a = 10;
int b = 20;
int c = add(a, b); /* the call that should be replaced by add's body */
return c;
}
Then I get the LLVM IR code below.
; Spills both parameters to stack slots, reloads them and returns their sum.
define dso_local i32 @add(i32 noundef %a, i32 noundef %b) #0 {
entry:
  %a.addr = alloca i32, align 4
  %b.addr = alloca i32, align 4
  store i32 %a, i32* %a.addr, align 4
  store i32 %b, i32* %b.addr, align 4
  %0 = load i32, i32* %a.addr, align 4
  %1 = load i32, i32* %b.addr, align 4
  %add = add nsw i32 %0, %1
  ret i32 %add
}

; Function Attrs: noinline nounwind optnone uwtable
; Stores 10 and 20 in locals, calls @add on them, keeps the result in %c and
; returns it.
define dso_local i32 @main() #0 {
entry:
  %retval = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 10, i32* %a, align 4
  store i32 20, i32* %b, align 4
  %0 = load i32, i32* %a, align 4
  %1 = load i32, i32* %b, align 4
  %call = call i32 @add(i32 noundef %0, i32 noundef %1)
  store i32 %call, i32* %c, align 4
  %2 = load i32, i32* %c, align 4
  ret i32 %2
}
I want to replace the call to @add in the code above with the instructions from its definition, using the opt command, and emit the following new code.
; Desired result: the same main, with the add instruction placed directly in
; its body instead of a call.
define dso_local i32 @main() #0 {
entry:
  %retval = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 10, i32* %a, align 4
  store i32 20, i32* %b, align 4
  %0 = load i32, i32* %a, align 4
  %1 = load i32, i32* %b, align 4
  %add = add nsw i32 %0, %1
  store i32 %add, i32* %c, align 4
  %2 = load i32, i32* %c, align 4
  ret i32 %2
}
I searched the following site for such a pass, but could not find a suitable one.
https://llvm.org/docs/Passes.html#loops-natural-loop-information
I am new to the LLVM framework and was able to run a basic pass that iterates over the instructions of a simple IR function with only an entry basic block. To expand on that, I got an .ll file from clang for a simple C function (don't mind the correctness of the function; I don't care about it for the sake of learning LLVM, at least for now).
// fact.c
// Computes the product of all integers from 2 up to n, starting from t = 1.
// When n < 2 the loop body never runs and 1 is returned.
int fact(int n){
int t =1;
for(int i = 2;i<=n;i++){
t = t*i;
}
return t;
}
I was able to get a fact.ll file for this function, given below, and there are three more functions in fact.ll that I hand-coded into the IR: foo, add and bar. I attempt to run a simple pass which iterates over each BasicBlock, gathers its instruction opcodes and simply prints them at the end. My issue is that the opt tool is able to do this for the foo, add and bar functions but not for the fact function.
Pass file :
#include "llvm/Transforms/Utils/MyHello.h"
#include <string>
using namespace llvm;
/// Prints the name of \p F to errs(), followed by the opcode name of every
/// instruction in the function (one per line, in block order) and a trailing
/// newline.
///
/// \param F  the function being visited.
/// \param AM the function analysis manager; not used by this pass.
/// \returns PreservedAnalyses::all(); the pass only reads the IR.
PreservedAnalyses MyHelloPass::run(Function &F,FunctionAnalysisManager &AM) {
  errs() << F.getName() << "\n";

  // Collect every opcode name first so the whole list is emitted in one write.
  std::string opcodeLines;
  for (const auto &Block : F) {
    for (const auto &Inst : Block) {
      opcodeLines += Inst.getOpcodeName();
      opcodeLines += '\n';
    }
  }

  errs() << opcodeLines << '\n';
  return PreservedAnalyses::all();
}
fact.ll
; ModuleID = 'fact.c'
source_filename = "fact.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline nounwind optnone uwtable
; Unoptimized loop: %t starts at 1 and is multiplied by %i for every %i from 2
; up to and including %n; the final %t is returned.
define dso_local i32 @fact(i32 noundef %n) #0 {
entry:
  %n.addr = alloca i32, align 4
  %t = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 %n, i32* %n.addr, align 4
  store i32 1, i32* %t, align 4
  store i32 2, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %n.addr, align 4
  %cmp = icmp sle i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %2 = load i32, i32* %t, align 4
  %3 = load i32, i32* %i, align 4
  %mul = mul nsw i32 %2, %3
  store i32 %mul, i32* %t, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %4 = load i32, i32* %i, align 4
  %inc = add nsw i32 %4, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond, !llvm.loop !6

for.end:                                          ; preds = %for.cond
  %5 = load i32, i32* %t, align 4
  ret i32 %5
}
; Hand-written: returns 2 + 3.
define i32 @foo(){
  %a = add i32 2,3
  ret i32 %a
}
; Hand-written arithmetic chain: with c = a + b and d = c + c, returns
; d * (c - d).
define i32 @add(i32 %a,i32 %b){
  %c = add i32 %a,%b
  %d = add i32 %c,%c
  %e = sub i32 %c, %d
  %f = mul i32 %d, %e
  ret i32 %f
}
; Hand-written: returns immediately.
define void @bar(){
  ret void
}
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"AMD\C2\A0\C2\A0-DCLANG_REPOSITORY_STRING=CLANG: clang version 15.0.0 (CLANG: Jenkins CPUPC_Mirror_To_Staging_Merge-Build#892) (based on LLVM Mirror.Version.14.0.0)"}
!6 = distinct !{!6, !7}
!7 = !{!"llvm.loop.mustprogress"}
run command : opt -disable-output fact.ll -passes="myhello"
Output:
foo
add
ret
add
add
add
sub
mul
ret
bar
ret
See the optnone in:
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 #fact(i32 noundef %n) #0 {
That means that this function is opting out of optimizations, hence your pass will not be run on that function.
You can manually remove the optnone from the definition of #0 at the bottom (note: the ; Function Attrs: ... line is merely a comment, changing it has no effect) or you can build your LLVM IR with "clang -O2". You may want to also add -mllvm -disable-llvm-optzns if you want clang to produce IR that could be optimized but hasn't been run through LLVM passes.
I'm working with LLVM IR in OCaml to build a toy language, and now my problem is how to convert a variable into a reference to that variable.
In other words, my simple program is this:
// Stores 2 in i, takes the address of i into p, and prints the value read
// back through p.
int main(){
int i;
i = 2;
int *p;
p = &i; // address-of: the operation whose IR lowering is being asked about
print(*p);
return 0;
}
and my problem is how to get a pointer to the variable i in the instruction p = &i;. The IR
I currently generate is
; Stores 2 in %i, stores a pointer to %i in %p (obtained through a
; zero-index getelementptr), then loads through %p and prints the value.
define i32 @main() {
entry:
  %i = alloca i32
  store i32 2, i32* %i
  %p = alloca i32*
  %0 = getelementptr i32, i32* %i, i32 0
  store i32* %0, i32** %p
  %1 = load i32*, i32** %p
  %2 = load i32, i32* %1
  call void @print(i32 %2)
  ret i32 0
}
I don't like the line %0 = getelementptr i32, i32* %i, i32 0, and I think that I'm only lucky that my code works as expected.
To summarize, my question is: what is good practice for performing this kind of memory operation on a variable, as in C? In particular, I need to do the following
i = 2;
int *p;
p = &i;
And also
int *p;
p = &i;
*p = *p + 2;
I'm missing something, because when I try to compile code like *p = *p + 2; I get a core dump.
I also noted that clang doesn't use getelementptr for my first example, but generates code like this
; Function Attrs: noinline nounwind optnone uwtable
; clang's lowering of the same program: the alloca %2 (the variable i) is
; stored into %3 (the pointer p) directly, with no getelementptr in between.
define dso_local i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32*, align 8
  store i32 0, i32* %1, align 4
  store i32 2, i32* %2, align 4
  store i32* %2, i32** %3, align 8
  %4 = load i32*, i32** %3, align 8
  %5 = load i32, i32* %4, align 4
  %6 = call i32 (i32, ...) bitcast (i32 (...)* @print to i32 (i32, ...)*)(i32 %5)
  ret i32 0
}
In my grammar, *p is a pointer, and I convert it into an LLVM pointer type in the IR.
In a function, I want to know how the parameters are passed into the function body, so that I can track the flow of the parameters. I tried some simple code and found that there seems to be an alloca-store pattern for every parameter; I wonder whether that is always true.
A demo code is
/* Returns x + y. */
int add(int x, int y){
return x+y;
}
The llvm ir it generated is:
; Function Attrs: nounwind uwtable
; Each parameter gets its own stack slot (alloca) and is stored into it; the
; sum is computed from the reloaded values.
define i32 @add(i32 %x, i32 %y) #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 %x, i32* %1, align 4
  store i32 %y, i32* %2, align 4
  %3 = load i32, i32* %1, align 4
  %4 = load i32, i32* %2, align 4
  %5 = add nsw i32 %3, %4
  ret i32 %5
}
In the example we can see that:
For every parameter, Clang uses an alloca instruction to define a
local variable.
Following the alloca instructions, store instructions are used to
assign the parameter values to those locals.
My questions are:
Is the LLVM IR of every function generated with this alloca-and-store pattern? Or what exactly does LLVM do with the parameters?
Is the order of the parameters determined by the calling convention used?
I think this pattern holds for code that has no compile-time optimizations; however, if you instead compile the code with -O3 (or anything that applies the mem2reg optimization), this pattern is optimized out:
(clang -emit-llvm -S -O0 add.c)
; -O0 output: both parameters are spilled to allocas and reloaded before the
; add.
define i32 @add(i32 %x, i32 %y) #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 %x, i32* %1, align 4
  store i32 %y, i32* %2, align 4
  %3 = load i32, i32* %1, align 4
  %4 = load i32, i32* %2, align 4
  %5 = add nsw i32 %3, %4
  ret i32 %5
}
(opt -mem2reg add.ll -o add_m.ll)
; After mem2reg: the allocas, stores and loads are gone and the parameters are
; used directly.
define i32 @add(i32 %x, i32 %y) #0 {
  %1 = add nsw i32 %y, %x
  ret i32 %1
}
So if you are controlling all of the code that you are analyzing, then you can rely on this pattern. I would instead recommend that you use the LLVM APIs to get the function arguments. The following code iterates through the arguments to a function F and prints them after casting to values.
// Visit each formal parameter of F, view it as a generic Value and print it
// to errs(), one per line.
for (auto &Arg : F->args()) {
  Value *ArgAsValue = &Arg;
  errs() << *ArgAsValue << "\n";
}
The values in the above sample are usable in the same way as any other value in the IR.
I have C code that calculates the factorial of an int, "factorial.c". I compile it to human-readable LLVM code, "factorial.ll", and I modify the compiled LLVM code.
The objective is to execute the modified LLVM code and see its output. How can I do this?
It will depend on how your LLVM output is assembled and what libraries it links against, but, for example, executing the following factorial.ll with the shell command lli
$ lli factorial.ll
Factorial of 10 = 3628800
Will execute the main function with the JIT and use the standard printf to output the result to stdout.
; Written with typed pointers and the explicit-result-type forms of load,
; getelementptr and call (the syntax used since LLVM 3.7).

; Format string handed to printf by @main.
@.str = private unnamed_addr constant [22 x i8] c"Factorial of %d = %d\0A\00", align 1

declare i32 @printf(i8*, ...)

; Recursive factorial: yields 1 when %n <= 1, otherwise %n * factorial(%n - 1).
define i32 @factorial(i32 %n) nounwind uwtable {
entry:
  %n.addr = alloca i32, align 4
  store i32 %n, i32* %n.addr, align 4
  %0 = load i32, i32* %n.addr, align 4
  %cmp = icmp sle i32 %0, 1
  br i1 %cmp, label %cond.true, label %cond.false

cond.true:                                        ; preds = %entry
  br label %cond.end

cond.false:                                       ; preds = %entry
  %1 = load i32, i32* %n.addr, align 4
  %2 = load i32, i32* %n.addr, align 4
  %sub = sub nsw i32 %2, 1
  %call = call i32 @factorial(i32 %sub)
  %mul = mul nsw i32 %1, %call
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  %cond = phi i32 [ 1, %cond.true ], [ %mul, %cond.false ]
  ret i32 %cond
}

; Computes factorial(10), prints it through printf with @.str and returns 0.
define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 @factorial(i32 10)
  ; A variadic callee is called with its function type, not a
  ; pointer-to-function type; getelementptr names its source element type.
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str, i32 0, i32 0), i32 10, i32 %call)
  ret i32 0
}