To test LLVM's functionality, I wrote the following simple program.
#include <stdio.h>
/* Minimal test program: prints a greeting and exits with status 0. */
int main()
{
/* Write the greeting, followed by a newline, to standard output. */
printf( "Hello World!\n" );
/* Return 0 to the caller. */
return 0;
}
And then compiled it to LLVM IR by typing clang -S -emit-llvm main.c -o main.ll. The generated code in main.ll was the following.
; ModuleID = 'main.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
@.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00" ; 14 bytes: text, newline, terminating NUL
define i32 @main() nounwind {
%1 = alloca i32, align 4
store i32 0, i32* %1
%2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; passes the address of the first byte of @.str
ret i32 0
}
declare i32 @printf(i8*, ...) ; declaration only; no body in this module
Then when I tried to compile the IR code (in main.ll) to native executable binary, by typing llc main.ll -o main.s && gcc main.s -o main, I got the following error.
llc: main.ll:5:17: error: expected 'global' or 'constant'
@.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00"
However, if I remove unnamed_addr from main.ll, it does compile. So my question is: what is wrong with unnamed_addr? Why does it not compile with it? Is this maybe because I'm using incompatible versions of clang and llvm?
The unnamed_addr attribute was introduced in LLVM 2.9.
Could it be that your clang is from 2.9 or newer, while your llc is from 2.8 or older?
Related
I've been trying to link IR generated with LLVM's C++ API with another IR file generated by Clang++. The input file to Clang is a function fn I'm trying to call from the first IR file. But llvm-link doesn't replace fn's declaration with its definition.
main_ir.ll
source_filename = "top"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Format string used by the printf call below: "%d ", a newline, and a terminating NUL.
@0 = private unnamed_addr constant [5 x i8] c"%d \0A\00", align 1
; Both callees are declarations only; no bodies are present in this module.
declare i32 @printf(...)
declare i32 @fn(i32, ...)
; Calls @fn with the constant 2, passes the result to @printf, then returns 0.
; Note that this function has internal linkage.
define internal i32 @main() {
entrypoint:
%f_call = call i32 (i32, ...) @fn(i32 2)
%printfCall = call i32 (...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @0,
i32 0, i32 0), i32 %f_call)
br label %ProgramExit
ProgramExit: ; preds = %entrypoint
ret i32 0
}
fn_ir.ll (generated with Clang)
source_filename = "libDessin.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: noinline nounwind optnone uwtable
; Spills the i32 argument to a stack slot, reloads it, and returns it multiplied by 2.
; The symbol name is the mangled form _Z2fni, not plain fn.
define dso_local i32 @_Z2fni(i32) #0 {
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = mul nsw i32 %3, 2
ret i32 %4
}
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-
math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-
width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-
math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-
math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-
cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
"use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.1-12 "}
And all llvm-link does is reproduce the contents of fn_ir.ll with the source_filename changed to llvm-link. I'd be real happy to know the bit I'm missing.
The answer is in the name mangling.
Your 'manually' generated IR has a function named fn, while clang++ emits the name _Z2fni.
You need to make the names match. Either emit the name _Z2fni in main_ir.ll, or (arguably better in this case) change the definition of fn in the C++ source that fn_ir.ll is generated from, e.g.:
// Returns twice its argument.
// Declared extern "C" so the compiler emits the unmangled symbol name `fn`
// instead of a C++-mangled name such as _Z2fni.
// The return type is int (not void) because the function returns a value.
extern "C" int fn(int x) {
    return x * 2;
}
extern "C" tells the compiler to use C mangling convention, this is less fragile since it will work even if you change type or number of arguments of fn. However, it won't work if you want to pass C++ types into the fn, then you need to emit the right function name for the main_ir.ll.
UPD:
There are two more 'discrepancies':
The fn has different arguments in the two modules: i32 vs i32, ...
The other issue is that main is declared as internal. I guess it is just stripped, since it is internal and is not called by anyone.
So just removing the internal flag should do the job for you.
I have a piece of code that I'm trying to turn into LLVM bitcode:
// Repeatedly multiplies a volatile double by the loop index for j = 0..31,
// then returns 0. The computed value is not used afterwards.
int main() {
// n is declared volatile.
volatile double n = 0.45;
// j runs from 0 to 31 inclusive; each iteration scales n by j.
for (int j = 0; j < 32; j++) {
n *= j;
}
return 0;
}
I run the following command on it:
clang -O0 -S -emit-llvm TrainingCode/trainingCode.cpp -o TrainingCode/trainingCode.ll
to generate the following LLVM bitcode (take note of the 6th line, the one with "Function Attrs"):
; ModuleID = 'TrainingCode/trainingCode.cpp'
source_filename = "TrainingCode/trainingCode.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline norecurse nounwind optnone uwtable
; Loop over %j from 0 while %j < 32: convert %j to double, multiply %n by it,
; store the product back to %n, increment %j. Returns 0 after the loop.
; The trailing #0 refers to the attribute group defined after the function.
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%n = alloca double, align 8
%j = alloca i32, align 4
store i32 0, i32* %retval, align 4
store double 4.500000e-01, double* %n, align 8
store i32 0, i32* %j, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %j, align 4
%cmp = icmp slt i32 %0, 32
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%1 = load i32, i32* %j, align 4
%conv = sitofp i32 %1 to double
%2 = load double, double* %n, align 8
%mul = fmul double %2, %conv
store double %mul, double* %n, align 8
br label %for.inc
for.inc: ; preds = %for.body
%3 = load i32, i32* %j, align 4
%inc = add nsw i32 %3, 1
store i32 %inc, i32* %j, align 4
br label %for.cond
for.end: ; preds = %for.cond
ret i32 0
}
attributes #0 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (tags/RELEASE_500/final)"}
Why is clang adding the optnone attribute to main? I need LLVM to run various transformation passes on the bitcode and the optnone attribute is causing LLVM to skip over main... I need this attribute to not be added.
Compiling with -O1 seems to fix this, however this is unacceptable because I need Clang to give me unoptimized code. I want LLVM to optimize the unoptimized code given to me by Clang, but the presence of the optnone attribute is causing LLVM to not perform any optimizations.
There are clang options to disable its optimization of the LLVM-IR. See https://reviews.llvm.org/D28047 for discussion of a patch that would change them, and @Anton's answer to this question for more about using it. Some or all of these options might be the right thing:
clang -O1 -mllvm -disable-llvm-optzns -disable-llvm-passes
(The resolution of that discussion was commit rL290392: Make '-disable-llvm-optzns' an alias for '-disable-llvm-passes', so current clang only needs one.)
Or there's the dumb way: A simple workaround is possible with sed (or your favourite text-processing tool).
You're only using this on compiler-generated code, so you don't have to worry about using regexes to parse free-form hand-written code. Thus you can match fixed formatting that the compiler always uses to make sure you operate only on the correct lines.
# tested and works on the .ll in your question
sed -i '/^; Function Attrs:\|^attributes #/ s/optnone //' *.ll
Replaces the first occurrence of optnone (with a trailing space) with the empty string, on lines that start with attributes # (the one that matters) or with ; Function Attrs: (the comment).
It's an s/// command controlled by a /foo\|bar/ address regex to select which lines it operates on.
sed -i rewrites the input file(s) in-place.
This is expected. The -O0 output is not intended for further optimizations, some bits of IR are not emitted at all in order to reduce the compilation time.
So, you'd need to use -O1 -mllvm -disable-llvm-optzns if you want to get unoptimized IR that may be optimized later.
Sorry, not a solution, but maybe a clue.
It might be down to the clang version/release or to environment variables.
Taking your code on OS X with XCODE 9:
$ clang -O0 -S -emit-llvm test.cpp -o test2.ll
$ more test2.ll
; ModuleID = 'test.cpp'
source_filename = "test.cpp"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
; Function Attrs: norecurse nounwind ssp uwtable
define i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca double, align 8
...
; <label>:15: ; preds = %4
ret i32 0
}
attributes #0 = { norecurse nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"Apple LLVM version 8.1.0 (clang-802.0.42)"}
I have 3 files, first file is a library code mylib.c, second is myProgram1.c and third is myProgram2.c. I need a combined llvm bitcode file. So I do the following things.
clang mylib.c -S -emit-llvm -o mylib.ll
clang myProgram1.c -S -emit-llvm -o myProgram1.ll
clang myProgram2.c -S -emit-llvm -o myProgram2.ll
llvm-link mylib.ll myProgram1.ll myProgram2.ll -o final.ll -S
But the problem is I have main function in both myProgram1.c and myProgram2.c. So it is unable to link saying that there is duplicate symbol.
So is there any way I can link all the 3 files through llvm-link?
I have an idea that may work: use llvm-extract.
Suppose we have following two source code.
$ cat prog1.c
#include <stdlib.h>
#include <stdio.h>
void f1() {
printf("f1\n");
}
int main() {
f1();
return 0;
}
$ cat prog2.c
#include <stdio.h>
extern void f1();
void f2() {
printf("f2\n");
}
int main() {
f1();
f2();
return 0;
}
First we compile it with clang.
$ clang -c -emit-llvm prog2.c
$ clang -c -emit-llvm prog1.c
Then list the symbols defined in the bitcode with llvm-nm.
$ llvm-nm prog1.bc |grep -v main
---------------- T f1
U printf
And then use llvm-extract to extract everything except main.
$ llvm-extract -func=f1 prog1.bc -o prog1_f1.bc
At last, link with llvm-link.
$ llvm-link prog2.bc prog1_f1.bc -o link.bc
$ llvm-dis link.bc -o -
It seems the bytecode is valid.
; ModuleID = 'link.bc'
source_filename = "llvm-link"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; String used by @f2: "f2", a newline, and a terminating NUL.
@.str.4 = private unnamed_addr constant [4 x i8] c"f2\0A\00", align 1
; String referenced by @f1. This is an external declaration with no initializer in this module.
@.str = external hidden unnamed_addr constant [4 x i8], align 1
; Function Attrs: noinline nounwind uwtable
define void @f2() #0 {
entry:
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.4, i32 0, i32 0))
ret void
}
declare i32 @printf(i8*, ...) #1
; Function Attrs: noinline nounwind uwtable
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
; @f1 is called through a bitcast to a varargs function type.
call void (...) bitcast (void ()* @f1 to void (...)*)()
call void @f2()
ret i32 0
}
; Function Attrs: noinline nounwind uwtable
define void @f1() #0 {
entry:
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0))
ret void
}
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0, !0}
!0 = !{!"clang version 4.0.0 (tags/RELEASE_400/final)"}
How did you manage to get past the "expected top-level entity" error when running lli in the LLVM framework?
This error usually means that you copy-pasted part of some IR code which doesn't count as a top level entity. In other words, it's not a function, not a type, not a global variable, etc. The same error can happen in C, just for comparison:
x = 8;
Is not valid contents for a C file, because the assignment statement isn't a valid top level entity. To make it valid you put it in a function:
/* Wraps the assignment in a function body, where a statement is allowed. */
void foo() {
x = 8; /* assuming x is global and visible here */
}
The same error happens in LLVM IR.
My Issue: The .ll file format was "UTF-8 with BOM" instead of "UTF-8 without BOM".
Fix: With notepad++, in the encoding menu, select the "UTF-8 without BOM", then save.
Quick setup: (For llvm 3.4.0 .ll files on windows)
advanced text editor from https://notepad-plus-plus.org/
llvm binaries from https://github.com/CRogers/LLVM-Windows-Binaries
hello.ll as "UTF-8 without BOM" (This code is in llvm 3.4.0 format):
; 13 bytes: the 12-character greeting plus a terminating NUL.
@msg = internal constant [13 x i8] c"Hello World!\00"
declare i32 @puts(i8*)
; Passes the address of the first byte of @msg to @puts, then returns 0.
; The getelementptr here uses the single-type form, as in this LLVM 3.4.0-format listing.
define i32 @main() {
call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @msg, i32 0, i32 0))
ret i32 0
}
In command prompt:
lli hello.ll
Quick setup: (For llvm 3.8.0 .ll files on windows)
advanced text editor from https://notepad-plus-plus.org/
clang binaries from: http://llvm.org/releases/download.html#3.8.0
hello.ll as "UTF-8 without BOM" (This code is in llvm 3.8.0 format):
; 13 bytes: the 12-character greeting plus a terminating NUL.
@msg = internal constant [13 x i8] c"Hello World!\00"
declare i32 @puts(i8*)
; Passes the address of the first byte of @msg to @puts, then returns 0.
; The getelementptr here names the pointee type explicitly, as in this LLVM 3.8.0-format listing.
define i32 @main() {
call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @msg, i32 0, i32 0))
ret i32 0
}
In command prompt:
clang hello.ll -o hello.exe
hello.exe
Errors about char16_t, u16String, etc means clang needs: -fms-compatibility-version=19
I'm using LLVM to convert a user-defined language into bytecode, and I'm not sure I understand how a module should be used.
At the beginning, I thought it was something like the C/C++ object files (to avoid bytecode recompilation of every file when a single file is edited). However, I found this line in the llvmpy documentation, which seems to say that this is not the case:
Inter-module reference is not possible. That is module A cannot call a function in module B, directly.
Can someone explain why are modules separated from the contexts if we can't have multiple modules for a single context ?
It is possible, but like the .o files you mention, they must first be linked together into a single binary.
Given a pair of bitcode files:
$ llvm-dis a.bc -o -
; ModuleID = 'a.bc'
; 13 bytes: the 12-character text plus a newline.
; NOTE(review): the initializer contains no terminating NUL byte.
@0 = global [13 x i8] c"Hello world!\0A"
declare i32 @printf(i8*)
; Passes the address of the first byte of @0 to @printf and returns.
define void @f() {
%1 = call i32 @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0))
ret void
}
$ llvm-dis b.bc -o -
; ModuleID = 'b.bc'
; @f is only declared here; this module contains no body for it.
declare void @f()
; Calls @f and returns 0.
define i32 @main() {
call void @f()
ret i32 0
}
This won't work:
$ lli b.bc
LLVM ERROR: Program used external function 'f' which could not be resolved!
But if you link them together, it will:
$ llvm-ld a.bc b.bc -disable-opt -o c
$ llvm-dis c.bc -o -
; ModuleID = 'c.bc'
; Linked module: contains both the definition of @f and the @main that calls it.
; 13 bytes: the 12-character text plus a newline.
; NOTE(review): the initializer contains no terminating NUL byte.
@0 = global [13 x i8] c"Hello world!\0A"
declare i32 @printf(i8*)
; Passes the address of the first byte of @0 to @printf and returns.
define void @f() {
%1 = call i32 @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0))
ret void
}
; Calls @f and returns 0.
define i32 @main() {
call void @f()
ret i32 0
}
$ lli c.bc
Hello world!