Cannot link custom generated LLVM IR with Clang generated IR - llvm

I've been trying to link IR generated with LLVM's C++ API with another IR file generated by Clang++. The input file to Clang defines a function fn that I'm trying to call from the first IR file. But llvm-link doesn't replace fn's declaration with its definition.
main_ir.ll
source_filename = "top"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Format string "%d \n" handed to printf below.
@0 = private unnamed_addr constant [5 x i8] c"%d \0A\00", align 1
declare i32 @printf(...)
declare i32 @fn(i32, ...)
; Calls fn(2), prints the returned value through printf, then returns 0.
define internal i32 @main() {
entrypoint:
%f_call = call i32 (i32, ...) @fn(i32 2)
%printfCall = call i32 (...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @0,
i32 0, i32 0), i32 %f_call)
br label %ProgramExit
ProgramExit: ; preds = %entrypoint
ret i32 0
}
fn_ir.ll (generated with Clang)
source_filename = "libDessin.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: noinline nounwind optnone uwtable
; Stores the i32 argument to a stack slot, reloads it and returns it multiplied by 2.
define dso_local i32 @_Z2fni(i32) #0 {
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = mul nsw i32 %3, 2
ret i32 %4
}
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-
math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-
width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-
math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-
math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-
cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
"use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.1-12 "}
And all llvm-link does is reproduce the contents of fn_ir.ll with the source_filename changed to llvm-link. I'd be real happy to know the bit I'm missing.

The answer is in the name mangling.
Your 'manually' generated IR has a function named fn, while clang++ emits the name _Z2fni.
You need to make the names match. Either emit the _Z2fni in the main_ir.ll, or (arguably better in this case) change the definition of fn in the fn_ir, e.g.:
// Returns twice the argument. extern "C" disables C++ name mangling, so the
// symbol is emitted as plain `fn` rather than `_Z2fni`.
extern "C" int fn(int x) {
    return x * 2;
}
extern "C" tells the compiler to use C mangling convention, this is less fragile since it will work even if you change type or number of arguments of fn. However, it won't work if you want to pass C++ types into the fn, then you need to emit the right function name for the main_ir.ll.
UPD:
There are two more 'discrepancies':
The fn has different arguments in the two modules: i32 vs i32, ...
The other issue is that main is declared as internal. I guess it is just stripped since it is internal and it is not being called by anyone.
So just removing the internal flag should do the job for you.

Related

Why is Clang automatically adding attributes to my functions?

I have a piece of code that I'm trying to turn into LLVM bitcode:
int main() {
volatile double n = 0.45;
for (int j = 0; j < 32; j++) {
n *= j;
}
return 0;
}
I run the following command on it:
clang -O0 -S -emit-llvm TrainingCode/trainingCode.cpp -o TrainingCode/trainingCode.ll
to generate the following LLVM bitcode (take note of the 6th line, the one with "Function Attrs"):
; ModuleID = 'TrainingCode/trainingCode.cpp'
source_filename = "TrainingCode/trainingCode.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline norecurse nounwind optnone uwtable
define i32 #main() #0 {
entry:
%retval = alloca i32, align 4
%n = alloca double, align 8
%j = alloca i32, align 4
store i32 0, i32* %retval, align 4
store double 4.500000e-01, double* %n, align 8
store i32 0, i32* %j, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %j, align 4
%cmp = icmp slt i32 %0, 32
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%1 = load i32, i32* %j, align 4
%conv = sitofp i32 %1 to double
%2 = load double, double* %n, align 8
%mul = fmul double %2, %conv
store double %mul, double* %n, align 8
br label %for.inc
for.inc: ; preds = %for.body
%3 = load i32, i32* %j, align 4
%inc = add nsw i32 %3, 1
store i32 %inc, i32* %j, align 4
br label %for.cond
for.end: ; preds = %for.cond
ret i32 0
}
attributes #0 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (tags/RELEASE_500/final)"}
Why is clang adding the optnone attribute to main? I need LLVM to run various transformation passes on the bitcode and the optnone attribute is causing LLVM to skip over main... I need this attribute to not be added.
Compiling with -O1 seems to fix this, however this is unacceptable because I need Clang to give me unoptimized code. I want LLVM to optimize the unoptimized code given to me by Clang, but the presence of the optnone attribute is causing LLVM to not perform any optimizations.
There are clang options to disable its optimization of the LLVM-IR. See https://reviews.llvm.org/D28047 for discussion of a patch that would change them, and @Anton's answer to this question for more about using it. Some or all of these options might be the right thing:
clang -O1 -mllvm -disable-llvm-optzns -disable-llvm-passes
(The resolution of that discussion was commit rL290392: Make '-disable-llvm-optzns' an alias for '-disable-llvm-passes', so current clang only needs one.)
Or there's the dumb way: A simple workaround is possible with sed (or your favourite text-processing tool).
You're only using this on compiler-generated code, so you don't have to worry about using regexes to parse free-form hand-written code. Thus you can match fixed formatting that the compiler always uses to make sure you operate only on the correct lines.
# tested and works on the .ll in your question
sed -i '/^; Function Attrs:\|^attributes #/ s/optnone //' *.ll
Replaces the first occurrence of optnone (with a trailing space) with the empty string, on lines that start with attributes # (the one that matters) or with ; Function Attrs: (the comment).
It's an s/// command controlled by a /foo\|bar/ address regex to select which lines it operates on.
sed -i rewrites the input file(s) in-place.
This is expected. The -O0 output is not intended for further optimizations, some bits of IR are not emitted at all in order to reduce the compilation time.
So, you'd need to use -O1 -mllvm -disable-llvm-optzns if you want to get unoptimized IR that may be optimized later.
Sorry not a solution, but maybe a clue.
It could be down to the clang version or to environment variables.
Taking your code on OS X with XCODE 9:
$ clang -O0 -S -emit-llvm test.cpp -o test2.ll
$ more test2.ll
; ModuleID = 'test.cpp'
source_filename = "test.cpp"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
; Function Attrs: norecurse nounwind ssp uwtable
define i32 #main() #0 {
%1 = alloca i32, align 4
%2 = alloca double, align 8
...
; <label>:15: ; preds = %4
ret i32 0
}
attributes #0 = { norecurse nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"Apple LLVM version 8.1.0 (clang-802.0.42)"}

clang-4.0 generates redundant methods when initializing global variables

I'm learning LLVM these days by observing how clang deals with complex situations. I wrote (top level, not in a function):
int qaq = 666;
int tat = 233;
auto hh = qaq + tat;
And I use the command:
clang-4.0 003.cpp -emit-llvm -S -std=c++11
And clang generates code like this:
#qaq = global i32 666, align 4
#tat = global i32 233, align 4
#hh = global i32 0, align 4
#llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* #_GLOBAL__sub_I_003.cpp, i8* null }]
; Function Attrs: noinline uwtable
define internal void #__cxx_global_var_init() #0 section ".text.startup" {
%1 = load i32, i32* #qaq, align 4
%2 = load i32, i32* #tat, align 4
%3 = add nsw i32 %1, %2
store i32 %3, i32* #hh, align 4
ret void
}
; Function Attrs: noinline uwtable
define internal void #_GLOBAL__sub_I_003.cpp() #0 section ".text.startup" {
call void #__cxx_global_var_init()
ret void
}
I'm confused by _GLOBAL__sub_I_003.cpp: why does clang generate a function that only invokes another function (and does nothing else), even though neither of them takes any parameters?
Disclaimer: This is my interpretation of the logic, I'm not part of the LLVM team.
In order to understand the reasoning behind this, you have to understand a fundamental concept in software engineering: Complexity creates bugs, and makes testing harder.
But first, let's make your example a little more interesting:
int qaq = 666;
int tat = 233;
auto hh = qaq + tat;
auto ii = qaq - tat;
Which leads to:
; Function Attrs: noinline uwtable
define internal void #__cxx_global_var_init() #0 section ".text.startup" !dbg !16 {
%1 = load i32, i32* #qaq, align 4, !dbg !19
%2 = load i32, i32* #tat, align 4, !dbg !20
%3 = add nsw i32 %1, %2, !dbg !21
store i32 %3, i32* #hh, align 4, !dbg !21
ret void, !dbg !20
}
; Function Attrs: noinline uwtable
define internal void #__cxx_global_var_init.1() #0 section ".text.startup" !dbg !22 {
%1 = load i32, i32* #qaq, align 4, !dbg !23
%2 = load i32, i32* #tat, align 4, !dbg !24
%3 = sub nsw i32 %1, %2, !dbg !25
store i32 %3, i32* #ii, align 4, !dbg !25
ret void, !dbg !24
}
; Function Attrs: noinline uwtable
define internal void #_GLOBAL__sub_I_example.cpp() #0 section ".text.startup" !dbg !26 {
call void #__cxx_global_var_init(), !dbg !28
call void #__cxx_global_var_init.1(), !dbg !29
ret void
}
So we see that CLANG emits a single function for each non-trivial initialization, and calls each of them one after the other in _GLOBAL__sub_I_example.cpp(). That makes sense and is sensible, as things are neatly organized this way, and could become a garbled mess in larger/more complicated files otherwise.
Notice how that's the exact same logic that is being applied in your example.
Doing otherwise would imply an algorithm of the type: "if there is a single non-trivial global initialization, then put the code directly in the translation unit's global constructor".
Note the following:
The current logic handles that case correctly already.
In optimized code, the end result would be the exact same.
So what would that logic get us, really?
More branches to test.
More opportunities to accidentally insert a bug.
More code to maintain in the long run.
Removal of a single function call in the global initialization of some translation units in non-optimized builds.
Keeping things the way they are is just the right decision.

clang can't optimize away global variables used only in main()?

If I plug this c++ program into clang (version 3.7)
///*
#include "stdio.h"
#include "stdint.h"
//extern int printf(const unsigned char*, ...);
extern "C" void __cxa_pure_virtual() { }
struct A
{
virtual void foo() = 0;
};
struct B : A
{
uint32_t x;
B(int x) : x(x) { }
virtual void foo()
{
printf("This is a test %d\n", x);
}
};
//*/
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
int main()
{
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}
And compile with clang -emit-llvm main.cpp -fno-rtti -O3 -S, then I get the following byte code:
; ModuleID = 'main.cpp'
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-pc-linux-gnu"
%struct.B = type { %struct.A, i32 }
%struct.A = type { i32 (...)** }
$_ZN1B3fooEv = comdat any
$_ZTV1B = comdat any
#thing = global i64 0, align 8
#other = global float 1.000000e+01, align 4
#b = global %struct.B { %struct.A { i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* #_ZTV1B, i64 0, i64 2) to i32 (...)**) }, i32 12345 }, align 4
#_ZTV1B = linkonce_odr unnamed_addr constant [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* #_ZN1B3fooEv to i8*)], comdat, align 4
#.str = private unnamed_addr constant [19 x i8] c"This is a test %d\0A\00", align 1
#llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
; Function Attrs: nounwind readnone
define void #__cxa_pure_virtual() #0 {
entry:
ret void
}
define i32 #main() #1 {
entry:
%0 = load i64, i64* #thing, align 8, !tbaa !1
%inc = add i64 %0, 1
store i64 %inc, i64* #thing, align 8, !tbaa !1
%1 = load float, float* #other, align 4, !tbaa !5
%mul = fmul float %1, 0x400921FA00000000
store float %mul, float* #other, align 4, !tbaa !5
%vtable = load void (%struct.A*)**, void (%struct.A*)*** bitcast (%struct.B* #b to void (%struct.A*)***), align 4, !tbaa !7
%2 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 4
tail call void %2(%struct.A* getelementptr inbounds (%struct.B, %struct.B* #b, i32 0, i32 0))
ret i32 0
}
; Function Attrs: nounwind
define linkonce_odr void #_ZN1B3fooEv(%struct.B* nocapture readonly %this) unnamed_addr #2 comdat align 2 {
entry:
%x = getelementptr inbounds %struct.B, %struct.B* %this, i32 0, i32 1
%0 = load i32, i32* %x, align 4, !tbaa !9
%call = tail call i32 (i8*, ...) #printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* #.str, i32 0, i32 0), i32 %0)
ret void
}
; Function Attrs: nounwind
declare i32 #printf(i8* nocapture readonly, ...) #2
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.1 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"long long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !3, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"vtable pointer", !4, i64 0}
!9 = !{!10, !11, i64 4}
!10 = !{!"_ZTS1B", !11, i64 4}
!11 = !{!"int", !3, i64 0}
If you look at the main function, I have two variables that are useless. Sure I increment one and I do some multiplication on another, but I never use the values in them ever.
But if you look at the output of the byte code, it looks like it is still doing the useless math.
Is it just me or is this a bug?
Those variables are variables in global scope. The compiler simply couldn't figure out whether or not those variables could be declared and referenced in other translation units.
I'd be surprised if any modern C++ compiler is sophisticated enough to figure out that execution flow could not escape this translation unit, in a defined manner, and thus it would be safe to optimize away unused global variables in this translation unit.
No, I don't believe that this is a bug, as your variables are globals.
Clang cannot remove this math as it can't know that any externally called function (like the printf function, in a different translation unit) doesn't declare extern float other; and somehow uses it.
Try writing:
int main()
{
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}

Need insights about writing a pass

For my source code, I have the following IR:
; ModuleID = '<stdin>'
#.str = private unnamed_addr constant [9 x i8] c"SOME_ENV_VAR\00", align 1
#.str1 = private unnamed_addr constant [26 x i8] c"Need to set $ENV_Variable.\0A\00", align 1
; Function Attrs: nounwind
define void #foo(i8* %bar) #0 {
entry:
%bar.addr = alloca i8*, align 4
%baz = alloca i8*, align 4
store i8* %bar, i8** %bar.addr, align 4
%call = call i8* #getenv(i8* getelementptr inbounds ([9 x i8]* #.str, i32 0, i32 0)) #2
store i8* %call, i8** %baz, align 4
%0 = load i8** %baz, align 4
%cmp = icmp eq i8* %0, null
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
%call1 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([26 x i8]* #.str1, i32 0, i32 0))
br label %if.end
if.else: ; preds = %entry
%1 = load i8** %bar.addr, align 4
%2 = load i8** %baz, align 4
%call2 = call i8* #strcpy(i8* %1, i8* %2) #2
br label %if.end
if.end: ; preds = %if.else, %if.then
ret void
}
; Function Attrs: nounwind
declare i8* #getenv(i8*) #0
declare i32 #printf(i8*, ...) #1
; Function Attrs: nounwind
declare i8* #strcpy(i8*, i8*) #0
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
I've written the following code so far:
#include <stdlib.h>
#include <stdio.h>
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
namespace
{
Module* makeLLVMModule() {
Module* mod = new Module(llvm::StringRef("CustomPass"),getGlobalContext());
Constant* c = mod->getOrInsertFunction(llvm::StringRef("foo"),Type::getInt32Ty(getGlobalContext()),NULL);
Function* foo = cast<Function>(c);
Function::arg_iterator args =foo->arg_begin();
Value* bar = args++;
BasicBlock* Entry = BasicBlock::Create(getGlobalContext(),llvm::Twine("Entry"), foo);
BasicBlock* False = BasicBlock::Create(getGlobalContext(),llvm::Twine("False"), foo);
BasicBlock* True = BasicBlock::Create(getGlobalContext(),llvm::Twine("True"), foo);
char* pPath;
pPath = getenv("SOME_ENV_VAR");
IRBuilder<> builder(Entry);
Value* envVarDoesntExist = builder.CreateICmpEQ(llvm::StringRef(pPath),Constant::getNullValue(Value),llvm::Twine("temp"));
//---1
builder.CreateCondBr(envVarDoesntExist, False, True);
builder.SetInsertPoint(True);
builder.CreateCall3(strncpy,bar,llvm::StringRef(pPath),45,llvm::Twine("temp"));
//---2
builder.SetInsertPoint(False);
builder.CreateCall(printf,llvm::StringRef("Need to set $ENV_Variable.\n"),llvm::Twine("temp"));
//---1
return mod;
}
}
char funcP::ID = 0;
static RegisterPass<funcP> X("funcp", "funcP", false, false);
From ---1:How to convert llvm::StringRef to Value* ?
From ---2:How to convert char* to Value*
Could Constant::getNullValue(Value) be used for getting a NULL value?
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
Then what you need to do is to locate the call instruction and change it. There's no need to recreate the entire flow, it's already in your source code.
All you need to do is to create a function pass, iterate over all the instructions in the function, and if the instruction is a call instruction and the callee's name is strcpy then create a new call instruction to your new function, then replace the old instruction with the new instruction.
Also there seems to be some fundamental misunderstanding in your code between values in the compiler (such as 45 and all the StringRefs) and values in the code you are processing (instances of one of the subtypes of llvm::Value). Specifically, you can't just use 45 as a parameter to a function in the code you are processing - you have to create a constant int from that number, and then you can use that constant.
One final note - you can implicitly construct a StringRef from a const char*, you don't need to explicitly call the StringRef's constructor all over the place. Same with Twine.

LLVM mark function as const and remove duplicate calls

I have an external (C) function that I am calling in my LLVM IR. The IR gets JITed and everything works fine, but the generated code is performance sensitive, and I want to remove duplicate calls to my external function if possible. The function has no side effects. Is there a FunctionPass that eliminates redundant calls to the function? Is there something I have to do to mark the function as having no side effects?
Thanks!
According to http://llvm.org/docs/LangRef.html#function-attributes you can specify the attributes readonly or readnone for a function:
; Three external functions with the same signature; they differ only in the
; memory attribute attached to the declaration (none, readonly, readnone).
declare i32 @fn(i32 %i);
declare i32 @readonly_fn(i32 %i) readonly;
declare i32 @readnone_fn(i32 %i) readnone;
readonly means that the function doesn't write memory,
readnone means that it doesn't even read memory (for example sin() could be readnone)
If a function doesn't write memory, it should return its result based only on its parameters, and therefore be a pure function (as long as the global state doesn't change). In the case of a readnone function, this holds even if the global state changes.
The llvm optimizer can optimize calls to readonly and readnone functions with the EarlyCSE pass (common subexpression elimination), as shown in the following example:
using the following test functions
; Calls the unattributed fn twice with the same argument and sums the results.
define i32 @test_no_readonly()
{
%1 = call i32 @fn(i32 0)
%2 = call i32 @fn(i32 0)
%add = add i32 %1, %2
ret i32 %add
}
; Same shape, but the callee is declared readonly.
define i32 @test_readonly()
{
%1 = call i32 @readonly_fn(i32 0)
%2 = call i32 @readonly_fn(i32 0)
%add = add i32 %1, %2
ret i32 %add
}
; Same shape, but the callee is declared readnone.
define i32 @test_readnone()
{
%1 = call i32 @readnone_fn(i32 0)
%2 = call i32 @readnone_fn(i32 0)
%add = add i32 %1, %2
ret i32 %add
}
and running opt -early-cse -S readonly_fn.ll > readonly_fn_opt.ll optimizes away the second call for the readonly and readnone functions, resulting in
; Both calls to the unattributed fn are still present.
define i32 @test_no_readonly() {
%1 = call i32 @fn(i32 0)
%2 = call i32 @fn(i32 0)
%add = add i32 %1, %2
ret i32 %add
}
; A single call remains; its result is used for both add operands.
define i32 @test_readonly() {
%1 = call i32 @readonly_fn(i32 0)
%add = add i32 %1, %1
ret i32 %add
}
; A single call remains; its result is used for both add operands.
define i32 @test_readnone() {
%1 = call i32 @readnone_fn(i32 0)
%add = add i32 %1, %1
ret i32 %add
}
The readonly_fn and readnone_fn functions are each called only once, thus eliminating the redundant calls.
The -functionattrs pass can also add these attributes to defined functions