clang can't optimize away global variables used only in main()? - c++

If I plug this c++ program into clang (version 3.7)
///*
#include "stdio.h"
#include "stdint.h"
//extern int printf(const unsigned char*, ...);
extern "C" void __cxa_pure_virtual() { }
struct A
{
virtual void foo() = 0;
};
struct B : A
{
uint32_t x;
B(int x) : x(x) { }
virtual void foo()
{
printf("This is a test %d\n", x);
}
};
//*/
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
int main()
{
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}
And compile with clang -emit-llvm main.cpp -fno-rtti -O3 -S, then I get the following byte code:
; ModuleID = 'main.cpp'
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-pc-linux-gnu"
%struct.B = type { %struct.A, i32 }
%struct.A = type { i32 (...)** }
$_ZN1B3fooEv = comdat any
$_ZTV1B = comdat any
#thing = global i64 0, align 8
#other = global float 1.000000e+01, align 4
#b = global %struct.B { %struct.A { i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* #_ZTV1B, i64 0, i64 2) to i32 (...)**) }, i32 12345 }, align 4
#_ZTV1B = linkonce_odr unnamed_addr constant [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* #_ZN1B3fooEv to i8*)], comdat, align 4
#.str = private unnamed_addr constant [19 x i8] c"This is a test %d\0A\00", align 1
#llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
; Function Attrs: nounwind readnone
define void #__cxa_pure_virtual() #0 {
entry:
ret void
}
define i32 #main() #1 {
entry:
%0 = load i64, i64* #thing, align 8, !tbaa !1
%inc = add i64 %0, 1
store i64 %inc, i64* #thing, align 8, !tbaa !1
%1 = load float, float* #other, align 4, !tbaa !5
%mul = fmul float %1, 0x400921FA00000000
store float %mul, float* #other, align 4, !tbaa !5
%vtable = load void (%struct.A*)**, void (%struct.A*)*** bitcast (%struct.B* #b to void (%struct.A*)***), align 4, !tbaa !7
%2 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 4
tail call void %2(%struct.A* getelementptr inbounds (%struct.B, %struct.B* #b, i32 0, i32 0))
ret i32 0
}
; Function Attrs: nounwind
define linkonce_odr void #_ZN1B3fooEv(%struct.B* nocapture readonly %this) unnamed_addr #2 comdat align 2 {
entry:
%x = getelementptr inbounds %struct.B, %struct.B* %this, i32 0, i32 1
%0 = load i32, i32* %x, align 4, !tbaa !9
%call = tail call i32 (i8*, ...) #printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* #.str, i32 0, i32 0), i32 %0)
ret void
}
; Function Attrs: nounwind
declare i32 #printf(i8* nocapture readonly, ...) #2
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.1 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"long long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !3, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"vtable pointer", !4, i64 0}
!9 = !{!10, !11, i64 4}
!10 = !{!"_ZTS1B", !11, i64 4}
!11 = !{!"int", !3, i64 0}
If you look at the main function, I have two variables that are useless. Sure I increment one and I do some multiplication on another, but I never use the values in them ever.
But if you look at the output of the byte code, it looks like it is still doing the useless math.
Is it just me or is this a bug?

Those variables are variables in global scope. The compiler simply couldn't figure out whether or not those variables could be declared and referenced in other translation units.
I'd be surprised if any modern C++ compiler is sophisticated enough to figure out that execution flow could not escape this translation unit, in a defined manner, and thus it would be safe to optimize away unused global variables in this translation unit.

No, I don't believe that this is a bug, as your variables are globals.
Clang cannot remove this math as it can't know that any externally called function (like the printf function, in a different translation unit) doesn't declare extern float other; and somehow uses it.
Try writing:
int main()
{
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}

Related

Cannot link custom generated LLVM IR with Clang generated IR

I've been trying to link IR generated with llvm's C++ api with a another IR file generated by Clang++. The input file to Clang is a function fn I'm trying to call from the first IR file. But llvm-link doesn't replace fn's declaration with its definition.
main_ir.ll
source_filename = "top"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
#0 = private unnamed_addr constant [5 x i8] c"%d \0A\00", align 1
declare i32 #printf(...)
declare i32 #fn(i32, ...)
define internal i32 #main() {
entrypoint:
%f_call = call i32 (i32, ...) #fn(i32 2)
%printfCall = call i32 (...) #printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* #0,
i32 0, i32 0), i32 %f_call)
br label %ProgramExit
ProgramExit: ; preds = %entrypoint
ret i32 0
}
fn_ir.ll (generated with Clang)
source_filename = "libDessin.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 #_Z2fni(i32) #0 {
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = mul nsw i32 %3, 2
ret i32 %4
}
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-
math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-
width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-
math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-
math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-
cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
"use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.1-12 "}
And all llvm-link does is reproduce the contents of fn_ir.ll with the source_filename changed to llvm-link. I'd be real happy to know the bit I'm missing.
The answer is in the name mangling.
Your 'manually' generated IR has a function named fn, while clang++ emits the name _Z2fni.
You need to make the names match. Either emit the _Z2fni in the main_ir.ll, or (arguable better in this case) change the definition of fn in the fn_ir, e.g.:
extern "C" void fn(int x) {
return x * 2;
}
extern "C" tells the compiler to use C mangling convention, this is less fragile since it will work even if you change type or number of arguments of fn. However, it won't work if you want to pass C++ types into the fn, then you need to emit the right function name for the main_ir.ll.
UPD:
There two more 'discrepancies':
The fn has different arguments in the two modules: i32 vs i32, ...
The other issue is that main declared as internal. I guess it is just stripped since it is internal and it is not being called by anyone.
So just removing the internal flag should do the job for you.

Identify annotated variable in an LLVM pass

How can I identify an annotated variable in an LLVM pass?
#include <stdio.h>
int main (){
int x __attribute__((annotate("my_var")))= 0;
int a,b;
x = x + 1;
a = 5;
b = 6;
x = x + a;
return x;
}
For example, I want to identify the instructions which have the annotated variable (x in this case) and print them out (x = x+1; and x = x+a)
How can I achieve this?
This is the .ll file generated using LLVM
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
#.str = private unnamed_addr constant [7 x i8] c"my_var\00", section "llvm.metadata"
#.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata"
; Function Attrs: noinline nounwind optnone
define i32 #main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
store i32 0, i32* %1, align 4
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
store i32 0, i32* %2, align 4
%6 = load i32, i32* %2, align 4
%7 = add nsw i32 %6, 1
store i32 %7, i32* %2, align 4
store i32 5, i32* %3, align 4
store i32 6, i32* %4, align 4
%8 = load i32, i32* %2, align 4
%9 = load i32, i32* %3, align 4
%10 = add nsw i32 %8, %9
store i32 %10, i32* %2, align 4
%11 = load i32, i32* %2, align 4
ret i32 %11
}
; Function Attrs: nounwind
declare void #llvm.var.annotation(i8*, i8*, i8*, i32) #1
attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" $
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
I recently encountered similiary problem, as I searched Google still not found a solution.
But in the end , I found "ollvm" project's Utils.cpp ,it solved my problem.
In your case,
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
as we can see there is a call to #llvm.var.annotation , in our pass ,
we can loop through instructions over a function , and search for "call" instruction.
Then get the called function's name:
Function *fn = callInst->getCalledFunction();
StringRef fn_name = fn->getName();
and compare the called function's name with "llvm.var.annotation" .
If they match ,then we found the location of "int x " in your case .
The function "llvm.var.annotation" is documented in llvm's doc :
http://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic
If you have learn the function "llvm.var.annotation"'s prototype,
then you know that it's second argument is a pointer ,the pointer
points to "my_var\00" in your case . If you thought you can simply
convert it to a GlobalVariable ,then you will failed to get what
you wanted . The actual second argument passed to "llvm.var.annotation"
is
i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
in your case.
It's a expression but a GlobalVariable !!! By knowing this , we can
finally get the annotation of our target variable by :
ConstantExpr *ce =
cast<ConstantExpr>(callInst->getOperand(1));
if (ce) {
if (ce->getOpcode() == Instruction::GetElementPtr) {
if (GlobalVariable *annoteStr =
dyn_cast<GlobalVariable>(ce->getOperand(0))) {
if (ConstantDataSequential *data =
dyn_cast<ConstantDataSequential>(
annoteStr->getInitializer())) {
if (data->isString()) {
errs() << "Found data " << data->getAsString();
}
}
}
}
Hope you already solved the problem .
Have a nice day .
You have to loop on instructions and identify calls to llvm.var.annotation
First argument is a pointer to the annotated variable (i8*).
To get the actual annotated variable, you then need to find what this pointer points to.
In your case, this is the source operand of the bitcast instruction.

How to determine if a function parameter is annotated?

I'm annotating function parameters as shown below with the label bar.
int foo (char* s __attribute__((annotate("bar")))) {
...
}
Next, I am running a function pass. How can I determine if a given function argument is annotated with the label bar?
You will have to read the llvm.var.annotation and llvm.dbg.declare intrinsics.
More specifically, here is the llvm-ir generated by your code above:
#.str = private unnamed_addr constant [4 x i8] c"bar\00", section "llvm.metadata"
#.str.1 = private unnamed_addr constant [75 x i8] c"/tmp/compiler-explorer-compiler117030-12962-1rhu4lb.ojfaiz4cxr/example.cpp\00", section "llvm.metadata"
; Function Attrs: nounwind uwtable
define i32 #foo(char*)(i8*) #0 !dbg !6 {
%2 = alloca i8*, align 8
store i8* %0, i8** %2, align 8
call void #llvm.dbg.declare(metadata i8** %2, metadata !12, metadata !13), !dbg !14
%3 = bitcast i8** %2 to i8*
call void #llvm.var.annotation(i8* %3, i8* getelementptr inbounds ([4 x i8], [4 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8], [75 x i8]* #.str.1, i32 0, i32 0), i32 1)
ret i32 0, !dbg !15
}
!6 = distinct !DISubprogram(name: "foo", linkageName: "foo(char*)", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10}
!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64, align: 64)
!11 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!12 = !DILocalVariable(name: "s", arg: 1, scope: !6, file: !1, line: 1, type: !10)
The dbg.declare instruction tells you that %2 is actually the first parameter of the function (named s).
%3 is a bitcast of %2, so basically an alias.
And the the llvm.var.annotation instruction tells you that %2 is annotated with the constant string #str, which value is "bar".

Need insights about writing a pass

For my source code, I have the following IR:
; ModuleID = '<stdin>'
#.str = private unnamed_addr constant [9 x i8] c"SOME_ENV_VAR\00", align 1
#.str1 = private unnamed_addr constant [26 x i8] c"Need to set $ENV_Variable.\0A\00", align 1
; Function Attrs: nounwind
define void #foo(i8* %bar) #0 {
entry:
%bar.addr = alloca i8*, align 4
%baz = alloca i8*, align 4
store i8* %bar, i8** %bar.addr, align 4
%call = call i8* #getenv(i8* getelementptr inbounds ([9 x i8]* #.str, i32 0, i32 0)) #2
store i8* %call, i8** %baz, align 4
%0 = load i8** %baz, align 4
%cmp = icmp eq i8* %0, null
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
%call1 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([26 x i8]* #.str1, i32 0, i32 0))
br label %if.end
if.else: ; preds = %entry
%1 = load i8** %bar.addr, align 4
%2 = load i8** %baz, align 4
%call2 = call i8* #strcpy(i8* %1, i8* %2) #2
br label %if.end
if.end: ; preds = %if.else, %if.then
ret void
}
; Function Attrs: nounwind
declare i8* #getenv(i8*) #0
declare i32 #printf(i8*, ...) #1
; Function Attrs: nounwind
declare i8* #strcpy(i8*, i8*) #0
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
I've written the following code so far:
#include <stdlib.h>
#include <stdio.h>
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
namespace
{
Module* makeLLVMModule() {
Module* mod = new Module(llvm::StringRef("CustomPass"),getGlobalContext());
Constant* c = mod->getOrInsertFunction(llvm::StringRef("foo"),Type::getInt32Ty(getGlobalContext()),NULL);
Function* foo = cast<Function>(c);
Function::arg_iterator args =foo->arg_begin();
Value* bar = args++;
BasicBlock* Entry = BasicBlock::Create(getGlobalContext(),llvm::Twine("Entry"), foo);
BasicBlock* False = BasicBlock::Create(getGlobalContext(),llvm::Twine("False"), foo);
BasicBlock* True = BasicBlock::Create(getGlobalContext(),llvm::Twine("True"), foo);
char* pPath;
pPath = getenv("SOME_ENV_VAR");
IRBuilder<> builder(Entry);
Value* envVarDoesntExist = builder.CreateICmpEQ(llvm::StringRef(pPath),Constant::getNullValue(Value),llvm::Twine("temp"));
//---1
builder.CreateCondBr(envVarDoesntExist, False, True);
builder.SetInsertPoint(True);
builder.CreateCall3(strncpy,bar,llvm::StringRef(pPath),45,llvm::Twine("temp"));
//---2
builder.SetInsertPoint(False);
builder.CreateCall(printf,llvm::StringRef("Need to set $ENV_Variable.\n"),llvm::Twine("temp"));
//---1
return mod;
}
}
char funcP::ID = 0;
static RegisterPass<funcP> X("funcp", "funcP", false, false);
From ---1:How to convert llvm::StringRef to Value* ?
From ---2:How to convert char* to Value*
Could Constant::getNullValue(Value) be used for getting a NULL value?
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
Then what you need to do is to locate the call instruction and change it. There's no need to recreate the entire flow, it's already in your source code.
All you need to do is to create a function pass, iterate over all the instructions in the function, and if the instruction is a call instruction and the callee's name is strcpy then create a new call instruction to your new function, then replace the old instruction with the new instruction.
Also there seems to be some fundamental misunderstanding in your code between values in the compiler (such as 45 and all the StringRefs) and values in the code you are processing (instances of one of the subtypes of llvm::Value). Specifically, you can't just use 45 as a parameter to a function in the code you are processing - you have to create a constant int from that number, and then you can use that constant.
One final note - you can implicitly construct a StringRef from a const char*, you don't need to explicitly call the StringRef's constructor all over the place. Same with Twine.

How to create an array of classes types?

I have a single class "Base", and a few tens of classes derived from Base. I would like to have a method that creates me the right class by an index. Like this:
class Base
{
};
class A : public Base
{
}
class B : public Base
{
}
class C : public Base
{
}
Type array = { A, B, C };
and then I could do new array[i];
How could this be achieved with C++(0x)? Usually I would use an the Abstract Factory Pattern. But since I have a LOT of derived classes, this would really slow down the program.
Since the derived classes will be used only once I also taught to use this:
Base *array = { new A, new B, new C };
But this would lead to huge memory consumption, not counting that not every class will always be used.
Any suggestion?
You cannot use an array of classes, but you can use an array of pointers to functions.
typedef std::unique_ptr<Base> (*Creator)();
template <typename T>
std::unique_ptr<Base> make() { return new T{}; }
Creator const array[] = { make<A>, make<B>, make<C> };
int main() {
std::unique_ptr<Base> b = array[1]();
b->foo();
}
For those worried by the cost of creating so many template functions, here is an example:
#include <stdio.h>
struct Base { virtual void foo() const = 0; };
struct A: Base { void foo() const { printf("A"); } };
struct B: Base { void foo() const { printf("B"); } };
struct C: Base { void foo() const { printf("C"); } };
typedef Base* (*Creator)();
template <typename T>
static Base* make() { return new T{}; }
static Creator const array[] = { make<A>, make<B>, make<C> };
Base* select_array(int i) {
return array[i]();
}
Base* select_switch(int i) {
switch(i) {
case 0: return make<A>();
case 1: return make<B>();
case 2: return make<C>();
default: return 0;
}
}
LLVM/Clang generates the following output:
define %struct.Base* #select_array(int)(i32 %i) uwtable {
%1 = sext i32 %i to i64
%2 = getelementptr inbounds [3 x %struct.Base* ()*]* #array, i64 0, i64 %1
%3 = load %struct.Base* ()** %2, align 8, !tbaa !0
%4 = tail call %struct.Base* %3()
ret %struct.Base* %4
}
define noalias %struct.Base* #select_switch(int)(i32 %i) uwtable {
switch i32 %i, label %13 [
i32 0, label %1
i32 1, label %5
i32 2, label %9
]
; <label>:1 ; preds = %0
%2 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%3 = bitcast i8* %2 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %3, align 8
%4 = bitcast i8* %2 to %struct.Base*
br label %13
; <label>:5 ; preds = %0
%6 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%7 = bitcast i8* %6 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for B, i64 0, i64 2) to i32 (...)**), i32 (...)*** %7, align 8
%8 = bitcast i8* %6 to %struct.Base*
br label %13
; <label>:9 ; preds = %0
%10 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%11 = bitcast i8* %10 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for C, i64 0, i64 2) to i32 (...)**), i32 (...)*** %11, align 8
%12 = bitcast i8* %10 to %struct.Base*
br label %13
; <label>:13 ; preds = %9, %5, %1, %0
%.0 = phi %struct.Base* [ %12, %9 ], [ %8, %5 ], [ %4, %1 ], [ null, %0 ]
ret %struct.Base* %.0
}
Unfortunately, it is not quite intelligent enough to automatically inline the functions with a regular array code (known issue with the LLVM optimizer, I don't know if gcc does better)... but using switch it is indeed possible.
typedef Base* BaseMaker();
template <class X> Base* make() {
return new X;
}
BaseMaker* makers[] = { make<A>, make<B>, make<C> };
Base* b = makers[2]();