How to create an array of classes types? - c++

I have a single class "Base", and a few tens of classes derived from Base. I would like to have a method that creates me the right class by an index. Like this:
class Base
{
};
class A : public Base
{
}
class B : public Base
{
}
class C : public Base
{
}
Type array = { A, B, C };
and then I could do new array[i];
How could this be achieved with C++(0x)? Usually I would use an the Abstract Factory Pattern. But since I have a LOT of derived classes, this would really slow down the program.
Since the derived classes will be used only once I also taught to use this:
Base *array = { new A, new B, new C };
But this would lead to huge memory consumption, not counting that not every class will always be used.
Any suggestion?

You cannot use an array of classes, but you can use an array of pointers to functions.
typedef std::unique_ptr<Base> (*Creator)();
template <typename T>
std::unique_ptr<Base> make() { return new T{}; }
Creator const array[] = { make<A>, make<B>, make<C> };
int main() {
std::unique_ptr<Base> b = array[1]();
b->foo();
}
For those worried by the cost of creating so many template functions, here is an example:
#include <stdio.h>
struct Base { virtual void foo() const = 0; };
struct A: Base { void foo() const { printf("A"); } };
struct B: Base { void foo() const { printf("B"); } };
struct C: Base { void foo() const { printf("C"); } };
typedef Base* (*Creator)();
template <typename T>
static Base* make() { return new T{}; }
static Creator const array[] = { make<A>, make<B>, make<C> };
Base* select_array(int i) {
return array[i]();
}
Base* select_switch(int i) {
switch(i) {
case 0: return make<A>();
case 1: return make<B>();
case 2: return make<C>();
default: return 0;
}
}
LLVM/Clang generates the following output:
define %struct.Base* #select_array(int)(i32 %i) uwtable {
%1 = sext i32 %i to i64
%2 = getelementptr inbounds [3 x %struct.Base* ()*]* #array, i64 0, i64 %1
%3 = load %struct.Base* ()** %2, align 8, !tbaa !0
%4 = tail call %struct.Base* %3()
ret %struct.Base* %4
}
define noalias %struct.Base* #select_switch(int)(i32 %i) uwtable {
switch i32 %i, label %13 [
i32 0, label %1
i32 1, label %5
i32 2, label %9
]
; <label>:1 ; preds = %0
%2 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%3 = bitcast i8* %2 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %3, align 8
%4 = bitcast i8* %2 to %struct.Base*
br label %13
; <label>:5 ; preds = %0
%6 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%7 = bitcast i8* %6 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for B, i64 0, i64 2) to i32 (...)**), i32 (...)*** %7, align 8
%8 = bitcast i8* %6 to %struct.Base*
br label %13
; <label>:9 ; preds = %0
%10 = tail call noalias i8* #operator new(unsigned long)(i64 8)
%11 = bitcast i8* %10 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* #vtable for C, i64 0, i64 2) to i32 (...)**), i32 (...)*** %11, align 8
%12 = bitcast i8* %10 to %struct.Base*
br label %13
; <label>:13 ; preds = %9, %5, %1, %0
%.0 = phi %struct.Base* [ %12, %9 ], [ %8, %5 ], [ %4, %1 ], [ null, %0 ]
ret %struct.Base* %.0
}
Unfortunately, it is not quite intelligent enough to automatically inline the functions with a regular array code (known issue with the LLVM optimizer, I don't know if gcc does better)... but using switch it is indeed possible.

typedef Base* BaseMaker();
template <class X> Base* make() {
return new X;
}
BaseMaker* makers[] = { make<A>, make<B>, make<C> };
Base* b = makers[2]();

Related

LLVM IR How to pass struct to function

I'm making my own c-like language and I'm trying to pass a struct to a function. The struct
is representing an array(one member is a pointer to the array and the other member is the length). If I call the function "test" like this: call void #test(%structintarray %a) I get error: '%a' defined with type '%structintarray*' but expected '%structintarray = type { i32*, i32 }' . But if I call "test" like this: call void #test(%structintarray* %a) I get error: '#test' defined with type 'void (%structintarray)*' but expected 'void (%structintarray*)*' I don't understand this second error.
What I'm I doing wrong here?
`
void test(int[] a) {
}
int main() {
int[] a = new int[5];
test(a);
return 0;
}
generates;
%structintarray = type { i32*, i32 }
define void #test(%structintarray %__p__a) {
entry: %a = alloca %structintarray, align 4
store %structintarray %__p__a , %structintarray* %a, align 4
ret void
}
define i32 #main() {
entry: %t0 = call noalias i8* #calloc(i32 5 , i32 4)
%t1 = bitcast i8* %t0 to i32*
%a = alloca %structintarray, align 4
%t2 = getelementptr %structintarray, %structintarray* %a, i32 0, i32 0
store i32* %t1 , i32** %t2, align 4 ; pointer to array
%t3 = getelementptr %structintarray, %structintarray* %a, i32 0, i32 1
store i32 5 , i32* %t3, align 4 ; size of array
call void #test(%structintarray %a)
ret i32 0
}

Identify annotated variable in an LLVM pass

How can I identify an annotated variable in an LLVM pass?
#include <stdio.h>
int main (){
int x __attribute__((annotate("my_var")))= 0;
int a,b;
x = x + 1;
a = 5;
b = 6;
x = x + a;
return x;
}
For example, I want to identify the instructions which have the annotated variable (x in this case) and print them out (x = x+1; and x = x+a)
How can I achieve this?
This is the .ll file generated using LLVM
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
#.str = private unnamed_addr constant [7 x i8] c"my_var\00", section "llvm.metadata"
#.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata"
; Function Attrs: noinline nounwind optnone
define i32 #main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
store i32 0, i32* %1, align 4
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
store i32 0, i32* %2, align 4
%6 = load i32, i32* %2, align 4
%7 = add nsw i32 %6, 1
store i32 %7, i32* %2, align 4
store i32 5, i32* %3, align 4
store i32 6, i32* %4, align 4
%8 = load i32, i32* %2, align 4
%9 = load i32, i32* %3, align 4
%10 = add nsw i32 %8, %9
store i32 %10, i32* %2, align 4
%11 = load i32, i32* %2, align 4
ret i32 %11
}
; Function Attrs: nounwind
declare void #llvm.var.annotation(i8*, i8*, i8*, i32) #1
attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" $
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
I recently encountered similiary problem, as I searched Google still not found a solution.
But in the end , I found "ollvm" project's Utils.cpp ,it solved my problem.
In your case,
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
as we can see there is a call to #llvm.var.annotation , in our pass ,
we can loop through instructions over a function , and search for "call" instruction.
Then get the called function's name:
Function *fn = callInst->getCalledFunction();
StringRef fn_name = fn->getName();
and compare the called function's name with "llvm.var.annotation" .
If they match ,then we found the location of "int x " in your case .
The function "llvm.var.annotation" is documented in llvm's doc :
http://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic
If you have learn the function "llvm.var.annotation"'s prototype,
then you know that it's second argument is a pointer ,the pointer
points to "my_var\00" in your case . If you thought you can simply
convert it to a GlobalVariable ,then you will failed to get what
you wanted . The actual second argument passed to "llvm.var.annotation"
is
i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
in your case.
It's a expression but a GlobalVariable !!! By knowing this , we can
finally get the annotation of our target variable by :
ConstantExpr *ce =
cast<ConstantExpr>(callInst->getOperand(1));
if (ce) {
if (ce->getOpcode() == Instruction::GetElementPtr) {
if (GlobalVariable *annoteStr =
dyn_cast<GlobalVariable>(ce->getOperand(0))) {
if (ConstantDataSequential *data =
dyn_cast<ConstantDataSequential>(
annoteStr->getInitializer())) {
if (data->isString()) {
errs() << "Found data " << data->getAsString();
}
}
}
}
Hope you already solved the problem .
Have a nice day .
You have to loop on instructions and identify calls to llvm.var.annotation
First argument is a pointer to the annotated variable (i8*).
To get the actual annotated variable, you then need to find what this pointer points to.
In your case, this is the source operand of the bitcast instruction.

clang can't optimize away global variables used only in main()?

If I plug this c++ program into clang (version 3.7)
///*
#include "stdio.h"
#include "stdint.h"
//extern int printf(const unsigned char*, ...);
extern "C" void __cxa_pure_virtual() { }
struct A
{
virtual void foo() = 0;
};
struct B : A
{
uint32_t x;
B(int x) : x(x) { }
virtual void foo()
{
printf("This is a test %d\n", x);
}
};
//*/
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
int main()
{
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}
And compile with clang -emit-llvm main.cpp -fno-rtti -O3 -S, then I get the following byte code:
; ModuleID = 'main.cpp'
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-pc-linux-gnu"
%struct.B = type { %struct.A, i32 }
%struct.A = type { i32 (...)** }
$_ZN1B3fooEv = comdat any
$_ZTV1B = comdat any
#thing = global i64 0, align 8
#other = global float 1.000000e+01, align 4
#b = global %struct.B { %struct.A { i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* #_ZTV1B, i64 0, i64 2) to i32 (...)**) }, i32 12345 }, align 4
#_ZTV1B = linkonce_odr unnamed_addr constant [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* #_ZN1B3fooEv to i8*)], comdat, align 4
#.str = private unnamed_addr constant [19 x i8] c"This is a test %d\0A\00", align 1
#llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
; Function Attrs: nounwind readnone
define void #__cxa_pure_virtual() #0 {
entry:
ret void
}
define i32 #main() #1 {
entry:
%0 = load i64, i64* #thing, align 8, !tbaa !1
%inc = add i64 %0, 1
store i64 %inc, i64* #thing, align 8, !tbaa !1
%1 = load float, float* #other, align 4, !tbaa !5
%mul = fmul float %1, 0x400921FA00000000
store float %mul, float* #other, align 4, !tbaa !5
%vtable = load void (%struct.A*)**, void (%struct.A*)*** bitcast (%struct.B* #b to void (%struct.A*)***), align 4, !tbaa !7
%2 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 4
tail call void %2(%struct.A* getelementptr inbounds (%struct.B, %struct.B* #b, i32 0, i32 0))
ret i32 0
}
; Function Attrs: nounwind
define linkonce_odr void #_ZN1B3fooEv(%struct.B* nocapture readonly %this) unnamed_addr #2 comdat align 2 {
entry:
%x = getelementptr inbounds %struct.B, %struct.B* %this, i32 0, i32 1
%0 = load i32, i32* %x, align 4, !tbaa !9
%call = tail call i32 (i8*, ...) #printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* #.str, i32 0, i32 0), i32 %0)
ret void
}
; Function Attrs: nounwind
declare i32 #printf(i8* nocapture readonly, ...) #2
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.1 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"long long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !3, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"vtable pointer", !4, i64 0}
!9 = !{!10, !11, i64 4}
!10 = !{!"_ZTS1B", !11, i64 4}
!11 = !{!"int", !3, i64 0}
If you look at the main function, I have two variables that are useless. Sure I increment one and I do some multiplication on another, but I never use the values in them ever.
But if you look at the output of the byte code, it looks like it is still doing the useless math.
Is it just me or is this a bug?
Those variables are variables in global scope. The compiler simply couldn't figure out whether or not those variables could be declared and referenced in other translation units.
I'd be surprised if any modern C++ compiler is sophisticated enough to figure out that execution flow could not escape this translation unit, in a defined manner, and thus it would be safe to optimize away unused global variables in this translation unit.
No, I don't believe that this is a bug, as your variables are globals.
Clang cannot remove this math as it can't know that any externally called function (like the printf function, in a different translation unit) doesn't declare extern float other; and somehow uses it.
Try writing:
int main()
{
uint64_t thing = 0;
float other = 10.0f;
B b(12345);
thing++;
A* a = &b;
other *= 3.14159f;
a->foo();
}

llvm get annotations

I updated my previous question under a new form.
Hello everyone,
I have the following LLVM IR :
#.str = private unnamed_addr constant [3 x i8] c"DS\00", section "llvm.metadata"
#llvm.global.annotations = appending global [1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }], section "llvm.metadata"
I need to get #f (or maybe I can get somehow the definition of #f = global i32 0, align 4 ) and also I need to get "DS" from #.str. In my target code I have :
__attribute__((annotate("DS"))) int f=0;
I have problems to parse #llvm.global.annotations and I assume I will have with #.str. What I tried:
1.
for (Module::global_iterator I = F.global_begin(), E = F.global_end(); I != E; ++I) {
if (I->getName() == "llvm.global.annotations") {
Value *V = cast<Value>(I->getOperand(0));
errs()<<"\n "<<*(V)<<"\n";
errs()<<"\n "<<*(V->getType())<<"\n";
RESULT :
[1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }]
[1 x { i8*, i8*, i8*, i32 }]
2.
errs()<<"\n "<<(V->getValueID())<<"\n";
if(V->getValueID() == Value::ConstantArrayVal) {
ConstantArray *ca = (ConstantArray *)V;
errs()<<"\n "<<(ca[0])<<"\n"; }
RESULT :
[1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }]
Any help is welcomed ! Thank you !
Quite a late answer, but Google led me here and I thought that providing a full LLVM pass that discovers free text annotation would be helpful.
This LLVM pass would instrument only function marked with __attribute((annotate("someFreeTextAnnotation"))).
The code follows:
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Constants.h"
#include <set>
using namespace llvm;
const char *AnnotationString = "someFreeTextAnnotation";
namespace {
struct Hello : public FunctionPass {
static char ID;
Hello() : FunctionPass(ID) {}
std::set<Function*> annotFuncs;
virtual bool doInitialization(Module &M)override{
getAnnotatedFunctions(&M);
return false;
}
bool shouldInstrumentFunc(Function &F){
return annotFuncs.find(&F)!=annotFuncs.end();
}
void getAnnotatedFunctions(Module *M){
for (Module::global_iterator I = M->global_begin(),
E = M->global_end();
I != E;
++I) {
if (I->getName() == "llvm.global.annotations") {
ConstantArray *CA = dyn_cast<ConstantArray>(I->getOperand(0));
for(auto OI = CA->op_begin(); OI != CA->op_end(); ++OI){
ConstantStruct *CS = dyn_cast<ConstantStruct>(OI->get());
Function *FUNC = dyn_cast<Function>(CS->getOperand(0)->getOperand(0));
GlobalVariable *AnnotationGL = dyn_cast<GlobalVariable>(CS->getOperand(1)->getOperand(0));
StringRef annotation = dyn_cast<ConstantDataArray>(AnnotationGL->getInitializer())->getAsCString();
if(annotation.compare(AnnotationString)==0){
annotFuncs.insert(FUNC);
//errs() << "Found annotated function " << FUNC->getName()<<"\n";
}
}
}
}
}
bool runOnFunction(Function &F) override {
if(shouldInstrumentFunc(F)==false)
return false;
errs() << "Instrumenting " << F.getName() << "\n";
return false;
}
}; // end of struct Hello
} // end of anonymous namespace
char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Discover annotation attribute",
false /* Only looks at CFG */,
false /* Analysis Pass */);
Use runOnModule() instead of runOnFunction() if you are doing so. Alternatively, you can take the module. llvm.global.annotations is defined outside functions. Inside do something like:
for (Module::global_iterator I = F.global_begin(), E = F.global_end(); I != E; ++I) {
if (I->getName() == "llvm.global.annotations")
{
errs()<<"\nllvm.global.annotations\n";
//1. find out what global variable is by "parsing" the IR
//2. get through the module till you find a load #f
//3. you can add metadata to the load function and you can easily get the metadata from the normal pass
}
}
I solved it.
I cast the entire annotated expression to Value*. Then, in order to avoid ugly things like getAsString(), I check if V->getValueID() == Value::ConstantArrayVal in order to cast it to ConstantArray. Because it contains only array[0], I cast array0>getOperand(0) to ConstantStruct. Therefore, from ConstantStruct you can get all the four operands. Now to do is only to get the names of #f, #str from every field. This is done by ConstantStruct->getOperand(0)->getOperand(0).

Do modern C++ compilers inline functions which are called exactly once?

As in, say my header file is:
class A
{
void Complicated();
}
And my source file
void A::Complicated()
{
...really long function...
}
Can I split the source file into
void DoInitialStuff(pass necessary vars by ref or value)
{
...
}
void HandleCaseA(pass necessary vars by ref or value)
{
...
}
void HandleCaseB(pass necessary vars by ref or value)
{
...
}
void FinishUp(pass necessary vars by ref or value)
{
...
}
void A::Complicated()
{
...
DoInitialStuff(...);
switch ...
HandleCaseA(...)
HandleCaseB(...)
...
FinishUp(...)
}
Entirely for readability and without any fear of impact in terms of performance?
You should mark the functions static so that the compiler know they are local to that translation unit.
Without static the compiler cannot assume (barring LTO / WPA) that the function is only called once, so is less likely to inline it.
Demonstration using the LLVM Try Out page.
That said, code for readability first, micro-optimizations (and such tweaking is a micro-optimization) should only come after performance measures.
Example:
#include <cstdio>
static void foo(int i) {
int m = i % 3;
printf("%d %d", i, m);
}
int main(int argc, char* argv[]) {
for (int i = 0; i != argc; ++i) {
foo(i);
}
}
Produces with static:
; ModuleID = '/tmp/webcompile/_27689_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
#.str = private constant [6 x i8] c"%d %d\00" ; <[6 x i8]*> [#uses=1]
define i32 #main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
%cmp4 = icmp eq i32 %argc, 0 ; <i1> [#uses=1]
br i1 %cmp4, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
%rem.i = srem i32 %0, 3 ; <i32> [#uses=1]
%call.i = tail call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([6 x i8]* #.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
%inc = add nsw i32 %0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %inc, %argc ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret i32 0
}
declare i32 #printf(i8* nocapture, ...) nounwind
Without static:
; ModuleID = '/tmp/webcompile/_27859_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
#.str = private constant [6 x i8] c"%d %d\00" ; <[6 x i8]*> [#uses=1]
define void #foo(int)(i32 %i) nounwind {
entry:
%rem = srem i32 %i, 3 ; <i32> [#uses=1]
%call = tail call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([6 x i8]* #.str, i64 0, i64 0), i32 %i, i32 %rem) ; <i32> [#uses=0]
ret void
}
declare i32 #printf(i8* nocapture, ...) nounwind
define i32 #main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
%cmp4 = icmp eq i32 %argc, 0 ; <i1> [#uses=1]
br i1 %cmp4, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
%rem.i = srem i32 %0, 3 ; <i32> [#uses=1]
%call.i = tail call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([6 x i8]* #.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
%inc = add nsw i32 %0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %inc, %argc ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret i32 0
}
Depends on aliasing (pointers to that function) and function length (a large function inlined in a branch could throw the other branch out of cache, thus hurting performance).
Let the compiler worry about that, you worry about your code :)
A complicated function is likely to have its speed dominated by the operations within the function; the overhead of a function call won't be noticeable even if it isn't inlined.
You don't have much control over the inlining of a function, the best way to know is to try it and find out.
A compiler's optimizer might be more effective with shorter pieces of code, so you might find it getting faster even if it's not inlined.
If you split up your code into logical groupings the compiler will do what it deems best: If it's short and easy, the compiler should inline it and the result is the same. If however the code is complicated, making an extra function call might actually be faster than doing all the work inlined, so you leave the compiler the option to do that too. On top of all that, the logically split code can be far easier for a maintainer to grok and avoid future bugs.
I suggest you create a helper class to break your complicated function into method calls, much like you were proposing, but without the long, boring and unreadable task of passing arguments to each and every one of these smaller functions. Pass these arguments only once by making them member variables of the helper class.
Don't focus on optimization at this point, make sure your code is readable and you'll be fine 99% of the time.