How to change a do-while form loop into a while form loop in LLVM IR - llvm

How can I change a loop in do-while form into a loop in while-form in LLVM IR?

Here we have a little loop example. The loops are just running through a boolean array until they find the first occurrence of true. I compiled it with clang -emit-llvm to get the optimized llvm IR.
#include <stdio.h>
#include <string.h>
// Returns how many elements precede the first `true` value, scanning from
// *start itself. There is no bound check: the loop stops only at a `true`.
int foo(bool* start){
bool* cond = start;;
// while form: the condition is evaluated before every increment.
while (*cond != true)
cond++;
return cond - start;
}
// Returns the distance from start to the first `true` element found after
// stepping forward. The pointer is pre-incremented before each test, so
// *start itself is never examined. The scan has no bound check.
int bar(bool* start){
bool* cond = start;
// do-while form: the (empty) body runs first, then the condition, which
// also carries the increment.
do {
}while (*(++cond) != true);
return cond - start;
}
// Driver: builds an 8-element array that is false everywhere except index 5
// and prints the results of foo and bar for it.
int main(){
bool cond[8];
memset(&cond, 0, sizeof(bool)*8); // zero-fill: every element starts as false
cond[5] = true;
printf("%i %i\n", foo(cond), bar(cond));
}
The IR for the foo function (using just a while loop) looks like this:
; Function Attrs: nounwind uwtable
; IR for foo(bool*), the while-form loop: the condition block (%3) is entered
; first and chooses between the loop body (%9) and the exit block (%12).
define i32 @_Z3fooPb(i8* %start) #0 {
%1 = alloca i8*, align 8
%cond = alloca i8*, align 8
store i8* %start, i8** %1, align 8
%2 = load i8** %1, align 8
store i8* %2, i8** %cond, align 8 ; cond = start
br label %3
; <label>:3 ; preds = %9, %0
; Loop condition: load *cond and compare it against 1.
%4 = load i8** %cond, align 8
%5 = load i8* %4, align 1
%6 = trunc i8 %5 to i1
%7 = zext i1 %6 to i32
%8 = icmp ne i32 %7, 1
br i1 %8, label %9, label %12
; <label>:9 ; preds = %3
; Loop body: advance cond by one element, then go back to the condition.
%10 = load i8** %cond, align 8
%11 = getelementptr inbounds i8* %10, i32 1
store i8* %11, i8** %cond, align 8
br label %3
; <label>:12 ; preds = %3
; Exit: return (cond - start), truncated from i64 to i32.
%13 = load i8** %cond, align 8
%14 = load i8** %1, align 8
%15 = ptrtoint i8* %13 to i64
%16 = ptrtoint i8* %14 to i64
%17 = sub i64 %15, %16
%18 = trunc i64 %17 to i32
ret i32 %18
}
and for bar, which is using a do while we get:
; Function Attrs: nounwind uwtable
; IR for bar(bool*), the do-while form: block %3 is the (empty) loop body and
; only branches on to %4, which increments cond and then tests the condition.
define i32 @_Z3barPb(i8* %start) #0 {
%1 = alloca i8*, align 8
%cond = alloca i8*, align 8
store i8* %start, i8** %1, align 8
%2 = load i8** %1, align 8
store i8* %2, i8** %cond, align 8 ; cond = start
br label %3
; <label>:3 ; preds = %4, %0
; Empty loop body: falls straight through to the condition block.
br label %4
; <label>:4 ; preds = %3
; Condition: pre-increment cond, load the new *cond and compare it against 1.
%5 = load i8** %cond, align 8
%6 = getelementptr inbounds i8* %5, i32 1
store i8* %6, i8** %cond, align 8
%7 = load i8* %6, align 1
%8 = trunc i8 %7 to i1
%9 = zext i1 %8 to i32
%10 = icmp ne i32 %9, 1
br i1 %10, label %3, label %11
; <label>:11 ; preds = %4
; Exit: return (cond - start), truncated from i64 to i32.
%12 = load i8** %cond, align 8
%13 = load i8** %1, align 8
%14 = ptrtoint i8* %12 to i64
%15 = ptrtoint i8* %13 to i64
%16 = sub i64 %14, %15
%17 = trunc i64 %16 to i32
ret i32 %17
}
The differences are very small: for bar we have one additional label and an additional br, because we jump straight to the body of the loop and execute it before we evaluate the condition.
So the first step in transforming a do-while loop is to get rid of that branch and jump directly to the condition. Now it is a while loop in which the condition is evaluated first. That part is easy. Now you have two choices for how to handle the condition. You can try to modify the condition, which is a really hard task because almost anything can be put inside a loop's condition. The easy way is to copy the loop body once (everything from ;<label>:4 to ;<label>:11) and place the copy before the first branch of the loop. That way you won't change the correctness of your code, and your do-while loop becomes a while loop preceded by one execution of the loop body.
You can copy the loop body with CloneBasicBlock from llvm/Transforms/Utils/Cloning.h:
/// CloneBasicBlock - Return a copy of the specified basic block, but without
/// embedding the block into a particular function. The block returned is an
/// exact copy of the specified basic block, without any remapping having been
/// performed. Because of this, this is only suitable for applications where
/// the basic block will be inserted into the same function that it was cloned
/// from (loop unrolling would use this, for example).
///
/// Also, note that this function makes a direct copy of the basic block, and
/// can thus produce illegal LLVM code. In particular, it will copy any PHI
/// nodes from the original block, even though there are no predecessors for the
/// newly cloned block (thus, phi nodes will have to be updated). Also, this
/// block will branch to the old successors of the original block: these
/// successors will have to have any PHI nodes updated to account for the new
/// incoming edges.
///
/// The correlation between instructions in the source and result basic blocks
/// is recorded in the VMap map.
///
/// If you have a particular suffix you'd like to use to add to any cloned
/// names, specify it as the optional third parameter.
///
/// If you would like the basic block to be auto-inserted into the end of a
/// function, you can specify it as the optional fourth parameter.
///
/// If you would like to collect additional information about the cloned
/// function, you can specify a ClonedCodeInfo object with the optional fifth
/// parameter.
///
BasicBlock *CloneBasicBlock(const BasicBlock *BB,
ValueToValueMapTy &VMap,
const Twine &NameSuffix = "", Function *F = nullptr,
ClonedCodeInfo *CodeInfo = nullptr);
I hope this is a little help. Have Fun!

Related

Does loop operation with variable assignment violate SSA principle?

I just started to learn LLVM IR and SSA, got a question about the SSA principle.
I found the following code block on the Internet, which seems to violate SSA principle because variables are assigned value for several times. Is my comprehension right?
; <label>:4: ; preds = %7, %0
%5 = load i32, i32* %3, align 4
%6 = icmp slt i32 %5, 10
br i1 %6, label %7, label %12
; <label>:7: ; preds = %4
%8 = load i32, i32* %3, align 4
%9 = add nsw i32 %8, 1
store i32 %9, i32* %3, align 4
%10 = load i32, i32* %2, align 4
%11 = mul nsw i32 %10, 2
store i32 %11, i32* %2, align 4
br label %4
LLVM uses "partial SSA" form. LLVM's infinite registers are in SSA form but memory and global variables are not. Your %5 can take on different values because it is a load from memory.
Even in fully SSA form an SSA value in a loop ordinarily takes on different values through the loop iterations. It would look like %5 = phi i32 [%start_val, %loopheader_bb], [%iteration_val, %backedge_bb]. You should get phi nodes if you run opt -sroa over your code.

LLVM inconsistent numbering scheme

I've been playing around with compilers and have been working on my own toy C compiler. Currently I'm attempting to target LLVM IR, but I'm having trouble wrapping my head around the syntax.
My current issue: why is this valid IR syntax:
; Variant that parses: the numbered values %1..%11 appear in sequence and
; every block after the entry block carries an explicit label.
define i32 @main() {
%1 = alloca i32, align 4
%2 = add i32 0, 0
store i32 %2, i32* %1, align 4
%3 = alloca i32, align 4
%4 = add i32 0, 1
store i32 %4, i32* %3, align 4
%5 = load i32, i32* %1, align 4
%6 = icmp ne i32 %5, 0
br i1 %6, label %true0, label %else0
true0: ; preds %0
%7 = add i32 0, 1
store i32 %7, i32* %3, align 4
br label %end0
else0: ; preds %0
%8 = load i32, i32* %3, align 4
%9 = icmp ne i32 %8, 0
br i1 %9, label %true1, label %end1
true1: ; preds %else0
%10 = add i32 0, 2
store i32 %10, i32* %3, align 4
br label %end1
end1: ; preds %true1, %else0
br label %end0
end0: ; preds %true0, %end1
%11 = load i32, i32* %3, align 4
ret i32 %11
}
but this is not:
define i32 @main() {
%1 = alloca i32, align 4
%2 = add i32 0, 0
store i32 %2, i32* %1, align 4 ; variable a
%3 = load i32, i32* %1, align 4
%4 = icmp ne i32 %3, 0
br i1 %4, label %true0, label %else0
true0: ; preds %0
%5 = add i32 0, 1
ret i32 %5
br label %end0 ; follows a terminator, so it opens an unnamed block that takes number %6
else0: ; preds %0
%6 = add i32 0, 2
ret i32 %6
br label %end0 ; same situation: another implicit unnamed block
end0: ; % preds %true0, %else0
ret i32 0
}
I get the error:
llc-6.0: test2.ll:13:1: error: instruction expected to be numbered '%7'
%6 = add i32 0, 2
^
I don't understand why that block needs to be %7, given the previously used number was %6. Compare the %else0 label of the first example, that's very similar syntax and works fine.
And yes, my compiler needs a lot of optimization, but I'm not finished yet :)
Your code is invalid because there is actually another basic block that you did not label:
true0: ; preds %0
%5 = add i32 0, 1
ret i32 %5
hidden_bb: ; this will named as %6 by default
br label %end0
else0: ; preds %0
If that block has a label, the error will go away. Note that every terminator instruction, such as br or ret, ends its basic block, so any instruction that follows a terminator starts a new basic block.

getting block names for LLVM IR parser

I'm writing a LLVM parser to analyse whether a program is adhering to a certain programming paradigm. To that I need to analyse each block of the IR and check certain instructions. When I created the .ll file, I don't see the label names but an address:
; <label>:4 ; preds = %0
%5 = load i32* %c, align 4
%6 = add nsw i32 %5, 10
store i32 %6, i32* %c, align 4
br label %10
; <label>:7 ; preds = %0
%8 = load i32* %c, align 4
%9 = add nsw i32 %8, 15
store i32 %9, i32* %c, align 4
br label %10
; <label>:10 ; preds = %7, %4
%11 = load i32* %1
ret i32 %11
What I need is to get these "labels" into a list. I have also seen that some .ll files has following format:
if.then: ; preds = %entry
%5 = load i32* %c, align 4
%6 = add nsw i32 %5, 10
store i32 %6, i32* %c, align 4
br label %10
if.else: ; preds = %entry
%8 = load i32* %c, align 4
%9 = add nsw i32 %8, 15
store i32 %9, i32* %c, align 4
br label %10
if.end: ; preds = %if.else,
%11 = load i32* %1
ret i32 %11
With the 2nd format, I can use the getName() to get the name of the block: i.e: 'if.then', 'if.else' etc.
But with the 1st format that is impossible, as the blocks don't have names. However, I tested printAsOperand(errs(), true), with which I can print the addresses like: '%4, %7 %10'. My question is: how can I add these addresses (or operands) to a list of strings, or obtain these values and assign them to a variable?
Here's the way to do it;
raw_ostream should be used in printAsOperand() method to get the required address into a variable:
following is the method I used for the purpose:
#include "llvm/Support/raw_ostream.h"
/// Renders the operand form of a basic block (the text printAsOperand
/// produces for it) into a std::string instead of an output stream.
///
/// @param BB  block to describe; it is dereferenced, so it must not be null.
/// @return    the text written by printAsOperand, without the type prefix.
std::string get_block_reference(BasicBlock *BB){
    std::string rendered;
    raw_string_ostream sink(rendered);
    // Second argument is PrintType: false leaves the type out of the text.
    BB->printAsOperand(sink, false);
    // Push any buffered characters into `rendered` before handing it back.
    sink.flush();
    return rendered;
}
Instruction / basic block names are a debugging feature that simplifies the development of IR-level passes, but no guarantees are made about them. E.g. they could simply be stripped off, they could be misleading, etc. You should not rely on them for anything meaningful (and in general they may not have any connection to the original source code). Normally the names are not generated in Release builds of LLVM. You need to build everything in Debug (or Release+Assertions) mode.

Unifying function exits with LLVM

Let's say I have this function in C/C++:
// Factorial-style recursion: yields 1 when x <= 1, otherwise x * foo(x-1).
int foo(int x) {
if (x <= 1) return 1; // base case, the first of the two function exits
return x * foo(x-1);
}
And I compile it with Clang.
Clang generates the following IR:
; Function Attrs: ssp uwtable
; IR for foo(int): both source-level returns store into the stack slot %1 and
; branch to the single exit block %12, which loads the slot and returns it.
define i32 @_Z3fooi(i32 %x) #0 {
%1 = alloca i32, align 4 ; slot for the return value
%2 = alloca i32, align 4 ; slot holding a copy of %x
store i32 %x, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = icmp sle i32 %3, 1
br i1 %4, label %5, label %6
; <label>:5 ; preds = %0
; x <= 1: the result is 1.
store i32 1, i32* %1, align 4
br label %12
; <label>:6 ; preds = %0
; Otherwise: the result is x * foo(x - 1).
%7 = load i32, i32* %2, align 4
%8 = load i32, i32* %2, align 4
%9 = sub nsw i32 %8, 1
%10 = call i32 @_Z3fooi(i32 %9)
%11 = mul nsw i32 %7, %10
store i32 %11, i32* %1, align 4
br label %12
; <label>:12 ; preds = %6, %5
; Unified exit: return whatever was stored in the return slot.
%13 = load i32, i32* %1, align 4
ret i32 %13
}
As you can see, LLVM passes optimizes out the code and creates a "return register" (where I put the return value), and a "return block" (where the return value is effectively returned).
I'm trying to get the same effect, but when I use SROA pass or the Instruction Combining pass, they translate the exits in a phi instruction:
; Function Attrs: nounwind ssp uwtable
; Same computation with no stack slots: the exit block %7 selects the return
; value with a phi over its two predecessors.
define i32 @__HF3fooTi(i32 %x) #0 {
%1 = icmp sle i32 %x, 1
br i1 %1, label %2, label %3
; <label>:2 ; preds = %0
br label %7
; <label>:3 ; preds = %0
%4 = sub nsw i32 %x, 1
%5 = call i32 @__HF3fooTi(i32 %4)
%6 = mul nsw i32 %x, %5
br label %7
; <label>:7 ; preds = %3, %2
; 1 when arriving from %2, the product %6 when arriving from %3.
%.0 = phi i32 [ 1, %2 ], [ %6, %3 ]
ret i32 %.0
}
My question is: which solution is faster? And which pass is Clang using to achieve this? (In the Clang source files I found the 2 passes I used, and they give me this different result)

How does MSVC optimize static variable usage?

I'm interested in how the Microsoft Visual C++ compiler treat/optimize static variables.
My code:
#include <cstdlib>
// Declares a function-local static initialized from the constant 3.
// The variable is never read or modified.
void no_static_initialization()
{
static int value = 3;
}
// Declares a function-local static whose initializer is the runtime argument
// new_value, so the initialization has to happen during a call (the first
// one) rather than at compile time.
void static_initialization(int new_value)
{
static int value = new_value;
}
// Driver: calls no_static_initialization once and static_initialization
// twice, first with the constant 1 and then with the result of std::rand().
int main()
{
no_static_initialization();
static_initialization(1);
static_initialization(std::rand());
return 0;
}
Here's the assembly for the code (compiled with optimizations):
My main area of interest is the last case.
Here, the first statement got fully optimized away, and the two calls of the second statement were inlined; they actually represent similar chunks of code.
Each of them tests something and then makes a short jump if the test wasn't successful (these jumps obviously point to the end of the corresponding routine).
Does the compiler make an explicit check on every function call for if the function is called the first time?
Does the compiler actually have a flag, which indicates if this is the first time the function was called or not?
Where is it stored (I guess all that test stuff is about it, but I'm not exactly sure)?
Yes, the compiler has to add a hidden flag to test whether it is the first call to the function and initialize or not depending on that. In both snippets it is testing the flag, if it is raised it will jump to the end of the function or else it will initialize the static variable. Note that since the compiler has inlined the function it could as well optimize away the second test, knowing that the flag is to be tested only on the first call.
The flag seems to be located at address 0x00403374, and takes a byte, while the variable itself is located at address 0x00403370.
I like to use LLVM because the code it generates tells you a bit more explicitly what it's doing:
The actual code is below, because it's kind of a long read. Yes, LLVM creates guard condition variables for static values. Notice how static_initialization/bb: acquires the guard, checks whether it is a certain value corresponding to "already initialized", and either branches to bb1 if it needs to initialize, or to bb2 if it doesn't. This isn't the only possible way to solve the single-initialization requirement, but it's the usual way.
; ModuleID = '/tmp/webcompile/_31867_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-linux-gnu"
; NOTE(review): the global names below look demangled (they contain spaces and
; parentheses), so this listing is for reading, not for feeding back to llc.
; The i64 guard records whether the static has been initialized; the i32 is
; the storage of the static itself.
@guard variable for static_initialization(int)::value = internal global i64 0 ; <i64*> [#uses=3]
@static_initialization(int)::value = internal global i32 0 ; <i32*> [#uses=1]
; No guard and no store here: the body only branches to the return block.
define void @no_static_initialization()() nounwind {
entry:
br label %return
return: ; preds = %entry
ret void
}
define void @static_initialization(int)(i32 %new_value) nounwind {
entry:
%new_value_addr = alloca i32 ; <i32*> [#uses=2]
%0 = alloca i8 ; <i8*> [#uses=2]
%retval.1 = alloca i8 ; <i8*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %new_value, i32* %new_value_addr
; Fast path: read the first byte of the guard; if it is nonzero skip to bb2.
%1 = load i8* bitcast (i64* @guard variable for static_initialization(int)::value to i8*), align 1 ; <i8> [#uses=1]
%2 = icmp eq i8 %1, 0 ; <i1> [#uses=1]
br i1 %2, label %bb, label %bb2
bb: ; preds = %entry
; Guard byte was zero: call __cxa_guard_acquire; a nonzero result leads to
; bb1 (perform the initialization), zero leads to bb2 (skip it).
%3 = call i32 @__cxa_guard_acquire(i64* @guard variable for static_initialization(int)::value) nounwind ; <i32> [#uses=1]
%4 = icmp ne i32 %3, 0 ; <i1> [#uses=1]
%5 = zext i1 %4 to i8 ; <i8> [#uses=1]
store i8 %5, i8* %retval.1, align 1
%6 = load i8* %retval.1, align 1 ; <i8> [#uses=1]
%toBool = icmp ne i8 %6, 0 ; <i1> [#uses=1]
br i1 %toBool, label %bb1, label %bb2
bb1: ; preds = %bb
; Initialization: copy new_value into the static, then release the guard.
store i8 0, i8* %0, align 1
%7 = load i32* %new_value_addr, align 4 ; <i32> [#uses=1]
store i32 %7, i32* @static_initialization(int)::value, align 4
store i8 1, i8* %0, align 1
call void @__cxa_guard_release(i64* @guard variable for static_initialization(int)::value) nounwind
br label %bb2
bb2: ; preds = %bb1, %bb, %entry
br label %return
return: ; preds = %bb2
ret void
}
declare i32 @__cxa_guard_acquire(i64*) nounwind
declare void @__cxa_guard_release(i64*) nounwind
define i32 @main() nounwind {
entry:
%retval = alloca i32 ; <i32*> [#uses=2]
%0 = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @no_static_initialization()() nounwind
call void @static_initialization(int)(i32 1) nounwind
%1 = call i32 @rand() nounwind ; <i32> [#uses=1]
call void @static_initialization(int)(i32 %1) nounwind
store i32 0, i32* %0, align 4
%2 = load i32* %0, align 4 ; <i32> [#uses=1]
store i32 %2, i32* %retval, align 4
br label %return
return: ; preds = %entry
%retval1 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval1
}
declare i32 @rand() nounwind