I'm writing a compiler for LLVM for a language whose semantics explicitly define that a division by zero should always raise a floating point exception. The problem is, after running -mem2reg and -constprop on my raw IR, my code gets converted to:
; IR after -mem2reg and -constprop: the quotient %t3 is never used and the
; function returns the constant 7.
define i32 @main() {
entry:
  %t3 = sdiv i32 2, 0        ; division by zero; result unused
  ret i32 7
}
which then gets turned by llc -O0 into:
# llc -O0 output for main (x86, AT&T syntax).
# The symbol named by .globl/.type must match the label and the .size
# directive, otherwise the function is not exported under any usable name.
        .text
        .globl  main
        .align  16, 0x90
        .type   main,@function
main:
        .cfi_startproc
# BB#0:                                 # %entry
        movl    $7, %eax                # return 7; no divide instruction is present
        ret
.Ltmp0:
        .size   main, .Ltmp0-main
        .cfi_endproc
Is there a way to force llc not to remove effectful operations?
The sdiv instruction has divide by zero semantics that are undefined. If your front end language has some defined semantics for this you'll need to use instructions other than sdiv.
Perhaps you'll have to detect a divide by zero and branch at runtime to a sequence of instructions that gives the semantics you want.
The language reference states that [d]ivision by zero leads to undefined behavior as stated by Colin.
Thus, you have to check if the divisor is zero and explicitly generate a floating point exception. In C, this could look as follows:
extern void raiseFloatingException();
/* Example of guarding a division: calls raiseFloatingException() when the
 * divisor b is zero, then performs the division and returns the quotient. */
int someFunc() {
int a = 2;
int b = 0;
if (!b) { /* divisor is zero: raise the exception explicitly */
raiseFloatingException();
}
int result = a / b;
return result;
}
If you compile this to bitcode and optimize it with -mem2reg, you get the pattern that you could generate with your front end:
; someFunc() after -mem2reg: a zero check on the divisor guards the call to
; raiseFloatingException(); both paths then reach the division.
define i32 @someFunc() #0 {
  %1 = icmp ne i32 0, 0                 ; %1 = (b != 0)
  br i1 %1, label %3, label %2          ; skip the call when b is non-zero
; <label>:2 ; preds = %0
  call void (...)* @raiseFloatingException()
  br label %3
; <label>:3 ; preds = %2, %0
  %4 = sdiv i32 2, 0
  ret i32 %4
}
Note, that you will have to provide the raiseFloatingException function and link it with your code.
Related
I am trying to run this code but I am getting:
error: expected instruction opcode
label_3:
this is the relevant part of the code:
define void #main(){
%r1 = alloca [50 x i32]
%r7 = alloca i32
store i32 0 , i32* %r7
label_3:
%r9 = load i32 , i32* %r7
%r8 = getelementptr [258 x i32], [258 x i32]* %r6 , i32 0 , i32 %r9
store i32 0 , i32* %r8
%r10 = add i32 1 , %r9
store i32 %r10 , i32* %r7
%r11 = icmp eq i32 256 , i32 %r10
br i1 %r11 , label %label_4 , label %label_3
label_4:
.....
Thanks in advance!
I solved the problem: before entering the loop (label_3), the previous basic block has to be closed explicitly, and that requires a "terminator" instruction, so I added br label %label_3 on the line before label_3:
for more details read this:
https://zanopia.wordpress.com/2010/09/14/understanding-llvm-assembly-with-fractals-part-i/
Please consider following code:
// Writes tab[i] = i for every i in [0, len).
// NOTE: declared to return float but contains no return statement; that
// omission is the subject of the discussion that follows.
float test(int len, int* tab)
{
for(int i = 0; i<len; i++)
tab[i] = i;
}
Obviously return is missing. For this scenario for both clang and ndk compiler for ARM processor an infinite loop is generated. After disassembling it becomes clear that compiler generates regular branch instruction instead of conditional branch.
@ Loop emitted for test(): r0 is the counter i; r1 presumably holds tab.
mov r0, #0                      @ i = 0
.LBB0_1:
str r0, [r1, r0, lsl #2]        @ store r0 at r1 + r0*4, i.e. tab[i] = i
add r0, r0, #1                  @ i++
b .LBB0_1                       @ unconditional branch: no comparison against len
The example with an error can be found here: https://godbolt.org/z/YDSFw-
Please note that c++ specification states that missing return is considered as undefined behaviour but it refers only to the returned value. It shall not affect the preceding instructions.
Am I missing something here? Any thoughts?
No, you can't reason that way with undefined behaviour.
The compiler is free to use undefined behaviour and assumptions around it for optimizations. The compiler is free to assume your code will not contain undefined behaviour.
In this case, the compiler can assume that the code with undefined behaviour won't be reached. As the end of the function contains undefined behaviour, the compiler concludes that the end of the function actually never will be reached, and thus can optimize the loop.
If you remove the -Oz and add -emit-llvm to the compiler explorer command, you'll see what LLVM IR clang produces originally, when not doing optimizations:
https://godbolt.org/z/-dbeNj
; Unoptimized IR for test(int len, int* tab).
; %3 holds len, %4 holds tab, %5 holds the loop counter i.
define dso_local float @_Z4testiPi(i32 %0, i32* %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32*, align 4
  %5 = alloca i32, align 4
  store i32 %0, i32* %3, align 4
  store i32* %1, i32** %4, align 4
  store i32 0, i32* %5, align 4         ; i = 0
  br label %6
6: ; preds = %15, %2
  ; loop condition: i < len
  %7 = load i32, i32* %5, align 4
  %8 = load i32, i32* %3, align 4
  %9 = icmp slt i32 %7, %8
  br i1 %9, label %10, label %18
10: ; preds = %6
  ; loop body: tab[i] = i
  %11 = load i32, i32* %5, align 4
  %12 = load i32*, i32** %4, align 4
  %13 = load i32, i32* %5, align 4
  %14 = getelementptr inbounds i32, i32* %12, i32 %13
  store i32 %11, i32* %14, align 4
  br label %15
15: ; preds = %10
  ; increment: i = i + 1
  %16 = load i32, i32* %5, align 4
  %17 = add nsw i32 %16, 1
  store i32 %17, i32* %5, align 4
  br label %6
18: ; preds = %6
  ; loop exit: no return value is produced, so the block traps and is
  ; marked unreachable
  call void @llvm.trap()
  unreachable
}
The end of the loop, label 18, contains unreachable. This can be used for further optimizations, getting rid of the branch and comparison at the start of the loop.
Edit:
There's an excellent blog post from John Regehr about how to reason around undefined behaviour in C and C++. It's a bit long but well worth a read.
I'm writing a LLVM parser to analyse whether a program is adhering to a certain programming paradigm. To that I need to analyse each block of the IR and check certain instructions. When I created the .ll file, I don't see the label names but an address:
; Fragment with unnamed basic blocks: each block is identified only by its
; numeric slot (4, 7 and 10), shown in the "<label>" comments.
; <label>:4 ; preds = %0
%5 = load i32* %c, align 4
%6 = add nsw i32 %5, 10
store i32 %6, i32* %c, align 4
br label %10
; <label>:7 ; preds = %0
%8 = load i32* %c, align 4
%9 = add nsw i32 %8, 15
store i32 %9, i32* %c, align 4
br label %10
; Both blocks above branch here.
; <label>:10 ; preds = %7, %4
%11 = load i32* %1
ret i32 %11
What I need is to get these "labels" into a list. I have also seen that some .ll files has following format:
; Same fragment with named basic blocks; the branches refer to the merge
; block by its name, if.end.
if.then: ; preds = %entry
  %5 = load i32* %c, align 4
  %6 = add nsw i32 %5, 10
  store i32 %6, i32* %c, align 4
  br label %if.end
if.else: ; preds = %entry
  %8 = load i32* %c, align 4
  %9 = add nsw i32 %8, 15
  store i32 %9, i32* %c, align 4
  br label %if.end
if.end: ; preds = %if.else, %if.then
  %11 = load i32* %1
  ret i32 %11
With the 2nd format, I can use the getName() to get the name of the block: i.e: 'if.then', 'if.else' etc.
But with the 1st format, that's impossible, as the block doesn't have a name. I tested printAsOperand(errs(), true), with which I can print the addresses, e.g. '%4, %7, %10'. My question is: how can I add these addresses (or operands) to a list of strings, or obtain these values and assign them to a variable?
Here's the way to do it;
raw_ostream should be used in printAsOperand() method to get the required address into a variable:
following is the method I used for the purpose:
#include "llvm/Support/raw_ostream.h"
// Returns the operand form of a basic block (what printAsOperand emits with
// type printing disabled) as a std::string.
std::string get_block_reference(BasicBlock *BB) {
  std::string ref;
  raw_string_ostream out(ref);  // everything written to `out` lands in `ref`
  BB->printAsOperand(out, /*PrintType=*/false);
  return out.str();
}
Instruction / basic block names are a debugging feature that simplifies the development of IR-level passes, but no guarantees are made about them. E.g. they could simply be stripped off, they could be misleading, etc. You should not rely on them for anything meaningful (and in general they may not have any connection to the original source code). Normally the names are not generated in Release builds of LLVM. You need to build everything in Debug (or Release+Assertions) mode.
I transform the following llvm-IR
; Function Attrs: noinline norecurse nounwind uwtable
; Computes %sub = 5 - 3, selects %y.0 through a phi depending on %sub < 3,
; and returns 0; %sub2 and %add are computed but never used.
define i32 @main() #0 {
entry:
  %sub = sub nsw i32 5, 3
  %cmp = icmp slt i32 %sub, 3
  br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
  %mul = mul nsw i32 %sub, 2
  br label %if.end
if.else: ; preds = %entry
  %sub1 = sub nsw i32 %sub, 3
  br label %if.end
if.end: ; preds = %if.else, %if.then
  ; %y.0 takes %mul when coming from if.then, %sub1 when coming from if.else
  %y.0 = phi i32 [ %mul, %if.then ], [ %sub1, %if.else ]
  %sub2 = sub nsw i32 %sub, %y.0
  %add = add nsw i32 %sub, %y.0
  ret i32 0
}
to assembly code for x86_64-unknown-linux-gnu
using llc sample.ll
generated assembly code:
# llc output for main (x86-64, AT&T syntax).
        .text
        .file   "phi.cpp"
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
main:                                   # @main
        .cfi_startproc
# BB#0:                                 # %entry
        pushq   %rbp                    # save caller's frame pointer
.Lcfi0:
        .cfi_def_cfa_offset 16
.Lcfi1:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
.Lcfi2:
        .cfi_def_cfa_register %rbp
        xorl    %eax, %eax
        testb   %al, %al                # flags are not consumed by any later instruction here
        xorl    %eax, %eax              # return value 0
        popq    %rbp
        retq
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
        .cfi_endproc
                                        # -- End function
In the above code, %rbp is the base pointer, which points to the base of the current stack frame, and %rsp is the stack pointer, which points to the top of the current stack frame. Operands for arithmetic operations are stored in %eax and %al, but I can't find the instruction where a value is loaded into the %eax and %al registers.
I also want to know
How llc is handling phi node on assembly level
llc defaults to -O2 and your code starts with a constant expression, sub nsw i32 5, 3. Thus, your function basically does nothing, and the only thing LLVM has to keep is zeroing EAX.
If you run llc -O0 your.ll, you'll get much more verbose code that performs spills to the stack and register loads.
BTW, there is a pair of passes called mem2reg and reg2mem that convert code to and from SSA form. Specifically, reg2mem removes phi nodes by introducing explicit stores and loads in the IR, and mem2reg does the reverse.
I'm interested in how the Microsoft Visual C++ compiler treat/optimize static variables.
My code:
#include <cstdlib>
// Declares a function-local static int initialised with the constant 3.
// The variable is never read or written afterwards.
void no_static_initialization()
{
static int value = 3;
}
// Declares a function-local static int whose initialiser is the run-time
// argument new_value, so the initialisation cannot be done at compile time.
void static_initialization(int new_value)
{
static int value = new_value;
}
// Calls no_static_initialization() once, then static_initialization() twice:
// first with the constant 1, then with the result of std::rand().
int main()
{
no_static_initialization();
static_initialization(1);
static_initialization(std::rand());
return 0;
}
Here's the assembly for the code (compiled with optimizations):
My main area of interest is the last case.
Here, the first function call got fully optimized away, and the two calls of the second function were inlined; they actually represent similar chunks of code.
Each of them does test something something and then makes a short jump if the test wasn't successful (these jumps obviously point to the end of corresponding routine).
Does the compiler make an explicit check on every function call for if the function is called the first time?
Does the compiler actually have a flag, which indicates if this is the first time the function was called or not?
Where is it stored (I guess all that test stuff is about it, but I'm not exactly sure)?
Yes, the compiler has to add a hidden flag to test whether it is the first call to the function and initialize or not depending on that. In both snippets it is testing the flag, if it is raised it will jump to the end of the function or else it will initialize the static variable. Note that since the compiler has inlined the function it could as well optimize away the second test, knowing that the flag is to be tested only on the first call.
The flag seems to be located at address 0x00403374, and takes a byte, while the variable itself is located at address 0x00403370.
I like to use LLVM because the code it generates tells you a bit more explicitly what it's doing:
The actual code is below, because it's kind of a long read. Yes, LLVM creates guard condition variables for static values. Notice how static_initialization/bb: acquires the guard, checks whether it has the value that corresponds to "already initialized", and branches either to bb1 if it needs to initialize, or to bb2 if it doesn't. This isn't the only possible way to meet the single-initialization requirement, but it's the usual way.
; ModuleID = '/tmp/webcompile/_31867_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-linux-gnu"
; Guard variable (i64) and storage (i32) for the function-local static.
; NOTE: symbol names in this listing are printed demangled; an assembler
; would need the mangled or quoted spellings.
@guard variable for static_initialization(int)::value = internal global i64 0 ; <i64*> [#uses=3]
@static_initialization(int)::value = internal global i32 0 ; <i32*> [#uses=1]
; no_static_initialization(): contains no guard and no store; it only
; branches to the return block.
define void @no_static_initialization()() nounwind {
entry:
  br label %return
return: ; preds = %entry
  ret void
}
; static_initialization(int): initialises the function-local static under
; control of its guard variable.
;   entry: load the first byte of the guard; if it is zero go to bb,
;          otherwise skip to bb2.
;   bb:    call __cxa_guard_acquire; if it returns non-zero go to bb1,
;          otherwise skip to bb2.
;   bb1:   store new_value into the static, then call __cxa_guard_release.
define void @static_initialization(int)(i32 %new_value) nounwind {
entry:
  %new_value_addr = alloca i32 ; <i32*> [#uses=2]
  %0 = alloca i8 ; <i8*> [#uses=2]
  %retval.1 = alloca i8 ; <i8*> [#uses=2]
  %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
  store i32 %new_value, i32* %new_value_addr
  %1 = load i8* bitcast (i64* @guard variable for static_initialization(int)::value to i8*), align 1 ; <i8> [#uses=1]
  %2 = icmp eq i8 %1, 0 ; <i1> [#uses=1]
  br i1 %2, label %bb, label %bb2
bb: ; preds = %entry
  %3 = call i32 @__cxa_guard_acquire(i64* @guard variable for static_initialization(int)::value) nounwind ; <i32> [#uses=1]
  %4 = icmp ne i32 %3, 0 ; <i1> [#uses=1]
  %5 = zext i1 %4 to i8 ; <i8> [#uses=1]
  store i8 %5, i8* %retval.1, align 1
  %6 = load i8* %retval.1, align 1 ; <i8> [#uses=1]
  %toBool = icmp ne i8 %6, 0 ; <i1> [#uses=1]
  br i1 %toBool, label %bb1, label %bb2
bb1: ; preds = %bb
  store i8 0, i8* %0, align 1
  %7 = load i32* %new_value_addr, align 4 ; <i32> [#uses=1]
  store i32 %7, i32* @static_initialization(int)::value, align 4
  store i8 1, i8* %0, align 1
  call void @__cxa_guard_release(i64* @guard variable for static_initialization(int)::value) nounwind
  br label %bb2
bb2: ; preds = %bb1, %bb, %entry
  br label %return
return: ; preds = %bb2
  ret void
}
; External guard helpers called by static_initialization(int); both take a
; pointer to the i64 guard variable.
declare i32 @__cxa_guard_acquire(i64*) nounwind
declare void @__cxa_guard_release(i64*) nounwind
; main(): calls no_static_initialization(), then static_initialization(int)
; with 1 and with the result of rand(), and returns 0.
define i32 @main() nounwind {
entry:
  %retval = alloca i32 ; <i32*> [#uses=2]
  %0 = alloca i32 ; <i32*> [#uses=2]
  %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
  call void @no_static_initialization()() nounwind
  call void @static_initialization(int)(i32 1) nounwind
  %1 = call i32 @rand() nounwind ; <i32> [#uses=1]
  call void @static_initialization(int)(i32 %1) nounwind
  store i32 0, i32* %0, align 4
  %2 = load i32* %0, align 4 ; <i32> [#uses=1]
  store i32 %2, i32* %retval, align 4
  br label %return
return: ; preds = %entry
  %retval1 = load i32* %retval ; <i32> [#uses=1]
  ret i32 %retval1
}
; External declaration of rand(), called from main.
declare i32 @rand() nounwind