LLVM Global Instruction Selection failure - llvm

I am trying to run llc with global instruction selection. Unfortunately, it fails on any shift instruction (e.g. shl).
Here is how to reproduce it using llc -global-isel=true -filetype=asm sample.ll:
; Reproducer: advances the value kept in %cache along one of two paths
; selected by %nullify. The shl in %cond_false is the only shift.
define i64 @test_advance(i1 %nullify, i64 %iaoq) {
entry:
  %cache = alloca i64
  store i64 %iaoq, i64* %cache
  br i1 %nullify, label %cond_true, label %cond_false
cond_false:
  %iaoq.1 = load i64, i64* %cache
  %iaoq.2 = add i64 %iaoq.1, 4
  ; the shift instruction
  %iaoq.x = shl i64 %iaoq.2, 13
  store i64 %iaoq.x, i64* %cache
  br label %next_insn
cond_true:
  %iaoq.3 = load i64, i64* %cache
  %iaoq.4 = add i64 %iaoq.3, 8
  store i64 %iaoq.4, i64* %cache
  br label %after_next_insn
next_insn:
  %iaoq.5 = load i64, i64* %cache
  %iaoq.6 = add i64 %iaoq.5, 4
  store i64 %iaoq.6, i64* %cache
  br label %after_next_insn
after_next_insn:
  %res = load i64, i64* %cache
  ret i64 %res
}
Why does this happen? What is so special about shifts? My platform is x86.
EDIT
In release_70 this problem is solved; unfortunately, global-isel still cannot cope with the legalisation of llvm.uadd.with.overflow:
; Reproducer: returns element 0 (the i64 sum) of
; llvm.uadd.with.overflow.i64(%iaoq, %iaoq); the i1 element is unused.
define i64 @test_advance(i1 %nullify, i64 %iaoq) {
entry:
  %0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %iaoq, i64 %iaoq)
  %sum_64_7949 = extractvalue { i64, i1 } %0, 0
  ret i64 %sum_64_7949
}
; Function Attrs: nounwind readnone speculatable
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #0
It gets the same error:
LLVM ERROR: unable to legalize instruction: %3:_(s64), %4:_(s1) = G_UADDE %1:_, %1:_, %5:_ (in function: test_advance)

Related

How to get Syscall Arguments in LLVM IR

I am new to LLVM IR. Currently I am trying to implement a FunctionPass to detect a custom requirement proposed by my research collaborators. The requirement involves detecting syscalls. To be more precise, given some source code, I have to detect whether there is a syscall in the control flow graph. If there is, I have to find out the syscall type (e.g., open, fork), the syscall arguments (e.g., the file descriptor and other parameters) and the return value.
Here is an example. The following is the source of fopen.c from the musl libc library:
#include "stdio_impl.h"
#include <fcntl.h>
#include <string.h>
#include <errno.h>
/* Opens `filename` with the given `mode` string.
 * Returns the FILE pointer produced by __fdopen(), or 0 when the mode is
 * invalid (errno is set to EINVAL), when sys_open() returns a negative
 * value, or when __fdopen() returns a null pointer. */
FILE *fopen(const char *restrict filename, const char *restrict mode)
{
FILE *f;
int fd;
int flags;
/* Check for valid initial mode character */
if (!strchr("rwa", *mode)) {
errno = EINVAL;
return 0;
}
/* Compute the flags to pass to open() */
flags = __fmodeflags(mode);
fd = sys_open(filename, flags, 0666);
if (fd < 0) return 0;
/* Only when O_CLOEXEC was requested: issue fcntl(fd, F_SETFD, FD_CLOEXEC) */
if (flags & O_CLOEXEC)
__syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);
f = __fdopen(fd, mode);
if (f) return f;
/* __fdopen() failed: close the descriptor before reporting failure */
__syscall(SYS_close, fd);
return 0;
}
weak_alias(fopen, fopen64);
Here is the generated intermediate representation in ll format:
; ModuleID = 'src/stdio/fopen.c'
source_filename = "src/stdio/fopen.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i32 (%struct._IO_FILE*)*, i8*, i8*, i8*, i8*, i64 (%struct._IO_FILE*, i8*, i64)*, i64 (%struct._IO_FILE*, i8*, i64)*, i64 (%struct._IO_FILE*, i64, i32)*, i8*, i64, %struct._IO_FILE*, %struct._IO_FILE*, i32, i32, i64, i32, i32, i32, i8*, i64, i8*, i8*, i8*, i64, i64, %struct._IO_FILE*, %struct._IO_FILE*, %struct.__locale_struct* }
%struct.__locale_struct = type opaque
; "rwa": the string passed to strchr() by fopen.
@.str = private unnamed_addr constant [4 x i8] c"rwa\00", align 1
; fopen64 is a weak alias of fopen.
@fopen64 = weak alias %struct._IO_FILE* (i8*, i8*), %struct._IO_FILE* (i8*, i8*)* @fopen
; Function Attrs: nounwind optsize strictfp
; Compiled form of fopen(): %0 is the filename pointer, %1 the mode pointer.
; The three inline-asm "syscall" calls take the value for {ax} as their first
; operand (2, 72 and 3 below); the remaining operands follow the {di},{si},{dx}
; input constraints in order.
define %struct._IO_FILE* @fopen(i8* noalias noundef %0, i8* noalias noundef %1) #0 {
  %3 = load i8, i8* %1, align 1, !tbaa !3
  %4 = sext i8 %3 to i32
  %5 = tail call i8* @strchr(i8* noundef getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 noundef %4) #3
  %6 = icmp eq i8* %5, null
  br i1 %6, label %7, label %9
7: ; preds = %2
  ; strchr() returned null: store 22 through the errno location and return null.
  %8 = tail call i32* @___errno_location() #4
  store i32 22, i32* %8, align 4, !tbaa !6
  br label %32
9: ; preds = %2
  %10 = tail call i32 @__fmodeflags(i8* noundef nonnull %1) #3
  %11 = ptrtoint i8* %0 to i64
  %12 = or i32 %10, 32768
  %13 = sext i32 %12 to i64
  ; {ax} = 2, {di} = filename, {si} = flags, {dx} = 438 (0666 octal).
  %14 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 2, i64 %11, i64 %13, i64 438) #5, !srcloc !8
  %15 = tail call i64 @__syscall_ret(i64 noundef %14) #3
  %16 = trunc i64 %15 to i32
  %17 = icmp slt i32 %16, 0
  br i1 %17, label %32, label %18
18: ; preds = %9
  %19 = and i32 %10, 524288
  %20 = icmp eq i32 %19, 0
  br i1 %20, label %25, label %21
21: ; preds = %18
  %22 = shl i64 %15, 32
  %23 = ashr exact i64 %22, 32
  ; {ax} = 72, {di} = fd, {si} = 2, {dx} = 1.
  %24 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 72, i64 %23, i64 2, i64 1) #5, !srcloc !8
  br label %25
25: ; preds = %21, %18
  %26 = tail call %struct._IO_FILE* @__fdopen(i32 noundef %16, i8* noundef nonnull %1) #3
  %27 = icmp eq %struct._IO_FILE* %26, null
  br i1 %27, label %28, label %32
28: ; preds = %25
  %29 = shl i64 %15, 32
  %30 = ashr exact i64 %29, 32
  ; {ax} = 3, {di} = fd.
  %31 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 3, i64 %30) #5, !srcloc !9
  br label %32
32: ; preds = %25, %9, %28, %7
  %33 = phi %struct._IO_FILE* [ null, %28 ], [ null, %7 ], [ null, %9 ], [ %26, %25 ]
  ret %struct._IO_FILE* %33
}
; External functions called by @fopen.
; Function Attrs: optsize
declare i8* @strchr(i8* noundef, i32 noundef) local_unnamed_addr #1
; Function Attrs: mustprogress nofree nosync nounwind optsize readnone willreturn
declare hidden i32* @___errno_location() local_unnamed_addr #2
; Function Attrs: optsize
declare hidden i32 @__fmodeflags(i8* noundef) local_unnamed_addr #1
; Function Attrs: optsize
declare hidden i64 @__syscall_ret(i64 noundef) local_unnamed_addr #1
; Function Attrs: optsize
declare hidden %struct._IO_FILE* @__fdopen(i32 noundef, i8* noundef) local_unnamed_addr #1
attributes #0 = { nounwind optsize strictfp "frame-pointer"="none" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "strictfp" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { optsize "frame-pointer"="none" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { mustprogress nofree nosync nounwind optsize readnone willreturn "frame-pointer"="none" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { nobuiltin nounwind optsize strictfp "no-builtins" }
attributes #4 = { nobuiltin nounwind optsize readnone strictfp willreturn "no-builtins" }
attributes #5 = { nounwind strictfp }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"Ubuntu clang version 14.0.6"}
!3 = !{!4, !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"int", !4, i64 0}
!8 = !{i64 71220}
!9 = !{i64 70822}
I am using the following basic construct to catch instructions inside the FunctionPass:
// Visits each argument operand of `call` in order.
// `call` must be non-null.
static void parseCallInstruction(CallInst *call){
    assert(call != NULL);
    // Bound the loop with arg_size(): getNumOperands() also counts the callee
    // operand, so using it as the limit for getArgOperand() indexes one past
    // the last real argument.
    const unsigned numArgs = call->arg_size();
    for (unsigned i = 0; i < numArgs; ++i) {
        Value *operand = call->getArgOperand(i);
        // How do I get the syscall number, arguments and return type from here ?
    }
}
// Walks every instruction of every basic block of every function in M and
// passes each call instruction to parseCallInstruction(). Always returns false.
virtual bool runOnModule(Module &M)
{
    for (Function &fn : M) {
        for (auto &block : fn) {
            for (auto &inst : block) {
                CallInst *call = dyn_cast<CallInst>(&inst);
                if (!call)
                    continue;
                // Let's assume for now that the call instruction in question is one of the instructions among instruction %14,%24,%31 of the llvm IR code.
                parseCallInstruction(call);
            }
        }
    }
    return false;
}
For example, consider instruction %14 in the LLVM IR above: tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 2, i64 %11, i64 %13, i64 438) #5, !srcloc !8. A cursory look at the source code shows that it is an open syscall. How do I get this, the other argument values, and the return value information by parsing the CallInst? If CallInst is not the right way to go about it, what other instruction-level mechanism can I use to get that information?
Any help or tips will be greatly appreciated. Thanks in advance.

LLVM API optimisation run

I am trying to perform -O2 optimisation on LLVM IR obtained by calling the Clang API. Unfortunately, the optimisation only works on IR created with manual calls. I have the following function:
// Returns y + 1 + 2 when x > 2, and y - 1 + 2 otherwise.
int mult_add(int x, int y){
if(x > 2){
return y + 1 + 2;
} else {
return y - 1 + 2;
}
}
And with these calls:
clang -S -emit-llvm main.cpp
opt main.ll -o opt.ll -S -O2
I get the correct result:
; Optimised form: adds 3 to %1 when %0 > 2, otherwise adds 1.
define i32 @_Z8mult_addii(i32, i32) local_unnamed_addr #0 {
  %3 = icmp sgt i32 %0, 2
  %.sink = select i1 %3, i32 3, i32 1
  %4 = add nsw i32 %.sink, %1
  ret i32 %4
}
Unfortunately, when I do it through the LLVM API with legacy::PassManager and legacy::FunctionPassManager, the optimisation simply does not work and I get long, unoptimised code:
; Unoptimised form: both arguments and the return value live in allocas
; (%4, %5 and %3 respectively), and each branch stores its result to %3.
define i32 @_Z8mult_addii(i32, i32) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca i32, align 4
  store i32 %0, i32* %4, align 4
  store i32 %1, i32* %5, align 4
  %6 = load i32, i32* %4, align 4
  %7 = icmp sgt i32 %6, 2
  br i1 %7, label %8, label %12
; <label>:8: ; preds = %2
  %9 = load i32, i32* %5, align 4
  %10 = add nsw i32 %9, 1
  %11 = add nsw i32 %10, 2
  store i32 %11, i32* %3, align 4
  br label %16
; <label>:12: ; preds = %2
  %13 = load i32, i32* %5, align 4
  %14 = sub nsw i32 %13, 1
  %15 = add nsw i32 %14, 2
  store i32 %15, i32* %3, align 4
  br label %16
; <label>:16: ; preds = %12, %8
  %17 = load i32, i32* %3, align 4
  ret i32 %17
}
It seems as if Clang creates the IR in some unoptimisable state, because running the same passes on manually created IR works fine.
By the way, PMBuilder.populateModulePassManager is called; here is the code:
// Builds the pass pipeline for module M2, appends a pass that prints the
// module to outs(), and runs the module pass manager.
legacy::PassManager Passes;
legacy::FunctionPassManager FPasses(M2.get());
// Opt level 2, size level 0.
AddOptimizationPasses(Passes, FPasses, &(TheJIT->getTargetMachine()), 2, 0);
Passes.add(createPrintModulePass(outs()));
// NOTE(review): only Passes is run here; FPasses is never run in the code shown.
Passes.run(*M2);
AddOptimizationPasses is adapted (and simplified) from the opt utility:
// Adds a verifier pass to FPM, then configures a PassManagerBuilder with the
// requested optimisation and size levels (inliner threshold 50, loop unrolling
// enabled, unit-at-a-time disabled), lets the target machine (if any) adjust
// it, and populates MPM from it. FPM is not populated from the builder.
static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
                                  legacy::FunctionPassManager &FPM,
                                  TargetMachine *TM, unsigned OptLevel,
                                  unsigned SizeLevel) {
  // The verifier is the only pass the function pass manager receives here.
  FPM.add(createVerifierPass());

  PassManagerBuilder PMB;
  PMB.OptLevel = OptLevel;
  PMB.SizeLevel = SizeLevel;
  PMB.DisableUnrollLoops = false;
  PMB.DisableUnitAtATime = true; //!UnitAtATime;
  PMB.Inliner = createFunctionInliningPass(50);

  if (TM != nullptr) {
    TM->adjustPassManager(PMB);
  }

  //PMB.populateFunctionPassManager(FPM);
  PMB.populateModulePassManager(MPM);
}
By the way, the initialisation is as follows:
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
Unfortunately, it does not work.
Did you forget to populate the pass manager?
// Sketch: populate a pass manager with the -O2 module pipeline and run it.
PassManagerBase& PM = ...; // create the pass manager.
PassManagerBuilder PMBuilder;
PMBuilder.OptLevel = 2;
PMBuilder.DisableUnrollLoops = false;
PMBuilder.Inliner = createFunctionInliningPass(50);
// Without this call the pass manager stays empty and run() does nothing useful.
PMBuilder.populateModulePassManager(PM);
Module& M = ...; // your IR module here
PM.run(M);
Note that a "FunctionPassManager" may not do what you need. You're likely looking for legacy::PassManager instead (which can hold any type of pass).

Why is this block of LLVM instructions generated?

The DataFlowSanitizer pass on LLVM 3.8.0, 64 bit (Ubuntu 16.04.2) generates the following IR from source:
The source:
test.c
#include <sanitizer/dfsan_interface.h>
/* Creates a label via dfsan_create_label("i", 0) and passes it to
 * dfsan_set_label() for the sizeof(i) bytes at &i. Always returns 0. */
int main(void) {
int i = 1;
dfsan_label i_label = dfsan_create_label("i", 0);
dfsan_set_label(i_label, &i, sizeof(i));
return 0;
}
The commands to generate the IR:
clang -c -emit-llvm -fsanitize=dataflow test.c -o test.bc
llvm-dis test.bc
The disassembly:
test.ll
; Function Attrs: nounwind uwtable
; Instrumented main(): besides the allocas for retval, i and i_label, the pass
; has added two extra i16 allocas (%0, %1) and the delimited block below.
define i32 @main() #0 {
entry:
  %0 = alloca i16
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  %1 = alloca i16
  %i_label = alloca i16, align 2
  store i16 0, i16* %0
  store i32 0, i32* %retval, align 4
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ; Masks the address of %i, doubles it, and stores an i64 zero at the result.
  ; NOTE(review): this looks like a shadow-address computation for the store
  ; to %i that follows -- confirm against DataFlowSanitizer.cpp.
  %2 = ptrtoint i32* %i to i64
  %3 = and i64 %2, -123145302310913
  %4 = mul i64 %3, 2
  %5 = inttoptr i64 %4 to i16*
  %6 = bitcast i16* %5 to i64*
  store i64 0, i64* %6, align 2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  store i32 1, i32* %i, align 4
  %call = call zeroext i16 @dfsan_create_label(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8* null)
  store i16 0, i16* %1
  store i16 %call, i16* %i_label, align 2
  %7 = load i16, i16* %1
  %8 = load i16, i16* %i_label, align 2
  %9 = bitcast i32* %i to i8*
  call void @dfsan_set_label(i16 zeroext %8, i8* %9, i64 4)
  ret i32 0
}
I don't understand why the block of instructions I separated out is being generated. Looking at Transforms/Instrumentation/DataFlowSanitizer.cpp, I can't find the code that inserts the instrumentation above. Can anyone explain this behavior?

LLVM Create VarArg Function and access var args

I have been trying to create a function using the module pass in LLVM. What I am trying to do is create a variable argument function and then add the logic to manipulate the variable arguments.
For example:
/* can do this */
/* Variadic function that reads one double from its variable arguments.
 * NOTE(review): there is no return statement, so callers must not use the
 * result. */
int foo(int a, ...)
{
    double var1;
    //can't figure out how to add any of this using llvm
    va_list ap;
    va_start(ap, a);
    /* va_arg takes the va_list and the requested type and yields the next
     * argument; its result has to be assigned. */
    var1 = va_arg(ap, double);
    va_end(ap);
    (void)var1;
}
Creating the function type is easy because I just set the vararg boolean to true. What do I do after that?
I usually compile the equivalent C/C++ code with clang to see what IR it generates.
Use the LLVM instruction va_arg and the intrinsics llvm.va_start, llvm.va_end and llvm.va_copy to access LLVM's variable argument support.
You also need the target-specific value type "va_list" for functions that operate on variable arguments.
; This struct is different for every platform. For most platforms,
; it is merely an i8*.
%struct.va_list = type { i8* }
; For Unix x86_64 platforms, va_list is the following struct:
; %struct.va_list = type { i32, i32, i8*, i8* }
Reference: http://llvm.org/docs/LangRef.html#variable-argument-handling-intrinsics
For the code you listed, clang generates:
; ModuleID = 'test.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
; Function Attrs: nounwind uwtable
; Variadic foo(): initialises the va_list in %ap with llvm.va_start, selects
; the address of the next double from field 3 or field 2 of the va_list tag
; depending on the value loaded from field 1, loads it, then calls llvm.va_end.
define i32 @foo(i32 %a, ...) #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %var1 = alloca double, align 8
  %ap = alloca [1 x %struct.__va_list_tag], align 16
  store i32 %a, i32* %2, align 4
  %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  %4 = bitcast %struct.__va_list_tag* %3 to i8*
  call void @llvm.va_start(i8* %4)
  %5 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  %6 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 1
  %7 = load i32* %6
  %8 = icmp ule i32 %7, 160
  br i1 %8, label %9, label %15
; <label>:9 ; preds = %0
  %10 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 3
  %11 = load i8** %10
  %12 = getelementptr i8* %11, i32 %7
  %13 = bitcast i8* %12 to double*
  %14 = add i32 %7, 16
  store i32 %14, i32* %6
  br label %20
; <label>:15 ; preds = %0
  %16 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 2
  %17 = load i8** %16
  %18 = bitcast i8* %17 to double*
  %19 = getelementptr i8* %17, i32 8
  store i8* %19, i8** %16
  br label %20
; <label>:20 ; preds = %15, %9
  %21 = phi double* [ %13, %9 ], [ %18, %15 ]
  %22 = load double* %21
  %23 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  %24 = bitcast %struct.__va_list_tag* %23 to i8*
  call void @llvm.va_end(i8* %24)
  %25 = load i32* %1
  ret i32 %25
}
; Variable-argument intrinsics called by @foo.
; Function Attrs: nounwind
declare void @llvm.va_start(i8*) #1
; Function Attrs: nounwind
declare void @llvm.va_end(i8*) #1
; Function Attrs: nounwind uwtable
; Empty main(): returns 0.
define i32 @main() #0 {
  ret i32 0
}

How to execute llvm code

I have a C program, "factorial.c", that calculates the factorial of an int. I compile it to textual LLVM IR, "factorial.ll", and then modify the generated IR.
The objective is to execute the modified LLVM code and see its output. How can I do this?
It will depend on how your outputted LLVM is assembled and what libraries it links against, but for example executing the following factorial.ll with the shell command lli
$ lli factorial.ll
Factorial of 10 = 3628800
This executes the main function with the JIT and uses the standard printf to write the result to stdout.
; Format string passed to printf by @main.
@.str = private unnamed_addr constant [22 x i8] c"Factorial of %d = %d\0A\00", align 1
declare i32 @printf(i8*, ...)
; Recursive factorial: returns 1 when %n <= 1, otherwise %n * factorial(%n - 1).
define i32 @factorial(i32 %n) nounwind uwtable {
entry:
  %n.addr = alloca i32, align 4
  store i32 %n, i32* %n.addr, align 4
  %0 = load i32* %n.addr, align 4
  %cmp = icmp sle i32 %0, 1
  br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
  br label %cond.end
cond.false: ; preds = %entry
  %1 = load i32* %n.addr, align 4
  %2 = load i32* %n.addr, align 4
  %sub = sub nsw i32 %2, 1
  %call = call i32 @factorial(i32 %sub)
  %mul = mul nsw i32 %1, %call
  br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
  %cond = phi i32 [ 1, %cond.true ], [ %mul, %cond.false ]
  ret i32 %cond
}
; Calls factorial(10) and prints the result with printf using @.str; returns 0.
define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 @factorial(i32 10)
  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 10, i32 %call)
  ret i32 0
}