I am new to LLVM IR. I am currently trying to implement a FunctionPass that checks a custom requirement proposed by my research collaborators. The requirement involves detecting syscalls. More precisely, given some source code, I have to detect whether there is a syscall in the control flow graph. If there is, I have to find out the syscall type (e.g., open, fork), the syscall arguments (e.g., the file descriptor and other parameters) and the return value.
Here is an example. The following is the code of fopen.c from the musl libc library:
#include "stdio_impl.h"
#include <fcntl.h>
#include <string.h>
#include <errno.h>
/*
 * Opens the file `filename` according to the stdio mode string `mode`.
 *
 * Returns the new stream, or 0 when the mode is rejected, the open fails,
 * or __fdopen() fails. errno is set to EINVAL here only for a rejected mode.
 */
FILE *fopen(const char *restrict filename, const char *restrict mode)
{
	/* The first mode character must be one that strchr() finds in "rwa". */
	if (strchr("rwa", *mode) == 0) {
		errno = EINVAL;
		return 0;
	}

	/* Translate the mode string into open() flags, then open the file. */
	int flags = __fmodeflags(mode);
	int fd = sys_open(filename, flags, 0666);
	if (fd < 0)
		return 0;

	/* Set FD_CLOEXEC explicitly when the computed flags request it. */
	if (flags & O_CLOEXEC)
		__syscall(SYS_fcntl, fd, F_SETFD, FD_CLOEXEC);

	FILE *f = __fdopen(fd, mode);
	if (!f) {
		/* No stream was created: close the descriptor before failing. */
		__syscall(SYS_close, fd);
		return 0;
	}
	return f;
}
weak_alias(fopen, fopen64);
Here is the generated intermediate representation in ll format:
; ModuleID = 'src/stdio/fopen.c'
source_filename = "src/stdio/fopen.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i32 (%struct._IO_FILE*)*, i8*, i8*, i8*, i8*, i64 (%struct._IO_FILE*, i8*, i64)*, i64 (%struct._IO_FILE*, i8*, i64)*, i64 (%struct._IO_FILE*, i64, i32)*, i8*, i64, %struct._IO_FILE*, %struct._IO_FILE*, i32, i32, i64, i32, i32, i32, i8*, i64, i8*, i8*, i8*, i64, i64, %struct._IO_FILE*, %struct._IO_FILE*, %struct.__locale_struct* }
%struct.__locale_struct = type opaque
#.str = private unnamed_addr constant [4 x i8] c"rwa\00", align 1
#fopen64 = weak alias %struct._IO_FILE* (i8*, i8*), %struct._IO_FILE* (i8*, i8*)* #fopen
; Function Attrs: nounwind optsize strictfp
define %struct._IO_FILE* #fopen(i8* noalias noundef %0, i8* noalias noundef %1) #0 {
%3 = load i8, i8* %1, align 1, !tbaa !3
%4 = sext i8 %3 to i32
%5 = tail call i8* #strchr(i8* noundef getelementptr inbounds ([4 x i8], [4 x i8]* #.str, i64 0, i64 0), i32 noundef %4) #3
%6 = icmp eq i8* %5, null
br i1 %6, label %7, label %9
7: ; preds = %2
%8 = tail call i32* #___errno_location() #4
store i32 22, i32* %8, align 4, !tbaa !6
br label %32
9: ; preds = %2
%10 = tail call i32 #__fmodeflags(i8* noundef nonnull %1) #3
%11 = ptrtoint i8* %0 to i64
%12 = or i32 %10, 32768
%13 = sext i32 %12 to i64
%14 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 2, i64 %11, i64 %13, i64 438) #5, !srcloc !8
%15 = tail call i64 #__syscall_ret(i64 noundef %14) #3
%16 = trunc i64 %15 to i32
%17 = icmp slt i32 %16, 0
br i1 %17, label %32, label %18
18: ; preds = %9
%19 = and i32 %10, 524288
%20 = icmp eq i32 %19, 0
br i1 %20, label %25, label %21
21: ; preds = %18
%22 = shl i64 %15, 32
%23 = ashr exact i64 %22, 32
%24 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 72, i64 %23, i64 2, i64 1) #5, !srcloc !8
br label %25
25: ; preds = %21, %18
%26 = tail call %struct._IO_FILE* #__fdopen(i32 noundef %16, i8* noundef nonnull %1) #3
%27 = icmp eq %struct._IO_FILE* %26, null
br i1 %27, label %28, label %32
28: ; preds = %25
%29 = shl i64 %15, 32
%30 = ashr exact i64 %29, 32
%31 = tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 3, i64 %30) #5, !srcloc !9
br label %32
32: ; preds = %25, %9, %28, %7
%33 = phi %struct._IO_FILE* [ null, %28 ], [ null, %7 ], [ null, %9 ], [ %26, %25 ]
ret %struct._IO_FILE* %33
}
; Function Attrs: optsize
declare i8* #strchr(i8* noundef, i32 noundef) local_unnamed_addr #1
; Function Attrs: mustprogress nofree nosync nounwind optsize readnone willreturn
declare hidden i32* #___errno_location() local_unnamed_addr #2
; Function Attrs: optsize
declare hidden i32 #__fmodeflags(i8* noundef) local_unnamed_addr #1
; Function Attrs: optsize
declare hidden i64 #__syscall_ret(i64 noundef) local_unnamed_addr #1
; Function Attrs: optsize
declare hidden %struct._IO_FILE* #__fdopen(i32 noundef, i8* noundef) local_unnamed_addr #1
attributes #0 = { nounwind optsize strictfp "frame-pointer"="none" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "strictfp" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { optsize "frame-pointer"="none" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { mustprogress nofree nosync nounwind optsize readnone willreturn "frame-pointer"="none" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { nobuiltin nounwind optsize strictfp "no-builtins" }
attributes #4 = { nobuiltin nounwind optsize readnone strictfp willreturn "no-builtins" }
attributes #5 = { nounwind strictfp }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"Ubuntu clang version 14.0.6"}
!3 = !{!4, !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"int", !4, i64 0}
!8 = !{i64 71220}
!9 = !{i64 70822}
I am using the following basic construct to catch instructions inside the FunctionPass:
/// Walks the argument operands of a call instruction.
///
/// \param call  call instruction to inspect; must not be null.
static void parseCallInstruction(CallInst *call){
    assert(call != NULL && "parseCallInstruction requires a call instruction");
    // Iterate over the argument list only. getNumOperands() also counts the
    // callee, which a call stores as an extra operand after the arguments,
    // so using it as the bound for getArgOperand() indexes past the last
    // real argument.
    for (auto &argument : call->args()) {
        Value *operand = argument.get();
        (void)operand; // not consumed yet
        // How do I get the syscall number, arguments and return type from here ?
    }
}
/// Visits every call instruction in the module and passes it to
/// parseCallInstruction().
///
/// \param M  module to scan.
/// \return always false; nothing in this function modifies the IR.
virtual bool runOnModule(Module &M)
{
    // Iterate by non-const reference so no const_cast is needed further down.
    for (Function &currentFunction : M)
    {
        for (auto &basicBlock : currentFunction)
        {
            for (auto &instruction : basicBlock)
            {
                // dyn_cast yields null for anything that is not a CallInst,
                // so one cast replaces the isa<> check plus a second cast.
                if (CallInst *call = dyn_cast<CallInst>(&instruction))
                {
                    // Let's assume for now that the call instruction in question is one of the instructions among instruction %14,%24,%31 of the llvm IR code.
                    parseCallInstruction(call);
                }
            }
        }
    }
    return false;
}
For example, consider instruction %14 in the LLVM IR: tail call i64 asm sideeffect "syscall", "={ax},{ax},{di},{si},{dx},~{rcx},~{r11},~{memory},~{dirflag},~{fpsr},~{flags}"(i64 2, i64 %11, i64 %13, i64 438) #5, !srcloc !8. A cursory look at the source code shows that it is an open syscall. How do I get this, along with the other argument values and the return value, by parsing the CallInst? If CallInst is not the right way to go about it, what other instruction-level mechanism can I exploit to get that information?
Any help or tips will be greatly appreciated. Thanks in advance.
Related
I'm following the Kaleidoscope example to write a minimal IR file interpreter. It takes one command-line argument, the path to a .ll file, and executes the main function in that file. But when I tested it on an IR file, it failed with:
Assertion failed: (KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) && "Resolving symbol with incorrect flags", file <path>\llvm\lib\ExecutionEngine\Orc\Core.cpp, line 2775
Considering the simplicity of my code (which only has 63 lines), I can't figure out what's wrong in it. Please help 😭!!!
Full Source Code
#include <iostream>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/Core.h>
#include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
#include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
using namespace llvm;
int
main(int argc, char* argv[])
{
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetAsmParser();
orc::ThreadSafeContext tsctx(std::make_unique<LLVMContext>());
SMDiagnostic error;
auto mod = parseIRFile(argv[1], error, *tsctx.getContext());
auto epc = orc::SelfExecutorProcessControl::Create();
cantFail(epc.takeError());
orc::ExecutionSession es(std::move(*epc));
auto triple = es.getExecutorProcessControl().getTargetTriple();
orc::JITTargetMachineBuilder jtmb(triple);
auto dl = jtmb.getDefaultDataLayoutForTarget();
cantFail(dl.takeError());
orc::RTDyldObjectLinkingLayer ol(
es, []() { return std::make_unique<SectionMemoryManager>(); });
orc::IRCompileLayer cl(
es, ol, std::make_unique<orc::ConcurrentIRCompiler>(std::move(jtmb)));
auto& jd = es.createBareJITDylib("jd");
jd.addGenerator(
cantFail(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
dl->getGlobalPrefix())));
cantFail(cl.add(jd, orc::ThreadSafeModule(std::move(mod), tsctx)));
orc::MangleAndInterner mangle(es, *dl);
auto f = es.lookup({ &jd }, mangle("main"));
cantFail(f.takeError());
return reinterpret_cast<int (*)()>(f->getAddress())();
}
Test .ll file
; ModuleID = 'sum.ll'
source_filename = "sum.c"
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.29.30133"
%struct._iobuf = type { i8* }
%struct.__crt_locale_pointers = type { %struct.__crt_locale_data*, %struct.__crt_multibyte_data* }
%struct.__crt_locale_data = type opaque
%struct.__crt_multibyte_data = type opaque
$scanf = comdat any
$__local_stdio_scanf_options = comdat any
$"??_C#_02DPKJAMEF#?$CFd?$AA#" = comdat any
#"??_C#_02DPKJAMEF#?$CFd?$AA#" = linkonce_odr dso_local unnamed_addr constant [3 x i8] c"%d\00", comdat, align 1
#__local_stdio_scanf_options._OptionsStorage = internal global i64 0, align 8
; Function Attrs: nounwind uwtable
define dso_local i32 #main() local_unnamed_addr #0 {
%1 = alloca i32, align 4
%2 = bitcast i32* %1 to i8*
call void #llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #6
%3 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%4 = load i32, i32* %1, align 4, !tbaa !4
%5 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%6 = load i32, i32* %1, align 4, !tbaa !4
%7 = add nsw i32 %6, %4
%8 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%9 = load i32, i32* %1, align 4, !tbaa !4
%10 = add nsw i32 %7, %9
%11 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%12 = load i32, i32* %1, align 4, !tbaa !4
%13 = add nsw i32 %10, %12
%14 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%15 = load i32, i32* %1, align 4, !tbaa !4
%16 = add nsw i32 %13, %15
%17 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%18 = load i32, i32* %1, align 4, !tbaa !4
%19 = add nsw i32 %16, %18
%20 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%21 = load i32, i32* %1, align 4, !tbaa !4
%22 = add nsw i32 %19, %21
%23 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%24 = load i32, i32* %1, align 4, !tbaa !4
%25 = add nsw i32 %22, %24
%26 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%27 = load i32, i32* %1, align 4, !tbaa !4
%28 = add nsw i32 %25, %27
%29 = call i32 (i8*, ...) #scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* #"??_C#_02DPKJAMEF#?$CFd?$AA#", i64 0, i64 0), i32* nonnull %1)
%30 = load i32, i32* %1, align 4, !tbaa !4
%31 = add nsw i32 %28, %30
call void #llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #6
ret i32 %31
}
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void #llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: inlinehint nobuiltin nounwind uwtable
define linkonce_odr dso_local i32 #scanf(i8* %0, ...) local_unnamed_addr #2 comdat {
%2 = alloca i8*, align 8
%3 = bitcast i8** %2 to i8*
call void #llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3) #6
call void #llvm.va_start(i8* nonnull %3)
%4 = load i8*, i8** %2, align 8, !tbaa !8
%5 = call %struct._iobuf* #__acrt_iob_func(i32 0) #6
%6 = call i64* #__local_stdio_scanf_options() #6
%7 = load i64, i64* %6, align 8, !tbaa !10
%8 = call i32 #__stdio_common_vfscanf(i64 %7, %struct._iobuf* %5, i8* %0, %struct.__crt_locale_pointers* null, i8* %4) #6
call void #llvm.va_end(i8* nonnull %3)
call void #llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3) #6
ret i32 %8
}
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void #llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: mustprogress nofree nosync nounwind willreturn
declare void #llvm.va_start(i8*) #3
; Function Attrs: mustprogress nofree nosync nounwind willreturn
declare void #llvm.va_end(i8*) #3
declare dso_local %struct._iobuf* #__acrt_iob_func(i32) local_unnamed_addr #4
declare dso_local i32 #__stdio_common_vfscanf(i64, %struct._iobuf*, i8*, %struct.__crt_locale_pointers*, i8*) local_unnamed_addr #4
; Function Attrs: noinline nounwind uwtable
define linkonce_odr dso_local i64* #__local_stdio_scanf_options() local_unnamed_addr #5 comdat {
ret i64* #__local_stdio_scanf_options._OptionsStorage
}
attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { inlinehint nobuiltin nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { mustprogress nofree nosync nounwind willreturn }
attributes #4 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #5 = { noinline nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #6 = { nounwind }
!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}
!0 = !{i32 1, !"wchar_size", i32 2}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"uwtable", i32 1}
!3 = !{!"clang version 13.0.1"}
!4 = !{!5, !5, i64 0}
!5 = !{!"int", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}
!8 = !{!9, !9, i64 0}
!9 = !{!"any pointer", !6, i64 0}
!10 = !{!11, !11, i64 0}
!11 = !{!"long long", !6, i64 0}
Which is compiled from:
#include <stdio.h>
/* Reads ten integers from standard input with scanf and returns their sum
 * as the program's exit status. */
int
main()
{
int n, sum = 0;
for (int i = 0; i < 10; ++i) {
/* NOTE(review): scanf's return value is ignored; if a conversion fails,
 * n is presumably left unassigned (uninitialized on the first pass). */
scanf("%d", &n);
sum += n;
}
return sum;
}
Well, it turns out that THERE IS NOTHING WRONG WITH MY CODE!
I compiled and tested the same code in a Linux environment (WSL2), and everything works fine. I'm pretty sure that this is some kind of compatibility problem between Linux and Windows.
Maybe this is a bug of LLVM?
I am trying to run llc with global instruction selection; unfortunately, it fails on any shift instruction (e.g. shl).
Here is how to reproduce it using llc -global-isel=true -filetype=asm sample.ll:
; Keeps %iaoq in a stack slot and advances it along one of two paths chosen
; by %nullify, then returns the final slot value.
;   %nullify = false: (iaoq + 4) << 13, then + 4 in next_insn
;   %nullify = true : iaoq + 8
define i64 @test_advance(i1 %nullify, i64 %iaoq) {
entry:
  %cache = alloca i64
  store i64 %iaoq, i64* %cache
  br i1 %nullify, label %cond_true, label %cond_false
cond_false:
  %iaoq.1 = load i64, i64* %cache
  %iaoq.2 = add i64 %iaoq.1, 4
  ; This shift is the instruction the question reports llc -global-isel failing on.
  %iaoq.x = shl i64 %iaoq.2, 13
  store i64 %iaoq.x, i64* %cache
  br label %next_insn
cond_true:
  %iaoq.3 = load i64, i64* %cache
  %iaoq.4 = add i64 %iaoq.3, 8
  store i64 %iaoq.4, i64* %cache
  br label %after_next_insn
next_insn:
  %iaoq.5 = load i64, i64* %cache
  %iaoq.6 = add i64 %iaoq.5, 4
  store i64 %iaoq.6, i64* %cache
  br label %after_next_insn
after_next_insn:
  %res = load i64, i64* %cache
  ret i64 %res
}
Why is there such strange behaviour? What is so special about shifts? My platform is x86.
EDIT
In release_70 this problem is solved; unfortunately, global-isel cannot cope with the legalisation of uadd_with_overflow:
; Adds %iaoq to itself with the unsigned add-with-overflow intrinsic and
; returns the sum (element 0 of the result); the overflow bit is not read
; and %nullify is unused.
define i64 @test_advance(i1 %nullify, i64 %iaoq) {
entry:
  %0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %iaoq, i64 %iaoq)
  %sum_64_7949 = extractvalue { i64, i1 } %0, 0
  ret i64 %sum_64_7949
}
; Function Attrs: nounwind readnone speculatable
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #0
It gets the same error:
LLVM ERROR: unable to legalize instruction: %3:_(s64), %4:_(s1) = G_UADDE %1:_, %1:_, %5:_ (in function: test_advance)
I'm trying to get exceptions working with LLVM for a very simple example that I can later build on, but I'm running into some real difficulties and I'm not sure why.
I got clang to give me the following LLVM IR, which I am passing into MCJIT:
; llvm-as c++exn.ll && llvm-ld -native c++exn.bc -lstdc++.6 && ./a.out
%"class.std::ios_base::Init" = type { i8 }
%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" }
%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8, %"class.std::basic_streambuf"*, %"class.std::ctype"*, %"class.std::num_put"*, %"class.std::num_get"* }
%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"class.std::locale" }
%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 }
%"struct.std::ios_base::_Words" = type <{ i8*, i32, [4 x i8] }>
%"class.std::locale" = type { %"class.std::locale::_Impl"* }
%"class.std::locale::_Impl" = type { i32, %"class.std::locale::facet"**, i64, %"class.std::locale::facet"**, i8** }
%"class.std::locale::facet" = type <{ i32 (...)**, i32, [4 x i8] }>
%"class.std::basic_streambuf" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale" }
%"class.std::ctype" = type <{ %"class.std::locale::facet.base", [4 x i8], i32*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
%"class.std::locale::facet.base" = type <{ i32 (...)**, i32 }>
%"class.std::num_put" = type { %"class.std::locale::facet.base", [4 x i8] }
%"class.std::num_get" = type { %"class.std::locale::facet.base", [4 x i8] }
%class.new_exception = type { %"class.std::exception" }
%"class.std::exception" = type { i32 (...)** }
$_ZN13new_exceptionC2Ev = comdat any
$_ZN13new_exceptionD2Ev = comdat any
$__clang_call_terminate = comdat any
$_ZNSt9exceptionC2Ev = comdat any
$_ZN13new_exceptionD0Ev = comdat any
$_ZTS13new_exception = comdat any
$_ZTI13new_exception = comdat any
$_ZTV13new_exception = comdat any
#_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
#_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8*
#_ZTS13new_exception = linkonce_odr constant [16 x i8] c"13new_exception\00", comdat
#_ZTISt9exception = external constant i8*
#_ZTI13new_exception = linkonce_odr constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** #_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([16 x i8], [16 x i8]* #_ZTS13new_exception, i32 0, i32 0), i8* bitcast (i8** #_ZTISt9exception to i8*) }, comdat
#_ZSt4cout = external global %"class.std::basic_ostream", align 8
#.str = private unnamed_addr constant [11 x i8] c"caught !!!\00", align 1
#_ZTV13new_exception = linkonce_odr unnamed_addr constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* #_ZTI13new_exception to i8*), i8* bitcast (void (%class.new_exception*)* #_ZN13new_exceptionD2Ev to i8*), i8* bitcast (void (%class.new_exception*)* #_ZN13new_exceptionD0Ev to i8*), i8* bitcast (i8* (%"class.std::exception"*)* #_ZNKSt9exception4whatEv to i8*)], comdat, align 8
#_ZTVSt9exception = external unnamed_addr constant [5 x i8*]
#llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* #_GLOBAL__sub_I_runtime_gen.cpp, i8* null }]
; Function Attrs: uwtable
define internal void #__cxx_global_var_init() #0 {
call void #_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* #_ZStL8__ioinit)
%1 = call i32 #atexit(void ()* #__dtor__ZStL8__ioinit) #2
ret void
}
declare void #_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) #1
declare void #_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) #1
; Function Attrs: uwtable
define internal void #__dtor__ZStL8__ioinit() #0 {
call void #_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"* #_ZStL8__ioinit)
ret void
}
; Function Attrs: nounwind
declare i32 #atexit(void ()*) #2
; Function Attrs: uwtable
define void #_Z5functv() #0 {
%1 = call i8* #__cxa_allocate_exception(i64 8) #2
%2 = bitcast i8* %1 to %class.new_exception*
%3 = bitcast %class.new_exception* %2 to i8*
call void #llvm.memset.p0i8.i64(i8* %3, i8 0, i64 8, i32 16, i1 false)
call void #_ZN13new_exceptionC2Ev(%class.new_exception* %2) #2
call void #__cxa_throw(i8* %1, i8* bitcast ({ i8*, i8*, i8* }* #_ZTI13new_exception to i8*), i8* bitcast (void (%class.new_exception*)* #_ZN13new_exceptionD2Ev to i8*)) #11
unreachable
; No predecessors!
ret void
}
declare i8* #__cxa_allocate_exception(i64)
; Function Attrs: argmemonly nounwind
declare void #llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
; Function Attrs: inlinehint nounwind uwtable
define linkonce_odr void #_ZN13new_exceptionC2Ev(%class.new_exception* %this) unnamed_addr #4 comdat align 2 {
%1 = alloca %class.new_exception*, align 8
store %class.new_exception* %this, %class.new_exception** %1, align 8
%2 = load %class.new_exception*, %class.new_exception** %1, align 8
%3 = bitcast %class.new_exception* %2 to %"class.std::exception"*
call void #_ZNSt9exceptionC2Ev(%"class.std::exception"* %3) #2
%4 = bitcast %class.new_exception* %2 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* #_ZTV13new_exception, i64 0, i64 2) to i32 (...)**), i32 (...)*** %4, align 8
ret void
}
; Function Attrs: inlinehint nounwind uwtable
define linkonce_odr void #_ZN13new_exceptionD2Ev(%class.new_exception* %this) unnamed_addr #4 comdat align 2 {
%1 = alloca %class.new_exception*, align 8
store %class.new_exception* %this, %class.new_exception** %1, align 8
%2 = load %class.new_exception*, %class.new_exception** %1, align 8
%3 = bitcast %class.new_exception* %2 to %"class.std::exception"*
call void #_ZNSt9exceptionD2Ev(%"class.std::exception"* %3) #2
ret void
}
declare void #__cxa_throw(i8*, i8*, i8*)
; Function Attrs: uwtable
define void #_Z4ctchv() #0 personality i8* bitcast (i32 (...)* #__gxx_personality_seh0 to i8*) {
%1 = alloca i8*
%2 = alloca i32
%o = alloca %class.new_exception*, align 8
invoke void #_Z5functv()
to label %3 unwind label %4
; <label>:3 ; preds = %0
br label %20
; <label>:4 ; preds = %0
%5 = landingpad { i8*, i32 }
catch i8* bitcast ({ i8*, i8*, i8* }* #_ZTI13new_exception to i8*)
%6 = extractvalue { i8*, i32 } %5, 0
store i8* %6, i8** %1, align 8
%7 = extractvalue { i8*, i32 } %5, 1
store i32 %7, i32* %2, align 4
br label %8
; <label>:8 ; preds = %4
%9 = load i32, i32* %2, align 4
%10 = call i32 #llvm.eh.typeid.for(i8* bitcast ({ i8*, i8*, i8* }* #_ZTI13new_exception to i8*)) #2
%11 = icmp eq i32 %9, %10
br i1 %11, label %12, label %26
; <label>:12 ; preds = %8
%13 = load i8*, i8** %1, align 8
%14 = call i8* #__cxa_begin_catch(i8* %13) #2
%15 = bitcast i8* %14 to %class.new_exception*
store %class.new_exception* %15, %class.new_exception** %o, align 8
%16 = invoke dereferenceable(272) %"class.std::basic_ostream"* #_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"class.std::basic_ostream"* dereferenceable(272) #_ZSt4cout, i8* getelementptr inbounds ([11 x i8], [11 x i8]* #.str, i32 0, i32 0))
to label %17 unwind label %21
; <label>:17 ; preds = %12
%18 = invoke dereferenceable(272) %"class.std::basic_ostream"* #_ZNSolsEPFRSoS_E(%"class.std::basic_ostream"* %16, %"class.std::basic_ostream"* (%"class.std::basic_ostream"*)* #_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_)
to label %19 unwind label %21
; <label>:19 ; preds = %17
call void #__cxa_end_catch()
br label %20
; <label>:20 ; preds = %19, %3
ret void
; <label>:21 ; preds = %17, %12
%22 = landingpad { i8*, i32 }
cleanup
%23 = extractvalue { i8*, i32 } %22, 0
store i8* %23, i8** %1, align 8
%24 = extractvalue { i8*, i32 } %22, 1
store i32 %24, i32* %2, align 4
invoke void #__cxa_end_catch()
to label %25 unwind label %31
; <label>:25 ; preds = %21
br label %26
; <label>:26 ; preds = %25, %8
%27 = load i8*, i8** %1, align 8
%28 = load i32, i32* %2, align 4
%29 = insertvalue { i8*, i32 } undef, i8* %27, 0
%30 = insertvalue { i8*, i32 } %29, i32 %28, 1
resume { i8*, i32 } %30
; <label>:31 ; preds = %21
%32 = landingpad { i8*, i32 }
catch i8* null
%33 = extractvalue { i8*, i32 } %32, 0
call void #__clang_call_terminate(i8* %33) #12
unreachable
}
declare i32 #__gxx_personality_seh0(...)
; Function Attrs: nounwind readnone
declare i32 #llvm.eh.typeid.for(i8*) #5
declare i8* #__cxa_begin_catch(i8*)
declare dereferenceable(272) %"class.std::basic_ostream"* #_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"class.std::basic_ostream"* dereferenceable(272), i8*) #1
declare dereferenceable(272) %"class.std::basic_ostream"* #_ZNSolsEPFRSoS_E(%"class.std::basic_ostream"*, %"class.std::basic_ostream"* (%"class.std::basic_ostream"*)*) #1
declare dereferenceable(272) %"class.std::basic_ostream"* #_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"class.std::basic_ostream"* dereferenceable(272)) #1
declare void #__cxa_end_catch()
; Function Attrs: noinline noreturn nounwind
define linkonce_odr hidden void #__clang_call_terminate(i8*) #6 comdat {
%2 = call i8* #__cxa_begin_catch(i8* %0) #2
call void #_ZSt9terminatev() #12
unreachable
}
declare void #_ZSt9terminatev()
; Function Attrs: norecurse uwtable
define i32 #main() #7 {
call void #_Z4ctchv()
ret i32 0
}
; Function Attrs: nounwind uwtable
define linkonce_odr void #_ZNSt9exceptionC2Ev(%"class.std::exception"* %this) unnamed_addr #8 comdat align 2 {
%1 = alloca %"class.std::exception"*, align 8
store %"class.std::exception"* %this, %"class.std::exception"** %1, align 8
%2 = load %"class.std::exception"*, %"class.std::exception"** %1, align 8
%3 = bitcast %"class.std::exception"* %2 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* #_ZTVSt9exception, i64 0, i64 2) to i32 (...)**), i32 (...)*** %3, align 8
ret void
}
; Function Attrs: inlinehint nounwind uwtable
define linkonce_odr void #_ZN13new_exceptionD0Ev(%class.new_exception* %this) unnamed_addr #4 comdat align 2 {
%1 = alloca %class.new_exception*, align 8
store %class.new_exception* %this, %class.new_exception** %1, align 8
%2 = load %class.new_exception*, %class.new_exception** %1, align 8
call void #_ZN13new_exceptionD2Ev(%class.new_exception* %2) #2
%3 = bitcast %class.new_exception* %2 to i8*
call void #_ZdlPv(i8* %3) #13
ret void
}
; Function Attrs: nounwind
declare i8* #_ZNKSt9exception4whatEv(%"class.std::exception"*) #9
; Function Attrs: nobuiltin nounwind
declare void #_ZdlPv(i8*) #10
; Function Attrs: nounwind
declare void #_ZNSt9exceptionD2Ev(%"class.std::exception"*) #9
; Function Attrs: uwtable
define internal void #_GLOBAL__sub_I_runtime_gen.cpp() #0 {
call void #__cxx_global_var_init()
ret void
}
attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
attributes #3 = { argmemonly nounwind }
attributes #4 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nounwind readnone }
attributes #6 = { noinline noreturn nounwind }
attributes #7 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #8 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #9 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #10 = { nobuiltin nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #11 = { noreturn }
attributes #12 = { noreturn nounwind }
attributes #13 = { builtin nounwind }
This is the LLVM IR generated from the following C++ code.
#include <iostream>
#include <exception>
using namespace std;
/// Exception type whose what() reports the fixed message "new_exception".
class new_exception : public std::exception {
public:
    /// Overrides std::exception::what(). The base version is declared
    /// `const noexcept`; without matching qualifiers this function would only
    /// hide it, and calls made through a std::exception reference would never
    /// reach it. It is public so that callers can actually invoke it.
    const char* what() const noexcept override {
        return "new_exception";
    }
};
// Throws a default-constructed new_exception.
void funct() {
    throw new_exception{};
}
// Calls funct() and prints "caught !!!" followed by a newline if it throws a
// new_exception. Only new_exception is handled here.
void ctch() {
    try {
        funct();
    }
    catch (new_exception&) {
        std::cout << "caught !!!";
        std::cout << std::endl;
    }
}
// Program entry point: runs ctch(). There is no explicit return statement;
// in C++, flowing off the end of main is equivalent to `return 0;`.
int main() {
ctch();
}
When loaded, it is compiled by MCJIT on Windows without issue, but it crashes when run; under gdb it halts with the message 'RaiseException() ?:Unknown signal'.
As far as I am aware the exception is being handled correctly, "caught" by the code and so shouldn't be the issue.
Is my use of the personality function correct? I have looked through answers that suggest the use of an SEH-based personality on Windows might be wrong. What is the alternative if this is wrong, and why would clang emit incorrect code?
Platform is Windows X86_64
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-w64-windows-gnu"
I'm starting to use exceptions in C++ for Windows with clang and msvc 2015, and they do work very well with standard C++ exceptions (that Microsoft documentation calls synchronous) -- no need to use SEH (AKA Structured Exception Handling), that Microsoft calls asynchronous.
Your code contains an error: the main function doesn't return an int. Please check it:
Change this
int main() {
ctch();
}
To this:
int main() {
ctch();
return 0;
}
I have been trying to create a function using the module pass in LLVM. What I am trying to do is create a variable argument function and then add the logic to manipulate the variable arguments.
For example:
#include <stdarg.h> /* va_list, va_start, va_arg, va_end */

/* can do this */
/*
 * Variadic example: reads the first variadic argument as a double.
 *
 * a   - the only named parameter; it anchors va_start and is otherwise unused.
 * ... - the caller must pass at least one double.
 *
 * Returns 0. The value that was read is discarded; the function exists to
 * show the va_list call sequence.
 */
int foo(int a, ...)
{
    double var1;
    /* can't figure out how to add any of this using llvm */
    va_list ap;

    va_start(ap, a);
    /* va_arg takes the va_list and the expected type, and yields the value. */
    var1 = va_arg(ap, double);
    va_end(ap);

    (void)var1; /* read only for demonstration */
    return 0;   /* a non-void function must return a value */
}
Creating the function type is easy because I just set the vararg boolean to true. What do I do after that?
I always use clang to check what a given C/C++ construct is lowered to.
Use the LLVM `va_arg` instruction and the intrinsics `llvm.va_start`, `llvm.va_end`, and `llvm.va_copy` to use LLVM's variable-argument support.
You also need the target-specific value type "va_list" for functions that operate on such arguments.
; This struct is different for every platform. For most platforms,
; it is merely an i8*.
%struct.va_list = type { i8* }
; For Unix x86_64 platforms, va_list is the following struct:
; %struct.va_list = type { i32, i32, i8*, i8* }
ref http://llvm.org/docs/LangRef.html#variable-argument-handling-intrinsics
for your listed code,
; ModuleID = 'test.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
; Function Attrs: nounwind uwtable
; foo(i32 %a, ...): sets up a va_list in %ap, fetches one double from it by
; walking the %struct.__va_list_tag fields by hand, then ends the va_list.
; Global identifiers carry the '@' prefix required by LLVM IR.
define i32 @foo(i32 %a, ...) #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %var1 = alloca double, align 8
  %ap = alloca [1 x %struct.__va_list_tag], align 16
  store i32 %a, i32* %2, align 4
  %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  %4 = bitcast %struct.__va_list_tag* %3 to i8*
  call void @llvm.va_start(i8* %4)
  %5 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  ; Field 1 of the tag decides which of the two areas the argument is read from.
  %6 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 1
  %7 = load i32* %6
  %8 = icmp ule i32 %7, 160
  br i1 %8, label %9, label %15
; <label>:9 ; preds = %0
  ; Read from the area pointed to by field 3 at offset %7, then advance field 1 by 16.
  %10 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 3
  %11 = load i8** %10
  %12 = getelementptr i8* %11, i32 %7
  %13 = bitcast i8* %12 to double*
  %14 = add i32 %7, 16
  store i32 %14, i32* %6
  br label %20
; <label>:15 ; preds = %0
  ; Read from the area pointed to by field 2, then advance that pointer by 8 bytes.
  %16 = getelementptr inbounds %struct.__va_list_tag* %5, i32 0, i32 2
  %17 = load i8** %16
  %18 = bitcast i8* %17 to double*
  %19 = getelementptr i8* %17, i32 8
  store i8* %19, i8** %16
  br label %20
; <label>:20 ; preds = %15, %9
  %21 = phi double* [ %13, %9 ], [ %18, %15 ]
  %22 = load double* %21
  %23 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
  %24 = bitcast %struct.__va_list_tag* %23 to i8*
  call void @llvm.va_end(i8* %24)
  ; %1 is never stored to in this function, so the returned value is undefined.
  %25 = load i32* %1
  ret i32 %25
}
; Declarations of the variable-argument intrinsics called by @foo; each takes
; the va_list as an i8*.
; Function Attrs: nounwind
declare void @llvm.va_start(i8*) #1
; Function Attrs: nounwind
declare void @llvm.va_end(i8*) #1
; Function Attrs: nounwind uwtable
; main: returns 0 without calling anything.
define i32 @main() #0 {
  ret i32 0
}
When I run a program in C++, it runs slower than the identical program called by MATLAB using Mex functions.
I tried some sample code to test this, which confirmed my suspicion:
Using C++:
#include <stdio.h>
#include <ctime>
// Benchmark kernel: times a triangular nested loop over a and b and prints
// the elapsed clock() time in seconds to stdout.
//
// a, b: pointers to int data (the [10000] in the parameter type is not
//       enforced by the language); elements 0..9999 of each are read.
void process(int a[10000], int b[10000]) {
const int dim[2] = {1, 10000};
// Scratch buffer: written inside the loop but never read afterwards.
int barData[20000];
clock_t begin = clock();
for (int i = 0; i < dim[1]; i++) {
for (int j = 0; j < i; j++) {
// The first store is overwritten immediately by the second one.
barData[j] = a[i];
barData[j] = b[i];
}
}
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
printf("%f\n", elapsed_secs);
}
// Entry point for the stand-alone benchmark: runs process() once on two
// stack arrays.
int main() {
// NOTE(review): neither array is initialized before process() reads it.
int a[10000], b[10000];
process(a,b);
return 0;
}
Using Mex functions:
#include <stdio.h>
#include "mex.h"
// Mex variant of the benchmark kernel: runs the same triangular nested loop
// over the data of two MATLAB arrays and prints the elapsed clock() time in
// seconds.
//
// first, second: input arrays; their data is accessed through int pointers.
// NOTE(review): the call shown with this code passes rand(...) results; if
// those are double arrays, reading them through int* reinterprets the bytes
// rather than converting the values — confirm.
// NOTE(review): clock()/clock_t are used, but only <stdio.h> and "mex.h" are
// included by this snippet.
void process(const mxArray *first, const mxArray *second) {
int* a = (int *)mxGetData(first);
int* b = (int *)mxGetData(second);
// NOTE(review): assumes mxGetDimensions yields int-sized entries — confirm
// against the mwSize definition for the build in use.
const int *dim = mxGetDimensions(first);
// Output shape: 1 x (2 * second dimension of `first`).
const int dims[2] = {1,dim[1]*2};
// NOTE(review): the array is created as mxINT64_CLASS but written through
// int* below — confirm the intended element type.
mxArray* bar = mxCreateNumericArray(2, dims, mxINT64_CLASS, mxREAL);
int* barData = (int*)mxGetData(bar);
clock_t begin = clock();
for (int i = 0; i < dim[1]; i++) {
for (int j = 0; j < i; j++) {
// The first store is overwritten immediately by the second one.
barData[j] = a[i];
barData[j] = b[i];
}
}
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
printf("%f\n", elapsed_secs);
// `bar` is neither returned nor destroyed within this function.
}
// Mex gateway: forwards the first two right-hand-side arguments to process().
// nrhs is not checked, and nothing is assigned to plhs.
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
process(prhs[0], prhs[1]);
}
And calling it from MATLAB as follows:
mex test.cpp -output foo
foo(rand(1,10000), rand(1,10000))
Mex function gives ~0.012s while C++ code gives 0.108s. The trends scale for larger array sizes too. Why is this, and is there a way to make the C++ code run with the Mex function speed?
As @Praetorian states in a comment above, you are probably not compiling the C++ code with optimization enabled.
Here is the LLVM IR (pseudo-assembly) of your code without optimization:
; ModuleID = 'test.cpp'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Module-level constants: the {1, 10000} `dim` table local to process(), and
; the "%f\n" format string passed to printf. Global names use the '@' prefix.
@_ZZ7processPiS_E3dim = internal constant [2 x i32] [i32 1, i32 10000], align 4
@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1
; Function Attrs: uwtable
; process(int*, int*) compiled without optimization: every local lives in an
; alloca and is reloaded on each use. Block %4 is the outer loop test, %9 the
; inner loop test, %13 the inner loop body, and %37 the timing/printf tail.
; Global identifiers carry the '@' prefix required by LLVM IR.
define void @_Z7processPiS_(i32* %a, i32* %b) #0 {
  %1 = alloca i32*, align 8
  %2 = alloca i32*, align 8
  %barData = alloca [20000 x i32], align 16
  %begin = alloca i64, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %end = alloca i64, align 8
  %elapsed_secs = alloca double, align 8
  store i32* %a, i32** %1, align 8
  store i32* %b, i32** %2, align 8
  %3 = call i64 @clock() #3
  store i64 %3, i64* %begin, align 8
  store i32 0, i32* %i, align 4
  br label %4
; <label>:4 ; preds = %34, %0
  ; Outer loop condition: i < dim[1]
  %5 = load i32* %i, align 4
  %6 = load i32* getelementptr inbounds ([2 x i32]* @_ZZ7processPiS_E3dim, i32 0, i64 1), align 4
  %7 = icmp slt i32 %5, %6
  br i1 %7, label %8, label %37
; <label>:8 ; preds = %4
  store i32 0, i32* %j, align 4
  br label %9
; <label>:9 ; preds = %30, %8
  ; Inner loop condition: j < i
  %10 = load i32* %j, align 4
  %11 = load i32* %i, align 4
  %12 = icmp slt i32 %10, %11
  br i1 %12, label %13, label %33
; <label>:13 ; preds = %9
  ; barData[j] = a[i]
  %14 = load i32* %i, align 4
  %15 = sext i32 %14 to i64
  %16 = load i32** %1, align 8
  %17 = getelementptr inbounds i32* %16, i64 %15
  %18 = load i32* %17, align 4
  %19 = load i32* %j, align 4
  %20 = sext i32 %19 to i64
  %21 = getelementptr inbounds [20000 x i32]* %barData, i32 0, i64 %20
  store i32 %18, i32* %21, align 4
  ; barData[j] = b[i]
  %22 = load i32* %i, align 4
  %23 = sext i32 %22 to i64
  %24 = load i32** %2, align 8
  %25 = getelementptr inbounds i32* %24, i64 %23
  %26 = load i32* %25, align 4
  %27 = load i32* %j, align 4
  %28 = sext i32 %27 to i64
  %29 = getelementptr inbounds [20000 x i32]* %barData, i32 0, i64 %28
  store i32 %26, i32* %29, align 4
  br label %30
; <label>:30 ; preds = %13
  ; j++
  %31 = load i32* %j, align 4
  %32 = add nsw i32 %31, 1
  store i32 %32, i32* %j, align 4
  br label %9
; <label>:33 ; preds = %9
  br label %34
; <label>:34 ; preds = %33
  ; i++
  %35 = load i32* %i, align 4
  %36 = add nsw i32 %35, 1
  store i32 %36, i32* %i, align 4
  br label %4
; <label>:37 ; preds = %4
  ; elapsed_secs = double(end - begin) / 1e6, then printf("%f\n", elapsed_secs)
  %38 = call i64 @clock() #3
  store i64 %38, i64* %end, align 8
  %39 = load i64* %end, align 8
  %40 = load i64* %begin, align 8
  %41 = sub nsw i64 %39, %40
  %42 = sitofp i64 %41 to double
  %43 = fdiv double %42, 1.000000e+06
  store double %43, double* %elapsed_secs, align 8
  %44 = load double* %elapsed_secs, align 8
  %45 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %44)
  ret void
}
; External functions called by process(): clock() and the variadic printf().
; Function Attrs: nounwind
declare i64 @clock() #1
declare i32 @printf(i8*, ...) #2
; Function Attrs: uwtable
; main compiled without optimization: allocates the two 10000-element i32
; arrays on the stack, passes pointers to their first elements to process(),
; and returns 0. The arrays are never stored to before the call.
define i32 @main() #0 {
  %1 = alloca i32, align 4
  %a = alloca [10000 x i32], align 16
  %b = alloca [10000 x i32], align 16
  store i32 0, i32* %1
  %2 = getelementptr inbounds [10000 x i32]* %a, i32 0, i32 0
  %3 = getelementptr inbounds [10000 x i32]* %b, i32 0, i32 0
  call void @_Z7processPiS_(i32* %2, i32* %3)
  ret i32 0
}
; Attribute groups referenced as #0..#3 by the definitions, declarations and
; call sites of this module. Attribute names are single tokens; stray spaces
; inside the quoted names have been removed.
attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.6.2 (tags/RELEASE_362/final)"}
Notice that `_Z7processPiS_` is very long.
Here is with optimization -O3 (which is generally safe in C++ nowadays):
; ModuleID = 'test.cpp'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; "%f\n" format string passed to printf.
@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1
; Function Attrs: nounwind uwtable
; process(int*, int*) at -O3: the nested loops are gone. What remains is two
; back-to-back clock() calls, the difference converted to seconds, and the
; printf. Neither pointer argument is read (readnone).
define void @_Z7processPiS_(i32* nocapture readnone %a, i32* nocapture readnone %b) #0 {
  %1 = tail call i64 @clock() #2
  %2 = tail call i64 @clock() #2
  %3 = sub nsw i64 %2, %1
  %4 = sitofp i64 %3 to double
  %5 = fdiv double %4, 1.000000e+06
  %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double %5)
  ret void
}
; External functions used by the optimized module: clock() and printf().
; Function Attrs: nounwind
declare i64 @clock() #1
; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...) #1
; Function Attrs: nounwind uwtable
; main at -O3: contains the same clock()/clock()/printf sequence as the
; optimized process() instead of a call to it, and no array allocas remain.
define i32 @main() #0 {
  %1 = tail call i64 @clock() #2
  %2 = tail call i64 @clock() #2
  %3 = sub nsw i64 %2, %1
  %4 = sitofp i64 %3 to double
  %5 = fdiv double %4, 1.000000e+06
  %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double %5) #2
  ret i32 0
}
; Attribute groups referenced as #0..#2 by the optimized module. Attribute
; names are single tokens; stray spaces inside the quoted names have been
; removed.
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.6.2 (tags/RELEASE_362/final)"}
P.S.: It would be more idiomatic to write:
#include <ctime>
#include <iostream>
#include <utility>
#include <vector>
using std::vector;
// Benchmark kernel (std::vector version): times a triangular nested loop that
// writes a[i] and then b[i] into barData[j], and prints the elapsed clock()
// time in seconds to std::cout.
//
// a, b: input vectors, taken by const reference so the call does not copy
//       them. Both must hold at least dim.second (10000) elements, because
//       they are indexed without bounds checks.
void process(const std::vector<int>& a, const std::vector<int>& b) {
    // std::pair is qualified explicitly: only std::vector has a
    // using-declaration in this snippet.
    const std::pair<int, int> dim = {1, 10000};
    std::vector<int> barData(20000, 0);

    const clock_t begin = clock();
    for (int i = 0; i < dim.second; i++) {
        for (int j = 0; j < i; j++) {
            // The first store is overwritten immediately by the second one.
            barData[j] = a[i];
            barData[j] = b[i];
        }
    }
    const clock_t end = clock();

    std::cout << double(end - begin) / CLOCKS_PER_SEC << '\n';
}
// Entry point: builds two zero-filled vectors of 10000 ints and passes them
// to process().
int main() {
    vector<int> a(10000, 0);
    vector<int> b(10000, 0);
    process(a, b);
    return 0;
}