LLVM "Instruction does not dominate all uses" - Inserting new Instruction - llvm

I am getting the following error while inserting an instruction using an llvm pass:
Instruction does not dominate all uses!
%add = add nsw i32 10, 2
%cmp3 = icmp ne i32 %a.01, %add
Broken module found, compilation aborted!
I have the source code in a bitcode file whose snippet is:
; Both arms of the branch define a value that is merged by the phi in %if.end.
if.then: ; preds = %entry
%add = add nsw i32 10, 2
br label %if.end
if.else: ; preds = %entry
%sub = sub nsw i32 10, 2
br label %if.end
if.end: ; preds = %if.else, %if.then
%a.0 = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
%a.01 = call i32 @tauInt32Ty(i32 %a.0) ; line A
%add3 = add nsw i32 %a.01, 2
%add4 = add nsw i32 %a.01, 3
%call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str2, i32 0, i32 0), i32 %add3, i32 %add4)
I want to insert a new instruction after "line A" which is :
%cmp3 = icmp ne i32 %a.01, %add
And I have written a function pass whose snippet of the code which does this task is :
// Walk every instruction of every basic block in F looking for calls to
// "tauInt32Ty", and insert an `icmp ne` right after each such call.
for (Function::iterator bb = F.begin(), e = F.end(); bb != e; ++bb) {
for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; ++i) {
std::string str;
// NOTE(review): the opening brace of this `if` sits inside the trailing
// comment, so as written the `if` guards only the next statement and the
// closing braces below do not pair up with it; confirm against the real source.
if(isa<CallInst>(i))// || true) {
// `next` is the instruction that follows the call; it is passed to ICmpInst
// below as the insertion point.
BasicBlock::iterator next_it = i;
next_it++;
Instruction* next = dyn_cast<Instruction>(&*next_it);
CallInst* ci = dyn_cast<CallInst>(&*i);
Function* ff = ci->getCalledFunction();
str = ff->getName();
errs()<<"> "<<str<<"\n";
// std::string::compare returns 0 on equality, hence the negation.
if(!str.compare("tauInt32Ty")) {
hotPathSSA1::varVersionWithPathsSet::iterator start = tauArguments[&*ci].begin();
hotPathSSA1::varVersionWithPathsSet::iterator end = tauArguments[&*ci].end();
Value* specArgs = start->second; // specArgs points to %add
// This is the instruction the verifier rejects: %add is defined in if.then,
// but the compare is placed in if.end, which if.then does not dominate.
ICmpInst* int1_cmp_56 = new ICmpInst(next, ICmpInst::ICMP_NE, ci, specArgs, "cmp3");
}
}
}
}

I have not encountered this problem myself yet, but I think the cause is the if statement. %add belongs to the if.then basic block, and if.then does not dominate if.end (control can also reach if.end through if.else), so %add is not accessible from the if.end block. This is exactly why the phi instruction "chooses" whichever value is available, %add or %sub. So you have to use %a.0 as the argument of your ICmpInst, not %add.

Related

How to get labels from a phinode and their corresponding basicblocks in LLVM?

Say the IR code looks like:
; Spills both i1 arguments to stack slots, then stores (y || r) into %l;
; the short-circuit result is merged by the phi in %lor.end.
define void @_Z1mbb(i1 zeroext %r, i1 zeroext %y) nounwind {
entry:
%r.addr = alloca i8, align 1
%y.addr = alloca i8, align 1
%l = alloca i8, align 1
%frombool = zext i1 %r to i8
store i8 %frombool, i8* %r.addr, align 1
%frombool1 = zext i1 %y to i8
store i8 %frombool1, i8* %y.addr, align 1
%0 = load i8* %y.addr, align 1
%tobool = trunc i8 %0 to i1
br i1 %tobool, label %lor.end, label %lor.rhs
lor.rhs: ; preds = %entry
%1 = load i8* %r.addr, align 1
%tobool2 = trunc i8 %1 to i1
br label %lor.end
lor.end: ; preds = %lor.rhs, %entry
%2 = phi i1 [ true, %entry ], [ %tobool2, %lor.rhs ]
%frombool3 = zext i1 %2 to i8
store i8 %frombool3, i8* %l, align 1
ret void
}
the phinode has 2 pairs [ true, %entry ], [ %tobool2, %lor.rhs ]. How do I extract %entry and %lor.rhs and find the corresponding basicblock of each pair? Any help will be appreciated.
PHI->getNumIncomingValues() : returns the number of incoming values in the PHINode
For your phi node:
%2 = phi i1 [ true, %entry ], [ %tobool2, %lor.rhs ]
PHI->getIncomingValue(0) : gives true
PHI->getIncomingBlock(0) : gives %entry
There are iterators for blocks and values as well.
http://llvm.org/doxygen/classllvm_1_1PHINode.html
Always refer to doxygen docs to see all the APIs associated with a class(Ex: PHINode).

How to get Instruction in MachineInstr?

I wanted to know variable dependence in a real register (like X86:EAX, EBX ...). So, I have created an IR-PASS that can identify dependencies on the IR. This pass uses the newly added variables unsigned HasDependency: 1; and unsigned HasMaybeDependency: 1; in the Value class.
.
.
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
unsigned HasHungOffUses : 1;
unsigned HasDescriptor : 1;
unsigned HasDependency : 1;
unsigned HasMaybeDependency : 1;
.
.
.
void setDependency() { HasDependency = true; }
void setMaybeDependency() { HasMaybeDependency = true; }
bool hasDependency() const { return HasDependency; }
bool hasMaybeDependency() const { return HasMaybeDependency; }
//static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned),
// "Value too big");
When applied to a code snippet like this:
extern int foo_called(int a);
// Sample input for the dependency pass: `a` is the annotated variable whose
// dependencies are tracked through the loop.
int foo(int k)
{
// `a` carries annotate("xxx"); it is declared without an initializer and is
// read in the first loop iteration below.
int __attribute__((annotate("xxx"))) a;
for (int i = 0; i < k; i++)
{
int c = a + k;
a += foo_called(c);
}
// The accumulated value of `a` is not returned.
return 0;
}
which produces this bitcode:
; IR for foo(int k): the annotated variable lives in the alloca %a, and the loop
; in %for.body adds each foo_called() result into it.
define i32 @"\01?foo@@YAHH@Z"(i32 %k) local_unnamed_addr #0 {
entry:
%a = alloca i32, align 4
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
call void @llvm.var.annotation(i8* nonnull %0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i32 0, i32 0), i32 17)
%cmp7 = icmp sgt i32 %k, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
%.pre = load i32, i32* %a, align 4, !tbaa !3
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
ret i32 0
for.body: ; preds = %for.body, %for.body.lr.ph
%1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
%i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%add = add nsw i32 %1, %k
%call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
%2 = load i32, i32* %a, align 4, !tbaa !3
%add2 = add nsw i32 %2, %call
store i32 %add2, i32* %a, align 4, !tbaa !3
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %k
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
declare i32 @"\01?foo_called@@YAHH@Z"(i32) local_unnamed_addr #3
The result of the pass on the above bitcode is:
Function - ?foo@@YAHH@Z
Annotated Variable List :
- Annotated : a(message: xxx)
Annotated-Variable : a
(Perpect) %add2 = add nsw i32 %2, %call
(Perpect) %2 = load i32, i32* %a, align 4, !tbaa !3
(Perpect) %a = alloca i32, align 4
(Perpect) %cmp7 = icmp sgt i32 %k, 0
(Maybe) %exitcond = icmp eq i32 %inc, %k
(Maybe) %inc = add nuw nsw i32 %i.08, 1
(Maybe) %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
(Perpect) %call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
(Perpect) %add = add nsw i32 %1, %k
(Perpect) %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
(Perpect) %.pre = load i32, i32* %a, align 4, !tbaa !3
I followed the SelectionDAGISel.cpp: SelectAllBasicBlocks function to get information from the backend, but I was able to get only AllocaInst, StoreInst, and LoadInst using as follows:
// Print the IR Value attached to each memory operand of each machine
// instruction in `mf`.
for (MachineBasicBlock &Block : mf) {
  for (MachineInstr &MI : Block) {
    MachineInstr::mmo_iterator MemOp = MI.memoperands_begin();
    MachineInstr::mmo_iterator MemOpEnd = MI.memoperands_end();
    while (MemOp != MemOpEnd) {
      const Value *Underlying = (*MemOp)->getValue();
      // Memory operands without an associated IR Value are skipped.
      if (Underlying != nullptr)
        errs() << *Underlying << "\n";
      ++MemOp;
    }
  }
}
How do I know the correlation between MachineInstr and Instruction? If it is not provided in LLVM, which parts need to be fixed?
This is not the standard way of doing it; it is a trick, but I have found it very useful. If you know the proper way, please leave a comment.
I solved this problem using DebugLoc. It is used to represent source-location information (line, column, enclosing function, etc.) for the .c/.cpp files. This information is preserved from LLVM IR all the way down to MachineInstr.
So, if it is guaranteed that DebugLoc is not otherwise used in your compiler pipeline, you can store the address of the object that holds the information you need in the line field. This allows you to cast the DebugLoc line back to the desired class at the right time. (You can use column, because column must be less than 2^16.)
The following describes in detail the method I used.
Change file and Re-Build your project.
Several design patterns were used to maximize memory efficiency, so I could not easily change the class.
First, modify the DebugLoc print routine. Go to DebugLoc.cpp and remove the DIScope printing code, like this. This saves you from a runtime error.
void DebugLoc::print(raw_ostream &OS) const {
if (!Loc)
return;
// Print source line info.
//auto *Scope = cast<DIScope>(getScope());
//OS << Scope->getFilename();
OS << ':' << getLine();
if (getCol() != 0)
OS << ':' << getCol();
Second, The verifier should be modified. This syntax will be helpful.
void Verifier::visitDILocation(const DILocation &N) {
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "location requires a valid scope", &N, N.getRawScope());
+ //AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ // "location requires a valid scope", &N, N.getRawScope());
if (auto *IA = N.getRawInlinedAt())
AssertDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
}
Third, there are some formal steps to register a class in DebugLoc. Create initialize function for this.
static LLVMContext cnt;
static MDNode *md;
md = MDNode::get(cnt, DILocation::get(cnt, 100, 100, DIScope::get(cnt, nullptr)));
Last, create register function.
// Builds a DebugLoc whose line field carries the address of `info`, with
// (uint16_t)-1 as the column and the file-level `md` node as the scope.
// NOTE(review): reinterpret_cast<unsigned> narrows the pointer to `unsigned`;
// presumably this only works where a pointer fits in 32 bits. Confirm for the
// host this compiler is built on.
static DebugLoc getDebugLoc(DependencyInstrInfoManager *info)
{
return DebugLoc::get(reinterpret_cast<unsigned> (info), (uint16_t)-1, md);
}
// Attaches one DependencyInstrInfo record to instruction I, creating the
// per-instruction DependencyInstrInfoManager on first use.
static void setDebugLoc(Instruction *I, ...)
{
DependencyInstrInfoManager *mgr;
if (I->getDebugLoc()) {
// An existing DebugLoc is taken to be one produced by getDebugLoc(): its
// line value is cast back to the manager pointer.
mgr = reinterpret_cast<DependencyInstrInfoManager *>
(I->getDebugLoc()->getLine());
} else {
// No DebugLoc yet: allocate a manager and stash its address in a new DebugLoc.
mgr = new DependencyInstrInfoManager();
I->setDebugLoc(getDebugLoc(mgr));
}
// S and T are not declared in this snippet; presumably they come from the
// elided parameters.
mgr->addInfo(new DependencyInstrInfo(I, S, T, ...));
}
DependencyInstrInfoManager is the class for answering the above questions.
Finally, you can print your own information in XXXMCInstLower.cpp:EmitInstruction();(like X86MCInstLower.cpp). The following statement is an example of the output of my case.
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
if (MI->getDebugLoc()) {
DependencyInstrInfoManager *mgr = reinterpret_cast<DependencyInstrInfoManager *>
(MI->getDebugLoc()->getLine());
mgr->doFolding();
for (auto DI : *mgr)
OutStreamer->AddComment(DI->getInfo());
}
Dependency Marking
I have done dependency marking using this method.
// Sample input for the dependency marking: two annotated variables, `a` and
// `b`, are updated in the same loop.
// ANNORATE is not defined in this snippet; presumably it wraps
// __attribute__((annotate(...))). Confirm against the macro definition.
int foo(int k)
{
int ANNORATE("b") b = 0;
int ANNORATE("a") a = 0;
for (int i = 0; i < k; i++)
{
int c = a + k;
// `d` is computed from `b` but never used afterwards.
int d = b + k;
a += foo_called(c);
// Both calls receive `c`, which is derived from `a`.
b += foo_called2(c);
}
return a + foo_called(b);
}
to
# Loop body emitted for foo(); the trailing comments are the dependency tags
# added through OutStreamer->AddComment().
# BB#1: # %for.body.preheader
movl %esi, %ebx
.p2align 4, 0x90
LBB0_2 : # %for.body
# =>This Inner Loop Header: Depth=1
addl %esi, %edi # [Perpect, Source:b]
# [Perpect, Source: a]
pushl %edi # [Maybe, Source:b]
# [Perpect, Source: a]
calll "?foo_called@@YAHH@Z" # [Maybe, Source:b]
# [Perpect, Source: a]
addl $4, %esp # [Maybe, Source:b]
# [Perpect, Source: a]
addl %eax, 4(%esp)
pushl %edi # [Perpect, Source:b]
calll "?foo_called2@@YAHH@Z" # [Perpect, Source:b]
addl $4, %esp # [Perpect, Source:b]
addl (%esp), %eax # [Annotated, Source:b]
movl 4(%esp), %edi # [Perpect, Source:b]
# [Perpect, Source: a]
decl %ebx # [Maybe, Source:b]
movl %eax, (%esp)
jne LBB0_2
jmp LBB0_4

Delete complete branch from llvm ir

There is a branch in ir that I want to delete completely(condtion + branch + true_basic_block + false_basic_block). It looks like this:
; The compare, the conditional branch and both arms (%5, %7) are what should
; be removed; %9 is the join block.
%4 = icmp sge i32 %2, %3
br i1 %4, label %5, label %7
; <label>:5 ; preds = %0
%6 = load i32* %x, align 4
store i32 %6, i32* %z, align 4
br label %9
; <label>:7 ; preds = %0
%8 = load i32* %y, align 4
store i32 %8, i32* %z, align 4
br label %9
; <label>:9 ; preds = %7, %5
%10 = call dereferenceable(140) %"class.std::basic_ostream"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"class.std::basic_ostream"* dereferenceable(140) @_ZSt4cout, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0))
%11 = load i32* %z, align 4
%12 = call dereferenceable(140) %"class.std::basic_ostream"* @_ZNSolsEi(%"class.std::basic_ostream"* %10, i32 %11)
%13 = call dereferenceable(140) %"class.std::basic_ostream"* @_ZNSolsEPFRSoS_E(%"class.std::basic_ostream"* %12, %"class.std::basic_ostream"* (%"class.std::basic_ostream"*)* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_)
ret i32 0
Now to delete it , is there a removeBranch function , or do I need to delete instructions one by one. I have been trying the latter way but I have seen every error from "Basic block in main does not have an terminator" to "use remains when def is destroyed", and many more.. I have used erasefromparent, replaceinstwithvalue, replaceinstwithinst, removefromparent, etc.
Can anyone be kind enough to point me in the correct direction?
This is my function_pass :
// Collects every conditional branch in F whose condition is an icmp
// instruction, then removes each of them with removeConditionalBranch().
// Always reports the function as modified.
bool runOnFunction(Function &F) override {
  // Gather first, mutate afterwards: removeConditionalBranch() deletes and
  // merges blocks, which must not happen while F is being iterated.
  for (auto &B : F)
    for (auto &I : B) {
      auto *brn = dyn_cast<BranchInst>(&I);
      if (!brn || !brn->isConditional())
        continue;
      // Test the condition Value directly. A condition that is not an
      // Instruction (a constant or a function argument) made the previous
      // dyn_cast<Instruction> return null, which was then dereferenced.
      if (isa<ICmpInst>(brn->getCondition()))
        branch_vector.push_back(brn);
    }
  /*For now just delete the branches in the vector.*/
  for (auto b : branch_vector)
    removeConditionalBranch(cast<BranchInst>(b));
  // The collected branches have just been erased; drop the pointers so the
  // next invocation does not try to remove them again.
  branch_vector.clear();
  return true;
}
This is the output :
I don't know of any RemoveBranch utility function, but something like this should work. The idea is to delete the branch instruction, then delete anything that becomes dead as a result, and then merge the initial block with the join block.
// for DeleteDeadBlock, MergeBlockIntoPredecessor
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
// for RecursivelyDeleteTriviallyDeadInstructions
#include "llvm/Transforms/Utils/Local.h"
// Removes a two-armed conditional branch together with its condition and both
// arms, then merges the join block into the block that held the branch.
//
// Only the simple diamond is supported: the then- and else-blocks must be
// distinct and must both jump unconditionally to the same join block. Any
// other shape trips an assertion.
void removeConditionalBranch(BranchInst *Branch) {
  assert(Branch &&
         Branch->isConditional() &&
         Branch->getNumSuccessors() == 2);
  BasicBlock *Parent = Branch->getParent();
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *ElseBlock = Branch->getSuccessor(1);
  // If both edges lead to the same block it would be deleted twice below.
  assert(ThenBlock != ElseBlock);
  BasicBlock *ThenSuccessor = ThenBlock->getUniqueSuccessor();
  BasicBlock *ElseSuccessor = ElseBlock->getUniqueSuccessor();
  assert(ThenSuccessor && ElseSuccessor && ThenSuccessor == ElseSuccessor);
  // Fetch the condition BEFORE erasing the branch: eraseFromParent() destroys
  // the instruction, so calling Branch->getCondition() afterwards reads freed
  // memory.
  Value *Condition = Branch->getCondition();
  Branch->eraseFromParent();
  // With its only user gone, the compare (and whatever fed only it) is dead.
  RecursivelyDeleteTriviallyDeadInstructions(Condition);
  // Both arms lost their sole incoming edge when the branch was erased.
  DeleteDeadBlock(ThenBlock);
  DeleteDeadBlock(ElseBlock);
  // Parent currently has no terminator; jump straight to the join block.
  IRBuilder<> Builder(Parent);
  Builder.CreateBr(ThenSuccessor);
  bool Merged = MergeBlockIntoPredecessor(ThenSuccessor);
  assert(Merged);
  (void)Merged; // silence the unused-variable warning when NDEBUG removes the assert
}
This code only handles the simple case you've shown, with the then and else blocks both jumping unconditionally to a common join block (it will fail with an assertion error for anything more complicated). More complicated control flow will be a bit trickier to handle, but you should still be able to use this code as a starting point.

Find values in a basicblock,which are computed in previous basicblocks

In a basic block I want to find all the values used by its instructions that are not computed in the same basic block.
Example,
for.body5:
%i.015 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%add1 = add nsw i32 %2, %i.015
%arrayidx = getelementptr inbounds [100 x i32]* %b, i32 0, i32 %i.015
store i32 %add1, i32* %arrayidx, align 4, !tbaa !0
%arrayidx2 = getelementptr inbounds [100 x i32]* %a, i32 0, i32 %i.015
store i32 %add1, i32* %arrayidx2, align 4, !tbaa !0
%inc = add nsw i32 %i.015, 1
%cmp = icmp slt i32 %inc, %3
br i1 %cmp, label %for.body, label %for.cond3.preheader
In above example i should get,
%2
%b
%a
%3
Which are declared and/or assigned in other basicblocks.
Please Suggest me a method.
Thanks in advance.
Hi, I haven't tested this out, but I would do something like this:
vector<Value*> values;
BasicBlock::iterator it;
User::op_iterator it;
// Iterate over all of the instructions in the Block
for (it=block->begin(); it++; it != block->end()){
// Iterate over the operands used by an instruction. 'op_begin' Defined in llvm::User class.
for (operand_it=it->op_begin(); operand_it++; operand_it != it->op_end() ){
// Could this if else statement be reduced?
// If this operand is an argument it was not defined in the block.
if (isa<Argument>(operand_it)){
values.push_back(operand_it);
}
// Otherwize, it could be a constant value or ...
else if (!isa<Instruction>(operand_it)){
continue;
}
// Check if the parent of the instruction is not the block in question.
else if (((Instruction*)operand_it)->getParent() != block){
values.push_back(operand_it);
}
}
}

nested if vs loop condition

I have to do a comparison and I want to know which will be faster.
1)
// Option 1: compare the four element pairs one at a time.
for (i=0;i<4;i++){
if (object1(i)==object2(i))
retval = true;
else {
// One unequal pair decides the result, so stop looking.
retval = false;
break;
}
}
2)
if ( (object1(0)==object2(0) && (object1(1)==object2(1) && (object1(2)==object2(2) && (object1(3)==object2(3)){
retval = true;
else
retval = false;
Or both will perform the same?
Thanks for Advice
Strictly speaking the most efficient path would be:
retval = object1(0) == object2(0) && object1(1) == object2(1).....
This basically does the same as your loop, but doesn't have to compare the result to true to determine the outcome of the condition.
However, I strongly recommend keeping the loop, as it is far easier to adapt to add or remove numbers.
You need to measure. But in any case the first code can be simplified quite a bit:
// Return at the first mismatch; falling out of the loop means all four pairs matched.
for (i = 0; i < 4; ++i)
if (object1(i) != object2(i))
return false;
return true;
Now choose the more readable form. I’d choose the loop here, unless you have confirmed that there is a performance problem caused by this code.
If the optimization flags are on, then the compiler might produce the same machine instructions for both versions, unrolling the for loop completely, as the exact number of iterations is known to the compiler:
loop unrolling
By the way, if you care so much, then you could write this:
// && short-circuits, so evaluation stops at the first unequal pair.
bool retValue = (object1(0)==object2(0)) &&
(object1(1)==object2(1)) &&
(object1(2)==object2(2)) &&
(object1(3)==object2(3));
which avoids both: for loop, as well as if-else branch, and it doesn't depend on compiler optimization.
As always with optimization, the one and single rule is MEASURE.
Furthermore, I guess that the compiler could optimize this code in some ways you (and I) couldn't even imagine. Therefore I'd suggest to write it in the most readable form.
I like to play with the Try out LLVM and Clang page for this:
// Stand-in for the question's object1/object2. operator() is declared but not
// defined in this snippet.
struct Object {
int operator()(int i) const;
};
// The question's loop form: compares left(i) with right(i) for i = 0..3 and
// leaves the loop at the first mismatch.
// NOTE(review): the function ends with `return true`, not `return retval`, so
// the outcome of the comparisons is discarded. The IR listed for this function
// ends in `ret i1 true`, which matches the code as written.
bool loop(Object const& left, Object const& right) {
bool retval = false;
for (int i = 0; i < 4; i++) {
if (left(i) == right(i) )
retval = true;
else {
retval = false;
break;
}
}
return true;
}
// The question's if/else form: one short-circuiting condition over all four
// pairs, assigned to retval through an if/else.
bool inlineif(Object const& left, Object const& right) {
// This initial value is overwritten on both branches below.
bool retval = true;
if ( left(0) == right(0) &&
left(1) == right(1) &&
left(2) == right(2) &&
left(3) == right(3))
retval = true;
else
retval = false;
return retval;
}
// Simplified loop form: returns false at the first unequal pair and true when
// all four pairs are equal.
bool betterloop(Object const& left, Object const& right) {
for (int i = 0; i < 4; ++i)
if (left(i) != right(i))
return false;
return true;
}
// Simplified inline form: returns the short-circuited conjunction of the four
// comparisons directly, with no temporary and no if/else.
bool betterif(Object const& left, Object const& right) {
return left(0) == right(0) &&
left(1) == right(1) &&
left(2) == right(2) &&
left(3) == right(3);
}
Produces the following IR for loops (regardless of how they are written):
; IR for loop(): the loop is kept as a loop (header %1, body %3, latch %7),
; and the function returns the constant true.
define zeroext i1 @_Z4loopRK6ObjectS1_(%struct.Object* %left, %struct.Object* %right) uwtable {
br label %1
; <label>:1 ; preds = %7, %0
%i.0 = phi i32 [ 0, %0 ], [ %8, %7 ]
%2 = icmp slt i32 %i.0, 4
br i1 %2, label %3, label %9
; <label>:3 ; preds = %1
%4 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %left, i32 %i.0)
%5 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %right, i32 %i.0)
%6 = icmp eq i32 %4, %5
br i1 %6, label %7, label %9
; <label>:7 ; preds = %3
%8 = add nsw i32 %i.0, 1
br label %1
; <label>:9 ; preds = %3, %1
ret i1 true
}
And a very similar IR for the two if (so I'll give only one):
; IR for betterif(): four compare-and-branch stages; any mismatch jumps to %16,
; where the phi yields false, otherwise the result of the last compare.
define zeroext i1 @_Z8betterifRK6ObjectS1_(%struct.Object* %left, %struct.Object* %right) uwtable {
%1 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %left, i32 0)
%2 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %right, i32 0)
%3 = icmp eq i32 %1, %2
br i1 %3, label %4, label %16
; <label>:4 ; preds = %0
%5 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %left, i32 1)
%6 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %right, i32 1)
%7 = icmp eq i32 %5, %6
br i1 %7, label %8, label %16
; <label>:8 ; preds = %4
%9 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %left, i32 2)
%10 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %right, i32 2)
%11 = icmp eq i32 %9, %10
br i1 %11, label %12, label %16
; <label>:12 ; preds = %8
%13 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %left, i32 3)
%14 = tail call i32 @_ZNK6ObjectclEi(%struct.Object* %right, i32 3)
%15 = icmp eq i32 %13, %14
br label %16
; <label>:16 ; preds = %12, %8, %4, %0
%17 = phi i1 [ false, %8 ], [ false, %4 ], [ false, %0 ], [ %15, %12 ]
ret i1 %17
}
The important instruction here is br, which is the branching instruction. It can be used either as a simple goto or with conditions on the edges:
br i1 %11, label %12, label %16
means: if %11 (a value of type i1) is true, go to label %12, otherwise go to label %16.
It seems that "naturally" LLVM will not unroll the traditional loop version, so the if version performs better here. I am quite surprised, actually, that it does not and I cannot figure out why it would not...
So, the inline if code might be a bit faster, but it might also be unnoticeable depending on the cost of left(i) == right(i) (and even then), as CPU are quite good at branch prediction.