How to get the value of a string literal in LLVM IR? - c++

I'm new to LLVM. I'm trying to write a basic Pass that will inspect the arguments of a printf call, when it is given the Intermediate Representation.
If the format string is not a string literal, then of course I can't inspect it. But quite often, it is.
The sample IR I'm trying to inspect is:
; Format string passed to printf below: 7 bytes including the newline and the trailing NUL.
@.str = private unnamed_addr constant [7 x i8] c"Hi %u\0A\00", align 1
define i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
; The first argument is a constant GEP to the first byte of @.str; the second is the value for %u.
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 1)
ret i32 0
}
declare i32 @printf(i8*, ...)
I found the preexisting Pass called ExternalFunctionsPassedConstants, which seemed relevant:
struct ExternalFunctionsPassedConstants : public ModulePass {
static char ID; // Pass ID, replacement for typeid
ExternalFunctionsPassedConstants() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration()) continue;
bool PrintedFn = false;
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI) {
Instruction *User = dyn_cast<Instruction>(*UI);
if (!User) continue;
CallSite CS(cast<Value>(User));
if (!CS) continue;
...
So I added the code:
if (I->getName() == "printf") {
errs() << "printf() arg0 type: "
<< CS.getArgument(0)->getType()->getTypeID() << "\n";
}
So far, so good -- I see that the type ID is 14, which means it's a PointerTyID.
But now, how do I get the contents of the string literal that is being passed as an argument, so I can validate the number of expected arguments against the number actually given?

CS.getArgument(0)
represents the GetElementPtrConstantExpr
i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)
, which is a User object. The string you want (i.e. @.str) is the first operand of this GetElementPtrConstantExpr.
So, you can get the string literal through
CS.getArgument(0).getOperand(0)
However, I have not tested this code. If there are any mistakes, please tell me.

Related

LLVM IR How to pass struct to function

I'm making my own c-like language and I'm trying to pass a struct to a function. The struct
is representing an array(one member is a pointer to the array and the other member is the length). If I call the function "test" like this: call void #test(%structintarray %a) I get error: '%a' defined with type '%structintarray*' but expected '%structintarray = type { i32*, i32 }' . But if I call "test" like this: call void #test(%structintarray* %a) I get error: '#test' defined with type 'void (%structintarray)*' but expected 'void (%structintarray*)*' I don't understand this second error.
What am I doing wrong here?
`
void test(int[] a) {
}
int main() {
int[] a = new int[5];
test(a);
return 0;
}
generates;
%structintarray = type { i32*, i32 }
define void #test(%structintarray %__p__a) {
entry: %a = alloca %structintarray, align 4
store %structintarray %__p__a , %structintarray* %a, align 4
ret void
}
define i32 #main() {
entry: %t0 = call noalias i8* #calloc(i32 5 , i32 4)
%t1 = bitcast i8* %t0 to i32*
%a = alloca %structintarray, align 4
%t2 = getelementptr %structintarray, %structintarray* %a, i32 0, i32 0
store i32* %t1 , i32** %t2, align 4 ; pointer to array
%t3 = getelementptr %structintarray, %structintarray* %a, i32 0, i32 1
store i32 5 , i32* %t3, align 4 ; size of array
call void #test(%structintarray %a)
ret i32 0
}

Identify annotated variable in an LLVM pass

How can I identify an annotated variable in an LLVM pass?
#include <stdio.h>
int main (){
int x __attribute__((annotate("my_var")))= 0;
int a,b;
x = x + 1;
a = 5;
b = 6;
x = x + a;
return x;
}
For example, I want to identify the instructions which have the annotated variable (x in this case) and print them out (x = x+1; and x = x+a)
How can I achieve this?
This is the .ll file generated using LLVM
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
#.str = private unnamed_addr constant [7 x i8] c"my_var\00", section "llvm.metadata"
#.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata"
; Function Attrs: noinline nounwind optnone
define i32 #main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
store i32 0, i32* %1, align 4
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
store i32 0, i32* %2, align 4
%6 = load i32, i32* %2, align 4
%7 = add nsw i32 %6, 1
store i32 %7, i32* %2, align 4
store i32 5, i32* %3, align 4
store i32 6, i32* %4, align 4
%8 = load i32, i32* %2, align 4
%9 = load i32, i32* %3, align 4
%10 = add nsw i32 %8, %9
store i32 %10, i32* %2, align 4
%11 = load i32, i32* %2, align 4
ret i32 %11
}
; Function Attrs: nounwind
declare void #llvm.var.annotation(i8*, i8*, i8*, i32) #1
attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" $
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
I recently ran into a similar problem and could not find a solution by searching Google.
In the end, I found that Utils.cpp in the "ollvm" project solved my problem.
In your case,
%5 = bitcast i32* %2 to i8*
call void #llvm.var.annotation(i8* %5, i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
As we can see, there is a call to @llvm.var.annotation. In our pass,
we can loop over the instructions of a function and search for "call" instructions.
Then get the called function's name:
// getCalledFunction() yields null for an indirect call, so only read the name
// when a direct callee exists; otherwise fn_name stays empty and matches nothing.
Function *fn = callInst->getCalledFunction();
StringRef fn_name = fn ? fn->getName() : StringRef();
and compare the called function's name with "llvm.var.annotation".
If they match, then we have found the location of "int x" in your case.
The function "llvm.var.annotation" is documented in llvm's doc :
http://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic
If you have studied the prototype of "llvm.var.annotation",
then you know that its second argument is a pointer, and that this pointer
points to "my_var\00" in your case. If you think you can simply
cast it to a GlobalVariable, you will fail to get what
you want. The actual second argument passed to "llvm.var.annotation"
is
i8* getelementptr inbounds ([7 x i8], [7 x i8]* #.s$
in your case.
It is a constant expression, not a GlobalVariable! Knowing this, we can
finally get the annotation of our target variable with:
// Argument 1 of the llvm.var.annotation call is expected to be a GEP constant
// expression that points at the global holding the annotation text.
// dyn_cast<> (unlike cast<>) returns null on a mismatch, which makes the check
// below meaningful.
ConstantExpr *ce =
    dyn_cast<ConstantExpr>(callInst->getOperand(1));
if (ce) {
  if (ce->getOpcode() == Instruction::GetElementPtr) {
    if (GlobalVariable *annoteStr =
            dyn_cast<GlobalVariable>(ce->getOperand(0))) {
      // A declaration without an initializer carries no string data to read.
      if (annoteStr->hasInitializer()) {
        if (ConstantDataSequential *data =
                dyn_cast<ConstantDataSequential>(
                    annoteStr->getInitializer())) {
          if (data->isString()) {
            errs() << "Found data " << data->getAsString();
          }
        }
      }
    }
  }
}
Hope you already solved the problem .
Have a nice day .
You have to loop on instructions and identify calls to llvm.var.annotation
First argument is a pointer to the annotated variable (i8*).
To get the actual annotated variable, you then need to find what this pointer points to.
In your case, this is the source operand of the bitcast instruction.

llvm pass replaceAllUsesWith type not match?

I want to replace a GlobalVariable with an encrypted string, but the types do not match.
The GlobalVariable is a const char * string.
The code looks like this:
GlobalVariable* GV = *it;
//get clear text string
std::string clearstr = getGlobalStringValue(GV);
GlobalVariable::LinkageTypes lt = GV->getLinkage();
//encrypt current string
std::string encryptedString = stringEncryption(clearstr);
//create new global string with the encrypted string
std::ostringstream oss;
oss << ".encstr" << encryptedStringCounter << "_" << sys::Process::GetRandomNumber();
Constant *cryptedStr = ConstantDataArray::getString(M.getContext(), encryptedString, true);
GlobalVariable* gCryptedStr = new GlobalVariable(M, cryptedStr->getType(), true, GV->getLinkage(), cryptedStr, oss.str());
StringMapGlobalVars[oss.str()] = gCryptedStr;
//replace use of clear string with encrypted string
GV->replaceAllUsesWith(gCryptedStr);
but failed with:
Assertion failed: (New->getType() == getType() && "replaceAllUses of
value with new value of different type!"),
First of all: I recommend replacing everything with a value of the right type in LLVM IR; that is why this assertion is there.
However:
You get this assertion because your strings do not match in length. A global string is represented as an array of characters (i.e. i8 values). So the type of your string is [len x i8] where len is the length of your string.
#.str = private unnamed_addr constant [12 x i8] c"hello world\00", align 1
What you can do is write your own replacement function like this:
/// Redirects every use of `from` to `to` and then erases `from` from its parent.
///
/// Unlike replaceAllUsesWith, no check is made that both values have the same
/// type, which is why this is "unsafe": users that depend on the old type
/// (for example a GEP with a constant index) are left as they are.
///
/// \param from value whose uses are rewritten; it is erased afterwards.
/// \param to   value that takes over all uses of `from`.
///
/// Passing the same pointer for both arguments is a no-op.
template<typename T>
void ReplaceUnsafe(T *from, T *to) {
  // Replacing a value with itself would put every use straight back on
  // `from` (the loop below would never drain) and would then erase the very
  // value that is supposed to stay.
  if (from == to)
    return;
  // The loop relies on set() removing the use from `from`'s use list, so
  // use_begin() yields a different use on every iteration.
  while (!from->use_empty()) {
    auto &U = *from->use_begin();
    U.set(to);
  }
  from->eraseFromParent();
}
However, this is (as the function name indicates) unsafe and here is why:
Consider the following C/C++ code:
int main() {
return "hello world"[9];
}
which will just return the int representation of l.
Compiled to IR it looks like this:
; "hello world" plus the trailing NUL: 12 bytes.
@.str = private unnamed_addr constant [12 x i8] c"hello world\00", align 1
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
; Constant GEP with the hard-coded index 9 into the 12-element array.
%0 = load i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i64 9), align 1
%conv = sext i8 %0 to i32
ret i32 %conv
}
If the string is now replaced with something of a different type (e.g., something of type [7 x i8]), then you may end up with a problem because your GEP instruction has 9 as a constant index. This will result in an out-of-bounds access. I don't know if the LLVM verifier pass catches this when it looks at GEP instructions (if you run it).
Constant *cryptedStr = ConstantDataArray::getString(M.getContext(), encryptedString, true);
change to
Constant *cryptedStr = ConstantDataArray::getString(M.getContext(), encryptedString, false);

Need insights about writing a pass

For my source code, I have the following IR:
; ModuleID = '<stdin>'
#.str = private unnamed_addr constant [9 x i8] c"SOME_ENV_VAR\00", align 1
#.str1 = private unnamed_addr constant [26 x i8] c"Need to set $ENV_Variable.\0A\00", align 1
; Function Attrs: nounwind
define void #foo(i8* %bar) #0 {
entry:
%bar.addr = alloca i8*, align 4
%baz = alloca i8*, align 4
store i8* %bar, i8** %bar.addr, align 4
%call = call i8* #getenv(i8* getelementptr inbounds ([9 x i8]* #.str, i32 0, i32 0)) #2
store i8* %call, i8** %baz, align 4
%0 = load i8** %baz, align 4
%cmp = icmp eq i8* %0, null
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
%call1 = call i32 (i8*, ...)* #printf(i8* getelementptr inbounds ([26 x i8]* #.str1, i32 0, i32 0))
br label %if.end
if.else: ; preds = %entry
%1 = load i8** %bar.addr, align 4
%2 = load i8** %baz, align 4
%call2 = call i8* #strcpy(i8* %1, i8* %2) #2
br label %if.end
if.end: ; preds = %if.else, %if.then
ret void
}
; Function Attrs: nounwind
declare i8* #getenv(i8*) #0
declare i32 #printf(i8*, ...) #1
; Function Attrs: nounwind
declare i8* #strcpy(i8*, i8*) #0
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
I've written the following code so far:
#include <stdlib.h>
#include <stdio.h>
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
namespace
{
// Builds a new module named "CustomPass" with a function "foo" and three
// basic blocks (Entry, False, True), and returns the module pointer obtained
// from `new`.
// NOTE(review): several calls below pass host-side values (StringRef, char*,
// the literal 45) where the IRBuilder presumably expects llvm::Value* -- the
// markers //---1 and //---2 flag the lines the question asks about.
Module* makeLLVMModule() {
Module* mod = new Module(llvm::StringRef("CustomPass"),getGlobalContext());
// NOTE(review): only a return type (i32) is given before the NULL terminator,
// so foo presumably has no parameters -- confirm against getOrInsertFunction.
Constant* c = mod->getOrInsertFunction(llvm::StringRef("foo"),Type::getInt32Ty(getGlobalContext()),NULL);
Function* foo = cast<Function>(c);
Function::arg_iterator args =foo->arg_begin();
// Takes the first argument of foo as the strncpy destination used below.
Value* bar = args++;
BasicBlock* Entry = BasicBlock::Create(getGlobalContext(),llvm::Twine("Entry"), foo);
BasicBlock* False = BasicBlock::Create(getGlobalContext(),llvm::Twine("False"), foo);
BasicBlock* True = BasicBlock::Create(getGlobalContext(),llvm::Twine("True"), foo);
// getenv() is called here, while this builder function itself runs; pPath is
// a plain char* of the running process, not a value in the module being built.
char* pPath;
pPath = getenv("SOME_ENV_VAR");
IRBuilder<> builder(Entry);
Value* envVarDoesntExist = builder.CreateICmpEQ(llvm::StringRef(pPath),Constant::getNullValue(Value),llvm::Twine("temp"));
//---1
// Branches to the False block when the comparison holds, otherwise to True.
builder.CreateCondBr(envVarDoesntExist, False, True);
builder.SetInsertPoint(True);
builder.CreateCall3(strncpy,bar,llvm::StringRef(pPath),45,llvm::Twine("temp"));
//---2
builder.SetInsertPoint(False);
builder.CreateCall(printf,llvm::StringRef("Need to set $ENV_Variable.\n"),llvm::Twine("temp"));
//---1
return mod;
}
}
char funcP::ID = 0;
static RegisterPass<funcP> X("funcp", "funcP", false, false);
From ---1:How to convert llvm::StringRef to Value* ?
From ---2:How to convert char* to Value*
Could Constant::getNullValue(Value) be used for getting a NULL value?
I intend to write a pass, which when compiled (using LLVM), produces bitcode where the call to strcpy(dest,src) is replaced with strncpy(dest,src,n).
Then what you need to do is to locate the call instruction and change it. There's no need to recreate the entire flow, it's already in your source code.
All you need to do is to create a function pass, iterate over all the instructions in the function, and if the instruction is a call instruction and the callee's name is strcpy then create a new call instruction to your new function, then replace the old instruction with the new instruction.
Also there seems to be some fundamental misunderstanding in your code between values in the compiler (such as 45 and all the StringRefs) and values in the code you are processing (instances of one of the subtypes of llvm::Value). Specifically, you can't just use 45 as a parameter to a function in the code you are processing - you have to create a constant int from that number, and then you can use that constant.
One final note - you can implicitly construct a StringRef from a const char*, you don't need to explicitly call the StringRef's constructor all over the place. Same with Twine.

llvm get annotations

I updated my previous question under a new form.
Hello everyone,
I have the following LLVM IR :
#.str = private unnamed_addr constant [3 x i8] c"DS\00", section "llvm.metadata"
#llvm.global.annotations = appending global [1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }], section "llvm.metadata"
I need to get #f (or maybe I can get somehow the definition of #f = global i32 0, align 4 ) and also I need to get "DS" from #.str. In my target code I have :
__attribute__((annotate("DS"))) int f=0;
I have problems to parse #llvm.global.annotations and I assume I will have with #.str. What I tried:
1.
for (Module::global_iterator I = F.global_begin(), E = F.global_end(); I != E; ++I) {
if (I->getName() == "llvm.global.annotations") {
Value *V = cast<Value>(I->getOperand(0));
errs()<<"\n "<<*(V)<<"\n";
errs()<<"\n "<<*(V->getType())<<"\n";
RESULT :
[1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }]
[1 x { i8*, i8*, i8*, i32 }]
2.
errs()<<"\n "<<(V->getValueID())<<"\n";
if(V->getValueID() == Value::ConstantArrayVal) {
ConstantArray *ca = (ConstantArray *)V;
errs()<<"\n "<<(ca[0])<<"\n"; }
RESULT :
[1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (i32* #f to i8*), i8* getelementptr inbounds ([3 x i8]* #.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* #.str1, i32 0, i32 0), i32 18 }]
Any help is welcomed ! Thank you !
Quite a late answer, but Google led me here and I thought that providing a full LLVM pass that discovers free text annotation would be helpful.
This LLVM pass instruments only functions marked with __attribute__((annotate("someFreeTextAnnotation"))).
The code follows:
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Constants.h"
#include <set>
using namespace llvm;
const char *AnnotationString = "someFreeTextAnnotation";
namespace {
struct Hello : public FunctionPass {
static char ID;
Hello() : FunctionPass(ID) {}
std::set<Function*> annotFuncs;
virtual bool doInitialization(Module &M)override{
getAnnotatedFunctions(&M);
return false;
}
bool shouldInstrumentFunc(Function &F){
return annotFuncs.find(&F)!=annotFuncs.end();
}
void getAnnotatedFunctions(Module *M){
for (Module::global_iterator I = M->global_begin(),
E = M->global_end();
I != E;
++I) {
if (I->getName() == "llvm.global.annotations") {
ConstantArray *CA = dyn_cast<ConstantArray>(I->getOperand(0));
for(auto OI = CA->op_begin(); OI != CA->op_end(); ++OI){
ConstantStruct *CS = dyn_cast<ConstantStruct>(OI->get());
Function *FUNC = dyn_cast<Function>(CS->getOperand(0)->getOperand(0));
GlobalVariable *AnnotationGL = dyn_cast<GlobalVariable>(CS->getOperand(1)->getOperand(0));
StringRef annotation = dyn_cast<ConstantDataArray>(AnnotationGL->getInitializer())->getAsCString();
if(annotation.compare(AnnotationString)==0){
annotFuncs.insert(FUNC);
//errs() << "Found annotated function " << FUNC->getName()<<"\n";
}
}
}
}
}
bool runOnFunction(Function &F) override {
if(shouldInstrumentFunc(F)==false)
return false;
errs() << "Instrumenting " << F.getName() << "\n";
return false;
}
}; // end of struct Hello
} // end of anonymous namespace
char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Discover annotation attribute",
false /* Only looks at CFG */,
false /* Analysis Pass */);
Use runOnModule() instead of runOnFunction() if you are doing so. Alternatively, you can take the module. llvm.global.annotations is defined outside functions. Inside do something like:
for (Module::global_iterator I = F.global_begin(), E = F.global_end(); I != E; ++I) {
if (I->getName() == "llvm.global.annotations")
{
errs()<<"\nllvm.global.annotations\n";
//1. find out what global variable is by "parsing" the IR
//2. get through the module till you find a load #f
//3. you can add metadata to the load function and you can easily get the metadata from the normal pass
}
}
I solved it.
I cast the entire annotated expression to Value*. Then, in order to avoid ugly things like getAsString(), I check if V->getValueID() == Value::ConstantArrayVal in order to cast it to ConstantArray. Because it contains only array[0], I cast array->getOperand(0) to ConstantStruct. From the ConstantStruct you can then get all four operands. All that remains is to get the names of @f and @.str from the fields. This is done by ConstantStruct->getOperand(0)->getOperand(0).