Why is the object prefix converted to function argument? - c++

In the learncpp article about the hidden this pointer, the author mentioned that the compiler converts the object prefix to an argument passed by address to the function.
In the example:
simple.setID(2);
Will be converted to:
setID(&simple, 2); // note that simple has been changed from an object prefix to a function argument!
Why does the compiler do this? I've tried searching other documentation about it but couldn't find any. I've asked other people but they say it is a mistake or the compiler doesn't do that.
I have a second question on this topic. Let's go back to the example:
simple.setID(2); //Will be converted to setID(&simple, 2);
If the compiler converts it, won't it just look exactly like a function that has a name of setID and has two parameters?
void setID(MyClass* obj, int id) { // free function: the object arrives as an explicit pointer parameter
return; // does nothing; only the signature matters for the comparison
}
int main() {
MyClass simple;
simple.setID(2); // member call; according to the article this is converted to setID(&simple, 2)
setID(&simple, 2); // explicit call to the free function setID(MyClass*, int)
}
Lines 6 and 7 would then look exactly the same.

object prefix to an argument passed by address to the function
This refers to how implementations typically translate such a call to machine code (though they could do it in any other way).
Why does the compiler do this?
In some way, you need to be able to refer to the object in the called member function, and one way is to just handle it like an argument.
If the compiler converts it, won't it just look exactly like a function that has a name of setID and has two parameters?
If you have this code:
// Minimal type used to compare a member-function call with a free-function call.
struct Test {
int v = 0; // default member initializer; the constructor below initializes v from its argument instead
// Initializes the member v from the parameter of the same name.
Test(int v ) : v(v) {
}
// Member function: copies this->v and a into locals; nothing is returned or stored.
void test(int a) {
int v = this->v; // the local v shadows the member, which is reached through this
int r = a; // note: the free function test() computes a + v here instead
}
};
// Free-function counterpart of Test::test: the object arrives as the explicit pointer t.
void test(Test* t, int a) {
int v = t->v; // same member read as this->v, but through the named parameter
int r = a + v; // computed into a local that is never used
}
// Calls the member function and the free function with the same object and argument.
int main() {
Test a(2); // a.v is initialized to 2
a.test(1); // member call: the object is the implicit this
test(&a, 1); // free call: the object's address is passed explicitly
return 0;
}
gcc-12 will generate this assembly code (for x86-64, with optimizations turned off):
// Constructor body: rdi carries the object address (this), esi carries the int argument.
Test::Test(int) [base object constructor]:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-8], rdi // spill this
mov DWORD PTR [rbp-12], esi // spill the int argument
mov rax, QWORD PTR [rbp-8]
mov edx, DWORD PTR [rbp-12]
mov DWORD PTR [rax], edx // this->v = argument
nop
pop rbp
ret
// Member function: the object address arrives in rdi like an ordinary first argument; a arrives in esi.
Test::test(int a):
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-24], rdi // spill this
mov DWORD PTR [rbp-28], esi // spill a
// int v = this->v;
mov rax, QWORD PTR [rbp-24]
mov eax, DWORD PTR [rax] // load the member through the this pointer
mov DWORD PTR [rbp-4], eax
// int r = a;
mov eax, DWORD PTR [rbp-28]
mov DWORD PTR [rbp-8], eax
// end of function
nop
pop rbp
ret
// Free function: the prologue and the member load are the same instructions as in Test::test(int).
test(Test* t, int a):
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-24], rdi // spill t
mov DWORD PTR [rbp-28], esi // spill a
// int v = t->v;
mov rax, QWORD PTR [rbp-24]
mov eax, DWORD PTR [rax] // load the member through the explicit pointer
mov DWORD PTR [rbp-4], eax
// int r = a + v;
mov edx, DWORD PTR [rbp-28]
mov eax, DWORD PTR [rbp-4]
add eax, edx
mov DWORD PTR [rbp-8], eax
// end of function
nop
pop rbp
ret
// All three calls pass the address of a in rdi and the int in esi.
main:
push rbp
mov rbp, rsp
sub rsp, 16
// Test a(2);
lea rax, [rbp-4] // address of the local object a
mov esi, 2
mov rdi, rax
call Test::Test(int) [complete object constructor]
// a.test(1);
lea rax, [rbp-4] // &a is placed in rdi for the member call ...
mov esi, 1
mov rdi, rax
call Test::test(int)
// test(&a, 1);
lea rax, [rbp-4] // ... with the same three instructions as for the explicit pointer argument
mov esi, 1
mov rdi, rax
call test(Test*, int)
// end of main
mov eax, 0
leave
ret
So the machine code generated without optimizations looks identical for test(&a, 1) and a.test(1): in both cases the address of the object is passed as the first argument. That is what the statement refers to.
But again, this is an implementation detail of how the compiler translates C++ to machine code; it is not part of the C++ language itself.

Related

Modulus in Assembly x64 linux question C++ [duplicate]

This question already has answers here:
Why does GCC use multiplication by a strange number in implementing integer division?
(5 answers)
Divide Signed Integer By 2 compiles to complex assembly output, not just a shift
(1 answer)
Closed 1 year ago.
I have these functions in C++
// Computes a / 2 into a local.
// NOTE(review): there is no return statement; flowing off the end of a non-void function is undefined behavior in C++.
int f1(int a)
{
int x = a / 2; // signed division by a constant power of two
}
// Computes a % 2 into a local.
// NOTE(review): there is no return statement; flowing off the end of a non-void function is undefined behavior in C++.
int f2(int a)
{
int y = a % 2; // signed remainder by a constant power of two
}
// Computes a % 7 into a local.
// NOTE(review): there is no return statement; flowing off the end of a non-void function is undefined behavior in C++.
int f3(int a)
{
int z = a % 7; // signed remainder by a constant that is not a power of two
}
// Computes a % b into a local.
// NOTE(review): there is no return statement; flowing off the end of a non-void function is undefined behavior in C++.
int f4(int a,int b)
{
int xy = a % b; // signed remainder by a value known only at run time
}
I looked at their assembly code but couldn't understand what it is doing. I couldn't find a good reference or an explained example for it either. Here is the assembly:
// a / 2 for signed a without idiv: add the sign bit first so the arithmetic shift rounds toward zero.
f1(int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi // spill a
mov eax, DWORD PTR [rbp-20]
mov edx, eax
shr edx, 31 // edx = sign bit of a (1 if a is negative, else 0)
add eax, edx // a + 1 for negative a, a otherwise
sar eax // arithmetic shift right by one: eax = a / 2
mov DWORD PTR [rbp-4], eax // x = result
nop
pop rbp
ret
// a % 2 for signed a: ((a + s) & 1) - s, where s is 1 for negative a and 0 otherwise.
f2(int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi // spill a
mov eax, DWORD PTR [rbp-20]
cdq // edx = sign extension of eax (all ones if a is negative)
shr edx, 31 // s = 1 if a is negative, else 0
add eax, edx // a + s
and eax, 1 // keep the low bit of a + s
sub eax, edx // subtract s again, so a negative odd a yields -1
mov DWORD PTR [rbp-4], eax // y = result
nop
pop rbp
ret
// a % 7 without idiv: q = a / 7 via multiplication by a fixed-point reciprocal of 7, then a - 7*q.
f3(int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi // spill a
mov eax, DWORD PTR [rbp-20]
movsx rdx, eax // sign-extend a to 64 bits
imul rdx, rdx, -1840700269 // multiply by the magic constant (bit pattern 0x92492493 read as a signed 32-bit value)
shr rdx, 32 // keep the high 32 bits of the product
add edx, eax // add a back: compensates for the constant being taken as negative
sar edx, 2 // finish the fixed-point scaling
mov esi, eax
sar esi, 31 // esi = -1 if a is negative, else 0
mov ecx, edx
sub ecx, esi // q = a / 7, corrected so that it rounds toward zero for negative a
mov edx, ecx
sal edx, 3 // 8*q
sub edx, ecx // 7*q
sub eax, edx // a - 7*q = a % 7
mov DWORD PTR [rbp-4], eax // z = result
nop
pop rbp
ret
// a % b with a divisor known only at run time: a real idiv is used and the remainder is taken from edx.
f4(int, int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi // spill a
mov DWORD PTR [rbp-24], esi // spill b
mov eax, DWORD PTR [rbp-20]
cdq // sign-extend eax into edx:eax, the dividend idiv expects
idiv DWORD PTR [rbp-24] // eax = quotient, edx = remainder
mov DWORD PTR [rbp-4], edx // xy = remainder
nop
pop rbp
ret
Can you please explain, with an example or step by step, how the answers are calculated in all three of these cases, and why these sequences work correctly in place of a normal divide?

When coding in C++, what is the difference in memory consumption between a reference and a variable? [duplicate]

This question already has answers here:
How is a reference variable represented in memory?
(1 answer)
memory of a reference variable in c++?
(3 answers)
How does a C++ reference look, memory-wise?
(9 answers)
C++ do references occupy memory
(2 answers)
Closed 1 year ago.
I compiled the following code:
// Returns num by way of a local copy; numReference is bound to num but never read.
int foo(int num) {
int &numReference = num; // reference bound to the parameter
int numVarible = num; // independent copy of the parameter's value
return numVarible;
}
The assembly output is:
// Unoptimized foo: the reference occupies an 8-byte stack slot holding an address, the copy a 4-byte slot holding the value.
foo(int):
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi // spill num
lea rax, [rbp-20] // address of num
mov QWORD PTR [rbp-8], rax // numReference: stored as that address
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-12], eax // numVarible: a copy of the value
mov eax, DWORD PTR [rbp-12] // return numVarible
pop rbp
ret
The assembly output for int &numReference = num; is:
lea rax, [rbp-20]
mov QWORD PTR [rbp-8], rax
The assembly output for int numVarible = num; is:
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-12], eax
Question 1: what is the difference in memory consumption between the two?
I then changed the code from "return numVarible;" to "return numReference ;".
The assembly changes from
mov eax, DWORD PTR [rbp-12]
to
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax]
Question 2: why does the assembly have more lines when the reference is used?

C++ inline asm move WCHAR in 32-bit register

I am trying to practice the inline ASM in C++ :) Maybe outdated, but it is interesting, to know how CPU is executing the code.
So, what I am trying to do here, is to loop through processes and get a handle of needed one :) I am using for that already created methods from tlhelp32
I have this code:
HANDLE RetHandle = nullptr, snap;
int SizeOfPE = sizeof(PROCESSENTRY32), pid; PROCESSENTRY32 pe;
int PA = PROCESS_ALL_ACCESS;
const char* Pname = "explorer.exe";
// Inline-assembly attempt to walk a process snapshot and store a process id in pid.
// NOTE(review): every call below is preceded by loads into eax/ebx, presumably meant as its arguments; confirm the callee's calling convention.
__asm
{
mov eax, pe
mov ebx, this
mov ecx, [ebx]pe.dwSize // this load is overwritten by the next instruction
mov ecx, SizeOfPE
mov[ebx]pe.dwSize, ecx // pe.dwSize = SizeOfPE, addressed relative to this
mov eax, PA
mov ebx,0
call CreateToolhelp32Snapshot
// NOTE(review): this loads snap into eax, replacing whatever the call left there; snap is never written in this block. Presumably "mov snap, eax" was intended; confirm.
mov eax,snap
label1:
mov eax, snap
mov ebx, [pe]
call Process32First
cmp eax,1
jne exitLabel // leave when the call did not yield 1 in eax
Process32NextLoop:
mov eax, snap
mov ebx, [pe]
call Process32Next
cmp eax, 1
jne Process32NextLoop // loops again when eax != 1; falls through to the name check when eax == 1
mov edx, pe
mov ecx, [edx].szExeFile
// NOTE(review): this compares two 32-bit values (ecx against Pname), not the contents of two strings; confirm intent.
cmp ecx, Pname
je ExitLoop
jne Process32NextLoop // together with the je above, one of the two jumps is always taken
ExitLoop:
// NOTE(review): the member is accessed without naming its structure, unlike "[ebx]pe.dwSize" above; the question reports an error on this line.
mov eax, [ebx].th32ProcessID
mov pid, eax
ExitLabel:
// NOTE(review): ret executes directly inside the __asm block; confirm that bypassing the rest of the enclosing function is intended.
ret
}
Apparently, it also throws an error at th32ProcessID, even though that is just a regular int.
I have been searching, but haven't found the equivalent of movl in C++.

Passing a r-value-reference to constructor to reduce copies

I have the following code lines
#include <stdio.h>
#include <utility>
// Wrapper around a single int, constructed from an rvalue int; copy and move operations are deleted.
class A
{
public: // member functions
explicit A(int && Val)
{
// NOTE(review): the marker comment on the next line ends with a backslash. A backslash directly before the newline splices the following line into the comment, so the closing brace of this constructor is swallowed.
_val = std::move(Val); // \2\
}
virtual ~A(){}
private: // member variables
int _val = 0; // default member initializer; the constructor assigns the real value afterwards
private: // member functions
A(const A &) = delete;
A& operator = (const A &) = delete;
A(A &&) = delete;
A&& operator = (A &&) = delete;
};
// Constructs one A from the literal 3.
int main()
{
// NOTE(review): the trailing backslash in the marker comment on the next line splices the "return 0;" line into the comment.
A a01{3}; // \1\
return 0;
}
I would like to ask: how many copies are made between \1\ and \2\?
Your code doesn't compile as posted. After making the changes needed for it to compile, it does nothing and compiles to the following x86 assembly, because none of its values are ever used:
main:
xor eax, eax // return value 0; no other instruction of the program remains
ret
https://godbolt.org/z/q70EMb
Modifying the code so that it requires the output of the _val member variable (with a print statement) shows that with optimizations it simply moves the value 0x03 into a register and prints it:
// Optimized build: no object is materialized; the constant 3 is passed straight to printf.
.LC0:
.string "%d\n"
main:
sub rsp, 8 // keeps rsp 16-byte aligned at the call
mov esi, 3 // second printf argument: the constant 3
mov edi, OFFSET FLAT:.LC0 // first printf argument: the format string
xor eax, eax // al = 0: no vector registers are used by this variadic call
call printf
xor eax, eax // return 0
add rsp, 8
ret
https://godbolt.org/z/JG73Ll
If you disable optimizations in an attempt to get the compiler to output a more verbose version of the program, you get:
// Unoptimized constructor: rdi = this, rsi = address of the int that Val is bound to.
A::A(int&&):
push rbp
mov rbp, rsp
sub rsp, 16
mov QWORD PTR [rbp-8], rdi // spill this
mov QWORD PTR [rbp-16], rsi // spill the reference, which is an address
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], 0 // presumably the default member initializer _val = 0 (the modified source is not shown)
mov rax, QWORD PTR [rbp-16]
mov rdi, rax
call std::remove_reference<int&>::type&& std::move<int&>(int&)
mov edx, DWORD PTR [rax] // read the int through the address returned by std::move
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], edx // store it into the object: a single 4-byte copy
nop
leave
ret
// Unoptimized main: the literal 3 is written to a stack slot and its address is passed to the constructor.
.LC0:
.string "%d\n"
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov DWORD PTR [rbp-4], 3 // the literal 3 becomes a temporary int on the stack
lea rdx, [rbp-4] // address of that temporary
lea rax, [rbp-8] // address of the A object
mov rsi, rdx
mov rdi, rax
call A::A(int&&)
mov eax, DWORD PTR [rbp-8] // read the stored value back from the object for printing
mov esi, eax
mov edi, OFFSET FLAT:.LC0
mov eax, 0
call printf
mov eax, 0
leave
ret
// Without optimization std::move is a real call; it returns the address it received unchanged.
std::remove_reference<int&>::type&& std::move<int&>(int&):
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-8], rdi // spill the incoming address
mov rax, QWORD PTR [rbp-8] // return that same address
pop rbp
ret
https://godbolt.org/z/ZTK40d
The answer to your question depends on how your program is compiled and how copy elision is enforced, as well as if there is any benefit in the case of an int to not "copying" a value, since an int* and int likely take up the same amount of memory.
You are merely assigning a value, not copying an object. Nevertheless, you can add a static member to your class that is incremented every time this constructor is called:
class A
{
public: // member functions
// Counts how many times the constructor has run.
// Only declared here: a non-const static data member cannot take an in-class
// initializer, so define it exactly once at namespace scope:
//     int A::counter = 0;
static int counter;
explicit A(int && Val)
{
// The marker is written as a block comment on purpose: a line comment ending
// in a backslash would splice the next line (the increment) into the comment.
_val = std::move(Val); /* \2\ */
counter++;
}
....

A temporary array is assigned but not a temporary primary value

I am amazed that this C++ code is compiled:
int main()
{
// Creates an unnamed int[10] in place and assigns 15 to its element 0.
(int[10]){}[0]=15;
return 0;
}
The equivalent assembly is
main:
push rbp
mov rbp, rsp
// Five 8-byte zero stores cover the 40 bytes of the unnamed int[10] at [rbp-48].
mov QWORD PTR [rbp-48], 0
mov QWORD PTR [rbp-40], 0
mov QWORD PTR [rbp-32], 0
mov QWORD PTR [rbp-24], 0
mov QWORD PTR [rbp-16], 0
mov DWORD PTR [rbp-48], 15 // element 0 = 15
mov eax, 0
pop rbp
ret
According to this code, an array is defined without having any name and then assigned.
Interestingly, when there is no array, the code does not compile:
int main()
{
// Same construct with a non-array type: the assignment targets the unnamed int itself.
(int){}=15; /* <Compilation failed> */
return 0;
}
1. Why is the first expression (perhaps you would call it assigning to an xvalue) legal in C++ for a temporary array, but the second one, for a basic fundamental type, is not? Why is the language designed this way?
2. What is the practical use of such a temporary array?