Related to C++ and Assembly, what is ebp+8? - c++

I have the following C++ code:
#include <tuple>
std::tuple<int, bool> foo()
{
return std::make_tuple(128, true);
}
int main()
{
auto result = foo();
}
The following is the disassembled version of the foo() function:
push ebp
mov ebp, esp
sub esp, 24
mov BYTE PTR [ebp-13], 1 // second argument
mov DWORD PTR [ebp-12], 128 // first argument
mov eax, DWORD PTR [ebp+8] // what is this? why we need this here?
sub esp, 4
lea edx, [ebp-13]
push edx // second
lea edx, [ebp-12]
push edx // first
push eax // same as "ebp+8", what is this?
call std::tuple<std::__decay_and_strip<int>::__type, std::__decay_and_strip<bool>::__type> std::make_tuple<int, bool>(int&&, bool&&)
add esp, 12
mov eax, DWORD PTR [ebp+8]
leave
ret 4
As far as I know, ebp+X is used to access function arguments, but foo() takes no arguments, so why does the compiler use it?
It seems like it's the first parameter for std::make_tuple().
EDIT:
I'm not using optimization, I just want to learn RE.
Part of main in Assembly:
lea eax, [ebp-16] // location of local variable
sub esp, 12
push eax // as hidden argument for foo
call foo()
add esp, 12

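The calling convention specifies that non-trivial objects are returned through a hidden pointer passed as an argument. That's what you are seeing: [ebp+8] is that hidden pointer (the first stack argument), which foo forwards to std::make_tuple and then hands back in eax. Technically, your code is implemented like this: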
std::tuple<int, bool>* foo(std::tuple<int, bool>* result)
{
*result = std::make_tuple(128, true);
return result;
}
int main()
{
std::tuple<int, bool> result;
foo(&result);
}

Related

Do modern compilers optimize local variables that just refer to subset of objects?

Do most modern compilers end up optimizing the following code so that extra instructions aren't used for the object inner?
func Test(TypeObject *object):
InnerTypedObject *inner = object->inner
print(inner->a)
print(inner->b)
print(inner->c)
I figured that compilers would be able to figure out that inner->a and object->inner.a refer to the same thing, so it would avoid allocating inner altogether. I figured the local variable is probably saved on a register, so I'm not really concerned about performance. Mainly wanted to know if we'd get the same generated machine code.
Thanks to Jerry Coffin for the comment - my original answer was actually quite wrong...
For this code:
struct TypeObject {
int a;
int b;
int c;
};
void print(int x);
void test(TypeObject *object) {
print(object->a);
print(object->b);
print(object->c);
}
https://godbolt.org/g/SrNWkp produces something like this:
test(TypeObject*):
push rbx // save the callee-saved rbx register
mov rbx, rdi // copy the parameter (which is "object") to rbx so it survives the calls
mov edi, DWORD PTR [rbx] // copy object->a to edi
call print(int)
mov edi, DWORD PTR [rbx+4] // copy object->b to edi
call print(int)
mov edi, DWORD PTR [rbx+8] // copy object->c to edi
pop rbx // restore rbx before leaving the function
jmp print(int) // tail call: the last print returns directly to test's caller
And for this code:
struct InnerTypedObject {
int a;
int b;
int c;
};
struct TypeObject {
InnerTypedObject * inner;
};
void print(int x);
void test(TypeObject *object) {
InnerTypedObject *inner = object->inner;
print(inner->a);
print(inner->b);
print(inner->c);
}
https://godbolt.org/g/NC2pa3 produces something like this:
test(TypeObject*):
push rbx // save the callee-saved rbx register
mov rbx, QWORD PTR [rdi] // copy "*object" (which is "inner") to rbx so it survives the calls
mov edi, DWORD PTR [rbx] // copy inner->a to edi
call print(int)
mov edi, DWORD PTR [rbx+4] // copy inner->b to edi
call print(int)
mov edi, DWORD PTR [rbx+8] // copy inner->c to edi
pop rbx // restore rbx before leaving the function
jmp print(int) // tail call: the last print returns directly to test's caller
So the code is still dereferencing object - it stores the pointer once and then uses it three times just like the original code did. The reason for not being able to optimize it better is that what is stored in a pointer is extremely hard to track so the optimizer has to assume it doesn't know what is in there for sure.
Even though both bits of assembly have the same number of instructions, there is an extra memory dereference in the one with "inner" so it could be expensive if the data isn't already in the cache.

C/C++ returning struct by value under the hood

(This question is specific to my machine's architecture and calling conventions, Windows x86_64)
I don't exactly remember where I had read this, or if I had recalled it correctly, but I had heard that, when a function should return some struct or object by value, it will either stuff it in rax (if the object can fit in the register width of 64 bits) or be passed a pointer to where the resulting object would be (I'm guessing allocated in the calling function's stack frame) in rcx, where it would do all the usual initialization, and then a mov rax, rcx for the return trip. That is, something like
extern some_struct create_it(); // implemented in assembly
would really have a secret parameter like
extern some_struct create_it(some_struct* secret_param_pointing_to_where_i_will_be);
Did my memory serve me right, or am I incorrect? How are large objects (i.e. wider than the register width) returned by value from functions?
Here's a simple disassembly of some code that illustrates what you're describing:
typedef struct
{
int b;
int c;
int d;
int e;
int f;
int g;
char x;
} A;
A foo(int b, int c)
{
A myA = {b, c, 5, 6, 7, 8, 10};
return myA;
}
int main()
{
A myA = foo(5,9);
return 0;
}
and here's the disassembly of the foo function, and the main function calling it
main:
push ebp
mov ebp, esp
and esp, 0FFFFFFF0h
sub esp, 30h
call ___main
lea eax, [esp+20] ; placing the addr of myA in eax
mov dword ptr [esp+8], 9 ; param passing
mov dword ptr [esp+4], 5 ; param passing
mov [esp], eax ; passing myA addr as a param
call _foo
mov eax, 0
leave
retn
foo:
push ebp
mov ebp, esp
sub esp, 20h
mov eax, [ebp+12]
mov [ebp-28], eax
mov eax, [ebp+16]
mov [ebp-24], eax
mov dword ptr [ebp-20], 5
mov dword ptr [ebp-16], 6
mov dword ptr [ebp-12], 7
mov dword ptr [ebp-8], 9
mov byte ptr [ebp-4], 0Ah
mov eax, [ebp+8]
mov edx, [ebp-28]
mov [eax], edx
mov edx, [ebp-24]
mov [eax+4], edx
mov edx, [ebp-20]
mov [eax+8], edx
mov edx, [ebp-16]
mov [eax+0Ch], edx
mov edx, [ebp-12]
mov [eax+10h], edx
mov edx, [ebp-8]
mov [eax+14h], edx
mov edx, [ebp-4]
mov [eax+18h], edx
mov eax, [ebp+8]
leave
retn
Now let's go through what just happened. When calling foo, the parameters were passed in the following way: 9 was at the highest address, then 5, then the address where main's myA begins:
lea eax, [esp+20] ; placing the addr of myA in eax
mov dword ptr [esp+8], 9 ; param passing
mov dword ptr [esp+4], 5 ; param passing
mov [esp], eax ; passing myA addr as a param
Within foo there is a local myA which is stored in foo's stack frame. Since the stack grows downwards, the lowest address of myA is [ebp-28]; the -28 offset is probably caused by struct alignment, so I'm guessing the size of the struct is 28 bytes here and not 25 as one might expect. As we can see, after foo's local myA has been created and filled with the parameters and immediate values, it is copied to the address of myA passed from main (this is the actual meaning of return by value):
mov eax, [ebp+8]
mov edx, [ebp-28]
[ebp+8] is where the address of main::myA was stored (memory addresses go upwards, hence ebp + saved old ebp (4 bytes) + return address (4 bytes), for an overall ebp+8, to reach the pointer to the first byte of main::myA). As said earlier, foo::myA starts at [ebp-28], because the stack grows downwards.
mov [eax], edx
Place foo::myA.b at the address of the first data member of main::myA, which is main::myA.b.
mov edx, [ebp-24]
mov [eax+4], edx
Place the value that resides at the address of foo::myA.c in edx, and store that value at the address of main::myA.b + 4 bytes, which is main::myA.c.
As you can see, this process repeats itself throughout the function:
mov edx, [ebp-20]
mov [eax+8], edx
mov edx, [ebp-16]
mov [eax+0Ch], edx
mov edx, [ebp-12]
mov [eax+10h], edx
mov edx, [ebp-8]
mov [eax+14h], edx
mov edx, [ebp-4]
mov [eax+18h], edx
mov eax, [ebp+8]
This basically proves that when returning a struct by value that does not fit in a register, the address where the return value should reside is passed as a parameter to the function, and within the called function the values of the returned struct are copied to the address passed as that parameter...
Hope this example helped you visualize what happens under the hood a little bit better :)
EDIT
I hope that you've noticed that my example was using 32 bit assembler and I KNOW you've asked regarding x86-64, but I'm currently unable to disassemble code on a 64 bit machine so I hope you take my word on it that the concept is exactly the same both for 64 bit and 32 bit, and that the calling convention is nearly the same
That is exactly correct. The caller passes an extra argument which is the address of the return value. Normally it will be on the caller's stack frame but there are no guarantees.
The precise mechanics are specified by the platform ABI, but this mechanism is very common.
Various commentators have left useful links with documentation for calling conventions, so I'll hoist some of them into this answer:
Wikipedia article on x86 calling conventions
Agner Fog's collection of optimization resources, including a summary of calling conventions (Direct link to 57-page PDF document.)
Microsoft Developer Network (MSDN) documentation on calling conventions.
StackOverflow x86 tag wiki has lots of useful links.

Passing parameters one by one, or by wrapping them in an array, struct or tuple

When passing arguments to a function I always assumed that passing arguments one by one is not different from passing them wrapped in an array or a struct or a tuple. However, a simple experiment showed that I was wrong.
The following program when compiled with GCC:
int test(int a, int b, int c, int d) {
return a + b + c + d;
}
int test(std::array<int, 4> arr) {
return arr[0] + arr[1] + arr[2] + arr[3];
}
struct abcd {
int a; int b; int c; int d;
};
int test(abcd s) {
return s.a + s.b + s.c + s.d;
}
int test(std::tuple<int, int, int, int> tup) {
return std::get<0>(tup) + std::get<1>(tup) + std::get<2>(tup) + std::get<3>(tup);
}
...produces a variety of assembly outputs:
impl_test(int, int, int, int):
lea eax, [rdi+rsi]
add eax, edx
add eax, ecx
ret
impl_test(std::array<int, 4ul>):
mov rax, rdi
sar rax, 32
add eax, edi
add eax, esi
sar rsi, 32
add eax, esi
ret
impl_test(abcd):
mov rax, rdi
sar rax, 32
add eax, edi
add eax, esi
sar rsi, 32
add eax, esi
ret
impl_test(std::tuple<int, int, int, int>):
mov eax, DWORD PTR [rdi+8]
add eax, DWORD PTR [rdi+12]
add eax, DWORD PTR [rdi+4]
add eax, DWORD PTR [rdi]
ret
main:
push rbp
push rbx
mov ecx, 4
mov edx, 3
movabs rbp, 8589934592
mov esi, 2
sub rsp, 24
mov edi, 1
movabs rbx, 17179869184
call int test<int, int, int, int>(int, int, int, int)
mov rdi, rbp
mov rsi, rbx
or rbx, 3
or rdi, 1
or rsi, 3
call int test<std::array<int, 4ul> >(std::array<int, 4ul>)
mov rdi, rbp
mov rsi, rbx
or rdi, 1
call int test<abcd>(abcd)
mov rdi, rsp
mov DWORD PTR [rsp], 4
mov DWORD PTR [rsp+4], 3
mov DWORD PTR [rsp+8], 2
mov DWORD PTR [rsp+12], 1
call int test<std::tuple<int, int, int, int> >(std::tuple<int, int, int, int>)
add rsp, 24
xor eax, eax
pop rbx
pop rbp
ret
Why is there a difference?
When a function is called (that is, not inlined, constexpr evaluated or eliminated), the way arguments are passed depends on many factors including:
Whether the argument is an integer or floating-point if the argument is of a primitive type.
The type of the argument.
Whether its address is taken in some non-eliminated code in the callee.
The default or specified calling convention.
Whether Whole Program Optimization (WPO) is being used.
Whether the callee is in a shared library, static library or object file, or in the same translation unit.
The specified floating-point behavior.
The target platform.
The position of the parameter in the parameter list.
Let's get back to the example you provided. You compiled the code with -O2 so dead code won't be eliminated and function inlining is disabled. So all functions have to be called. The target platform is x64.
The first function has four 4-byte integer parameters. Therefore, all of them are passed through registers.
The second function has one fixed-size array of four 4-byte integers. The compiler decided to use two registers (rdi and rsi) to pass the four integers where rdi = 0x200000001 and rsi = 0x400000003. Notice how the four integers (1, 2, 3, 4) are compactly passed using these two registers.
Passing the integers as a structure rather than one by one made the compiler use a different technique to pass them. But there is a trade-off here between code size, speed and the number of registers required.
The same thing goes for the third function.
The last function, however, contains calls to std::get which require the address of the passed tuple. So the address is stored in rdi to be used by the std::get function. Since you're compiling with C++14, std::get is marked with constexpr. The compiler was able to evaluate the function and therefore the memory access has been emitted in test function rather than emitting a call to the std::get function. Notice that this is different from inlining.

classes from c++ to assembly language

I can't understand how classes are implemented in assembly language. I inspected the source code of a c++ application( compiler of visual studio ) that uses a class but it looks like a normal code with no special functions or something other. Where is the constructor of this class and how does it work? I guess the parameters of the constructor are passed by stack, but what does function unknown_libname_1 do?
.text:00261050
.text:00261050 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00261050 _main proc near ; CODE XREF: ___tmainCRTStartup+10Ap
.text:00261050
.text:00261050 var_10 = byte ptr -10h
.text:00261050 var_8 = byte ptr -8
.text:00261050 argc = dword ptr 8
.text:00261050 argv = dword ptr 0Ch
.text:00261050 envp = dword ptr 10h
.text:00261050
.text:00261050 push ebp
.text:00261051 mov ebp, esp
.text:00261053 sub esp, 10h
.text:00261056 push 4
.text:00261058 push 3
.text:0026105A lea ecx, [ebp+var_8]
.text:0026105D call unknown_libname_1 ; Microsoft VisualC 2-10/net runtime
.text:00261062 push 6
.text:00261064 push 5
.text:00261066 lea ecx, [ebp+var_10]
.text:00261069 call unknown_libname_1 ; Microsoft VisualC 2-10/net runtime
.text:0026106E mov eax, ds:?endl#std##YAAAV?$basic_ostream#DU?$char_traits#D#std###1#AAV21##Z ; std::endl(std::basic_ostream<char,std::char_traits<char>> &)
.text:00261073 push eax
.text:00261074 lea ecx, [ebp+var_8]
.text:00261077 call sub_261000
.text:0026107C push eax
.text:0026107D push offset aRectArea ; "rect area: "
.text:00261082 mov ecx, ds:?cout#std##3V?$basic_ostream#DU?$char_traits#D#std###1#A ; std::basic_ostream<char,std::char_traits<char>> std::cout
.text:00261088 push ecx
.text:00261089 call sub_2612D0
.text:0026108E add esp, 8
.text:00261091 mov ecx, eax
.text:00261093 call ds:??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#H#Z ; std::basic_ostream<char,std::char_traits<char>>::operator<<(int)
.text:00261099 mov ecx, eax
.text:0026109B call ds:??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#P6AAAV01#AAV01##Z#Z ; std::basic_ostream<char,std::char_traits<char>>::operator<<(std::basic_ostream<char,std::char_traits<char>> & (*)(std::basic_ostream<char,std::char_traits<char>> &))
.text:002610A1 mov edx, ds:?endl#std##YAAAV?$basic_ostream#DU?$char_traits#D#std###1#AAV21##Z ; std::endl(std::basic_ostream<char,std::char_traits<char>> &)
.text:002610A7 push edx
.text:002610A8 lea ecx, [ebp+var_10]
.text:002610AB call sub_261000
.text:002610B0 push eax
.text:002610B1 push offset aRectbArea ; "rectb area: "
.text:002610B6 mov eax, ds:?cout#std##3V?$basic_ostream#DU?$char_traits#D#std###1#A ; std::basic_ostream<char,std::char_traits<char>> std::cout
.text:002610BB push eax
.text:002610BC call sub_2612D0
.text:002610C1 add esp, 8
.text:002610C4 mov ecx, eax
.text:002610C6 call ds:??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#H#Z ; std::basic_ostream<char,std::char_traits<char>>::operator<<(int)
.text:002610CC mov ecx, eax
.text:002610CE call ds:??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#P6AAAV01#AAV01##Z#Z ; std::basic_ostream<char,std::char_traits<char>>::operator<<(std::basic_ostream<char,std::char_traits<char>> & (*)(std::basic_ostream<char,std::char_traits<char>> &))
.text:002610D4 xor eax, eax
.text:002610D6 mov esp, ebp
.text:002610D8 pop ebp
.text:002610D9 retn
.text:0026
function sub_261000:
sub_261000 proc near
var_4= dword ptr -4
push ebp
mov ebp, esp
push ecx
mov [ebp+var_4], ecx
mov eax, [ebp+var_4]
mov ecx, [ebp+var_4]
mov eax, [eax]
imul eax, [ecx+4]
mov esp, ebp
pop ebp
retn
sub_261000 endp
Assembly doesn't have any concept of classes. When you create an object of class type on the stack, it just makes room for all of its member objects. A member function is just like a normal function, but it is also passed a pointer to the beginning of these member objects, which is the this pointer. The compiled functions just access members relative to this pointer. The constructor is just another function that initialises these member objects.
You could think of this:
class A {
private:
int x;
short y;
public:
A(int arg) : x(arg), y(6) { }
void print() {
std::cout << x << ',' << y << std::endl;
}
};
int main() {
A a(5); // construct a on the stack: x = 5, y = 6
a.print(); // member call; the address of a is passed implicitly as "this"
}
As being transformed into something like this made-up invalid C++:
void A_construct(A* this, int arg) {
this->x = arg;
this->y = 6;
}
void A_print(A* this) {
std::cout << this->x << ',' << this->y << std::endl;
}
int main() {
int x;
short y;
A_construct(this_cast<A*>(&x), 5);
A_print(this_cast<A*>(&x));
}
The only reason the made-up this_cast is there is to allow the A* so I can continue to use this->x to mean "Access the x object of that A". Just a convenience for illustration.
Other than these run-time details, the only other effect that classes have is that they place some compile-time restrictions on your code. For example, you can't write code that accesses a private member of a class from outside of it. That's not enforced in the assembly in any way.
I don't really know what you are looking for :-)
But first you have to keep in mind:
1) The compiler will try to optimize all your code, independent of your language. Because of that, you will typically not see inlined functions as separate functions at all.
2) All calculations on constant values will be done at compile time as part of optimization. So you may only see the processing of values if they are not compile-time constants!
3) Initialization of global objects will be done by a simple copy loop at the start of your program, which runs before main. There is nothing to see of it in the code itself.
4) You can only see the data structure of a class. But here you have to think about padding and other things.
Sometimes it is a good idea to look in the produced assembler code to get a feeling which type of programming results in which kind of executable. But today most compilers do a really perfect job which makes it nearly impossible to see how the code was presented in the high level language. But yes, it is good for learning :-)

How does returning values from a function work?

I recently had a serious bug, where I forgot to return a value in a function. The problem was that even though nothing was returned it worked fine under Linux/Windows and only crashed under Mac. I discovered the bug when I turned on all compiler warnings.
So here is a simple example:
#include <iostream>
class A{
public:
A(int p1, int p2, int p3): v1(p1), v2(p2), v3(p3)
{
}
int v1;
int v2;
int v3;
};
A* getA(){
A* p = new A(1,2,3);
// return p;
}
int main(){
A* a = getA();
std::cerr << "A: v1=" << a->v1 << " v2=" << a->v2 << " v3=" << a->v3 << std::endl;
return 0;
}
My question is how can this work under Linux/Windows without crashing? How is the returning of values done on lower level?
On Intel architecture, simple values (integers and pointers) are usually returned in eax register. This register (among others) is also used as temporary storage when moving values in memory and as operand during calculations. So whatever value left in that register is treated as the return value, and in your case it turned out to be exactly what you wanted to be returned.
Probably by luck, 'a' left in a register that happens to be used for returning single pointer results, something like that.
The calling conventions and function result returns are architecture-dependent, so it's not surprising that your code works on Windows/Linux but not on a Mac.
There are two major ways for a compiler to return a value:
Put a value in a register before returning, and
Have the caller pass a block of stack memory for the return value, and write the value into that block [more info]
The #1 is usually used with anything that fits into a register; #2 is for everything else (large structs, arrays, et cetera).
In your case, the compiler uses #1 both for the return of new and for the return of your function. On Linux and Windows, the compiler did not perform any value-distorting operations on the register with the returned value between writing it into the pointer variable and returning from your function; on Mac, it did. Hence the difference in the results that you see: in the first case, the left-over value in the return register happened to coincide with the value that you wanted to return anyway.
First off, you need to slightly modify your example to get it to compile. The function must have at least an execution path that returns a value.
A* getA(){
if(false)
return NULL;
A* p = new A(1,2,3);
// return p;
}
Second, it's obviously undefined behavior, which means anything can happen, but I guess this answer won't satisfy you.
Third, in Windows it works in Debug mode, but if you compile under Release, it doesn't.
The following is compiled under Debug:
A* p = new A(1,2,3);
00021535 push 0Ch
00021537 call operator new (211FEh)
0002153C add esp,4
0002153F mov dword ptr [ebp-0E0h],eax
00021545 mov dword ptr [ebp-4],0
0002154C cmp dword ptr [ebp-0E0h],0
00021553 je getA+7Eh (2156Eh)
00021555 push 3
00021557 push 2
00021559 push 1
0002155B mov ecx,dword ptr [ebp-0E0h]
00021561 call A::A (21271h)
00021566 mov dword ptr [ebp-0F4h],eax
0002156C jmp getA+88h (21578h)
0002156E mov dword ptr [ebp-0F4h],0
00021578 mov eax,dword ptr [ebp-0F4h]
0002157E mov dword ptr [ebp-0ECh],eax
00021584 mov dword ptr [ebp-4],0FFFFFFFFh
0002158B mov ecx,dword ptr [ebp-0ECh]
00021591 mov dword ptr [ebp-14h],ecx
The second instruction, the call to operator new, moves into eax the pointer to the newly created instance.
A* a = getA();
0010484E call getA (1012ADh)
00104853 mov dword ptr [a],eax
The calling context expects eax to contain the returned value, but it does not, it contains the last pointer allocated by new, which is incidentally, p.
So that's why it works.
As Kerrek SB mentioned, your code has ventured into the realm of undefined behavior.
Basically, your code is going to compile down to assembly. In assembly, there's no concept of a function requiring a return type, there's just an expectation. I'm the most comfortable with MIPS, so I shall use MIPS to illustrate.
Assume you have the following code:
/* Returns the sum of x and y. */
int add(int x, int y)
{
return x + y;
}
This is going to be translated to something like:
add:
add $v0, $a0, $a1 #add $a0 and $a1 and store it in $v0
jr $ra #jump back to where ever this code was jumped to from
To add 5 and 4, the code would be called something like:
addi $a0, $0, 5 # 5 is the first param
addi $a1, $0, 4 # 4 is the second param
jal add
# $v0 now contains 9
Note that unlike C, there's no explicit requirement that $v0 contain the return value, just an expectation. So, what happens if you don't actually push anything into $v0? Well, $v0 always has some value, so the value will be whatever it last was.
Note: This post makes some simplifications. Also, your computer is likely not running MIPS... But hopefully the example holds, and if you learned assembly at a university, MIPS might be what you know anyway.
The way a value is returned from a function depends on the architecture and the type of the value. It can be done through registers or through the stack.
Typically in the x86 architecture the value is returned in the EAX register if it is an integral type: char, int or pointer.
When you don't specify the return value, that value is undefined. It is only by luck that your code sometimes worked correctly.
When popping values from the stack in IBM PC architecture there is no physical destruction of the old values of data stored there. They just become unavailable through the operation of the stack, but still remain in the same memory cell.
Of course, the previous values of these data will be destroyed during the subsequent pushing of new data on the stack.
So probably you are just lucky enough, and nothing is added to the stack during your function's call and return surrounding code.
Regarding the following statement from n3242 draft C++ Standard, paragraph 6.6.3.2, your example yields undefined behavior:
Flowing off the end of a function is equivalent to a return with no
value; this results in undefined behavior in a value-returning
function.
The best way to see what actually happens is to check the assembly code generated by the given compiler on a given architecture. For the following code:
#pragma warning(default:4716)
int foo(int a, int b)
{
int c = a + b;
}
int main()
{
int n = foo(1, 2);
}
...VS2010 compiler (in Debug mode, on Intel 32-bit machine) generates the following assembly:
#pragma warning(default:4716)
int foo(int a, int b)
{
011C1490 push ebp
011C1491 mov ebp,esp
011C1493 sub esp,0CCh
011C1499 push ebx
011C149A push esi
011C149B push edi
011C149C lea edi,[ebp-0CCh]
011C14A2 mov ecx,33h
011C14A7 mov eax,0CCCCCCCCh
011C14AC rep stos dword ptr es:[edi]
int c = a + b;
011C14AE mov eax,dword ptr [a]
011C14B1 add eax,dword ptr [b]
011C14B4 mov dword ptr [c],eax
}
...
int main()
{
011C14D0 push ebp
011C14D1 mov ebp,esp
011C14D3 sub esp,0CCh
011C14D9 push ebx
011C14DA push esi
011C14DB push edi
011C14DC lea edi,[ebp-0CCh]
011C14E2 mov ecx,33h
011C14E7 mov eax,0CCCCCCCCh
011C14EC rep stos dword ptr es:[edi]
int n = foo(1, 2);
011C14EE push 2
011C14F0 push 1
011C14F2 call foo (11C1122h)
011C14F7 add esp,8
011C14FA mov dword ptr [n],eax
}
The result of addition operation in foo() is stored in eax register (accumulator) and its content is used as a return value of the function, moved to variable n.
eax is used to store a return value (pointer) in the following example as well:
#pragma warning(default:4716)
int* foo(int a)
{
int* p = new int(a);
}
int main()
{
int* pn = foo(1);
if(pn)
{
int n = *pn;
delete pn;
}
}
Assembly code:
#pragma warning(default:4716)
int* foo(int a)
{
000C1520 push ebp
000C1521 mov ebp,esp
000C1523 sub esp,0DCh
000C1529 push ebx
000C152A push esi
000C152B push edi
000C152C lea edi,[ebp-0DCh]
000C1532 mov ecx,37h
000C1537 mov eax,0CCCCCCCCh
000C153C rep stos dword ptr es:[edi]
int* p = new int(a);
000C153E push 4
000C1540 call operator new (0C1253h)
000C1545 add esp,4
000C1548 mov dword ptr [ebp-0D4h],eax
000C154E cmp dword ptr [ebp-0D4h],0
000C1555 je foo+50h (0C1570h)
000C1557 mov eax,dword ptr [ebp-0D4h]
000C155D mov ecx,dword ptr [a]
000C1560 mov dword ptr [eax],ecx
000C1562 mov edx,dword ptr [ebp-0D4h]
000C1568 mov dword ptr [ebp-0DCh],edx
000C156E jmp foo+5Ah (0C157Ah)
std::operator<<<std::char_traits<char> >:
000C1570 mov dword ptr [ebp-0DCh],0
000C157A mov eax,dword ptr [ebp-0DCh]
000C1580 mov dword ptr [p],eax
}
...
int main()
{
000C1610 push ebp
000C1611 mov ebp,esp
000C1613 sub esp,0E4h
000C1619 push ebx
000C161A push esi
000C161B push edi
000C161C lea edi,[ebp-0E4h]
000C1622 mov ecx,39h
000C1627 mov eax,0CCCCCCCCh
000C162C rep stos dword ptr es:[edi]
int* pn = foo(1);
000C162E push 1
000C1630 call foo (0C124Eh)
000C1635 add esp,4
000C1638 mov dword ptr [pn],eax
if(pn)
000C163B cmp dword ptr [pn],0
000C163F je main+51h (0C1661h)
{
int n = *pn;
000C1641 mov eax,dword ptr [pn]
000C1644 mov ecx,dword ptr [eax]
000C1646 mov dword ptr [n],ecx
delete pn;
000C1649 mov eax,dword ptr [pn]
000C164C mov dword ptr [ebp-0E0h],eax
000C1652 mov ecx,dword ptr [ebp-0E0h]
000C1658 push ecx
000C1659 call operator delete (0C1249h)
000C165E add esp,4
}
}
VS2010 compiler issues warning 4716 in both examples. By default this warning is promoted to an error.