abi difference on linux/amd64 C vs C++ - c++

I have C library with such API:
#ifdef __cplusplus
extern "C" {
#endif
struct Foo {
void *p;
int len;
};
struct Foo f(void *opaque, int param);
void foo_free(struct Foo *);
#ifdef __cplusplus
}
#endif
to simplify my C++ life I decide do simple thing:
struct Foo {
void *p;
int len;
#ifdef __cplusplus
~Foo() { foo_free(this); }
#endif
};
And after that things become crazy: for example if I call
f(0xfffeeea0, 40) in C++, then on C side I got 0x7fff905d2050 -69984:
assember without destructor:
0x000055555555467a <+0>: push %rbp
0x000055555555467b <+1>: mov %rsp,%rbp
0x000055555555467e <+4>: sub $0x10,%rsp
0x0000555555554682 <+8>: mov $0x28,%esi
0x0000555555554687 <+13>: mov $0xfffeeea0,%edi
0x000055555555468c <+18>: callq 0x5555555546a0 <f>
0x0000555555554691 <+23>: mov %rax,-0x10(%rbp)
0x0000555555554695 <+27>: mov %rdx,-0x8(%rbp)
0x0000555555554699 <+31>: mov $0x0,%eax
0x000055555555469e <+36>: leaveq
0x000055555555469f <+37>: retq
assember with destructor:
0x00000000000006da <+0>: push %rbp
0x00000000000006db <+1>: mov %rsp,%rbp
0x00000000000006de <+4>: sub $0x20,%rsp
0x00000000000006e2 <+8>: mov %fs:0x28,%rax
0x00000000000006eb <+17>: mov %rax,-0x8(%rbp)
0x00000000000006ef <+21>: xor %eax,%eax
0x00000000000006f1 <+23>: lea -0x20(%rbp),%rax
0x00000000000006f5 <+27>: mov $0x28,%edx
0x00000000000006fa <+32>: mov $0xfffeeea0,%esi
0x00000000000006ff <+37>: mov %rax,%rdi
0x0000000000000702 <+40>: callq 0x739 <f>
0x0000000000000707 <+45>: lea -0x20(%rbp),%rax
0x000000000000070b <+49>: mov %rax,%rdi
0x000000000000070e <+52>: callq 0x72e <Foo::~Foo()>
0x0000000000000713 <+57>: mov $0x0,%eax
0x0000000000000718 <+62>: mov -0x8(%rbp),%rcx
0x000000000000071c <+66>: xor %fs:0x28,%rcx
0x0000000000000725 <+75>: je 0x72c <main()+82>
0x0000000000000727 <+77>: callq 0x5c0 <__stack_chk_fail#plt>
0x000000000000072c <+82>: leaveq
0x000000000000072d <+83>: retq
I wonder what is going on?
I can understand why compiler should handle return in different
way, but why it moves arguments in different registers %esi vs %edi.
For clearness I understand that I do wrong thing, and I rewrite code with
some kind of smart pointers instead without touching real Foo.
But I wonder how ABI of c++ and c works in this particular case.
full example:
//test.cpp
extern "C" {
struct Foo {
void *p;
int len;
~Foo() {/*call free*/}
};
struct Foo f(void *opaque, int param);
}
int main()
{
auto foo = f(reinterpret_cast<void *>(0xfffeeea0), 40);
}
//test.c
#include <stdio.h>
struct Foo {
void *p;
int len;
};
struct Foo f(void *opaque, int param)
{
printf("!!! %p %d\n", opaque, param);
struct Foo ret = {0, 0};
return ret;
}
#makefile:
prog: test.cpp test.c
gcc -Wall -ggdb -std=c11 -c -o test.c.o test.c
g++ -Wall -ggdb -std=c++11 -o $# test.cpp test.c.o
./prog

In the first version of your code (no destructor), we have:
// allocate 16 bytes on the stack (for a Foo instance)
sub $0x10,%rsp
// load two (constant) arguments into %edi and %esi
mov $0x28,%esi
mov $0xfffeeea0,%edi
// call f
callq 0x5555555546a0 <f>
// a 2-word struct was returned by value (in %rax/%rdx).
// move the values to the corresponding slots on the stack
mov %rax,-0x10(%rbp)
mov %rdx,-0x8(%rbp)
In the second version (with a destructor):
// load address of Foo instance into %rax
lea -0x20(%rbp),%rax
// load three arguments:
// - 40 in %edx
// - 0xfffeeea0 in %esi
// - &foo in %rdi
mov $0x28,%edx
mov $0xfffeeea0,%esi
mov %rax,%rdi
// ... and call f
callq 0x739 <f>
// ignore f's return value; load &foo into %rax again
lea -0x20(%rbp),%rax
// call ~Foo on &foo
mov %rax,%rdi
callq 0x72e <Foo::~Foo()>
My guess is that without a destructor the struct is treated like a plain 2-word tuple and returned by value.
But with a destructor the compiler assumes it can't just copy the member values around, so it transforms the struct return value into a hidden pointer argument:
struct Foo f(void *opaque, int param);
// actually implemented as:
void f(struct Foo *_hidden, void *opaque, int param);
Normally f would then take care of writing the return value into *_hidden.
Because the caller and the implementer of the function see a different return type, they disagree about the number of parameters the function actually has. The C++ code passes 3 arguments, but the C code only looks at two of them. It misinterprets the address of the Foo instance as the opaque pointer, and what was supposed to be the opaque pointer ends up in param.
In other words, the presence of a destructor means Foo is no longer a POD type, which inhibits simple return-by-value through registers.

Related

Calling a standard-library-function in MASM

I want to get started in MASM in a mixed C++/Assembly way.
I am currently trying to call a standard-library-function (e.g. printf) from a PROC in assembly, that I then call in C++.
I have the code working after I declared printf's signature in my cpp-file. But I do not understand why I have to do this and if I can avoid that.
My cpp-file:
#include <stdio.h>
extern "C" {
extern int __stdcall foo(int, int);
}
extern int __stdcall printf(const char*, ...); // When I remove this line I get Linker-Error "LNK2019: unresolved external symbol"
int main()
{
foo(5, 5);
}
My asm-file:
.model flat, stdcall
EXTERN printf :PROC ; declare printf
.data
tstStr db "Mult: %i",0Ah,"Add: %i",0 ; 0Ah is the backslash - escapes are not supported
.code
foo PROC x:DWORD, y:DWORD
mov eax, x
mov ebx, y
add eax, ebx
push eax
mov eax, x
mul ebx
push eax
push OFFSET tstStr
call printf
ret
foo ENDP
END
Some Updates
In response to the comments I tried to rework the code to be eligible for the cdecl calling-convention. Unfortunatly this did not solve the problem (the code runs fine with the extern declaration, but throws an error without).
But by trial and error i found out, that the extern seems to force external linkage, even though the keyword should not be needed, because external linkage should be the default for function declarations.
I can omit the declaration by using the function in my cpp-code (i.e. if a add a printf("\0"); somewhere in the source file the linker is fine with it and everythings works correctly.
The new (but not really better) cpp-file:
#include <stdio.h>
extern "C" {
extern int __cdecl foo(int, int);
}
extern int __cdecl printf(const char*, ...); // omiting the extern results in a linker error
int main()
{
//printf("\0"); // this would replace the declaration
foo(5, 5);
return 0;
}
The asm-file:
.model flat, c
EXTERN printf :PROC
.data
tstStr db "Mult: %i",0Ah,"Add: %i",0Ah,0 ; 0Ah is the backslash - escapes are not supported
.code
foo PROC
push ebp
mov ebp, esp
mov eax, [ebp+8]
mov ebx, [ebp+12]
add eax, ebx
push eax
mov eax, [ebp+8]
mul ebx
push eax
push OFFSET tstStr
call printf
add esp, 12
pop ebp
ret
foo ENDP
END
My best guess is that this has to do with the fact that Microsoft refactored the C library starting with VS 2015 and some of the C library is now inlined (including printf) and isn't actually in the default .lib files.
My guess is in this declaration:
extern int __cdecl printf(const char*, ...);
extern forces the old legacy libraries to be included in the link process. Those libraries contain the non-inlined function printf. If the C++ code doesn't force the MS linker to include the legacy C library then the MASM code's use of printf will become unresolved.
I believe this is related to this Stackoverflow question and my answer in 2015. If you want to remove extern int __cdecl printf(const char*, ...); from the C++ code you may wish to consider adding this line to your MASM code:
includelib legacy_stdio_definitions.lib
Your MASM code would look like this if you are using CDECL calling convention and mixing C/C++ with assembly:
.model flat, C ; Default to C language
includelib legacy_stdio_definitions.lib
EXTERN printf :PROC ; declare printf
.data
tstStr db "Mult: %i",0Ah,"Add: %i",0 ; 0Ah is the backslash - escapes are not supported
.code
foo PROC x:DWORD, y:DWORD
mov eax, x
mov ebx, y
add eax, ebx
push eax
mov eax, x
mul ebx
push eax
push OFFSET tstStr
call printf
ret
foo ENDP
END
Your C++ code would be:
#include <stdio.h>
extern "C" {
extern int foo(int, int); /* __cdecl removed since it is the default */
}
int main()
{
//printf("\0"); // this would replace the declaration
foo(5, 5);
return 0;
}
The alternative to passing the includelib line in the assembly code is to add legacy_stdio_definitions.lib to the dependency list in the linker options of your Visual Studio project or the command line options if you invoke the linker manually.
Calling Convention Bug in your MASM Code
You can read about the CDECL calling convention for 32-bit Windows code in the Microsoft documentation as well as this Wiki article. Microsoft summarizes the CDECL calling convention as:
On x86 platforms, all arguments are widened to 32 bits when they are passed. Return values are also widened to 32 bits and returned in the EAX register, except for 8-byte structures, which are returned in the EDX:EAX register pair. Larger structures are returned in the EAX register as pointers to hidden return structures. Parameters are pushed onto the stack from right to left. Structures that are not PODs will not be returned in registers.
The compiler generates prologue and epilogue code to save and restore the ESI, EDI, EBX, and EBP registers, if they are used in the function.
The last paragraph is important in relation to your code. The ESI, EDI, EBX, and EBP registers are non-volatile and must be saved and restored by the called function if they are modified. Your code clobbers EBX, you must save and restore it. You can get MASM to do that by using the USES directive in a PROC statement:
foo PROC uses EBX x:DWORD, y:DWORD
mov eax, x
mov ebx, y
add eax, ebx
push eax
mov eax, x
mul ebx
push eax
push OFFSET tstStr
call printf
add esp, 12 ; Remove the parameters pushed on the stack for
; the printf call. The stack needs to be
; properly restored. If not done, the function
; prologue can't properly restore EBX
; (and any registers listed by USES)
ret
foo ENDP
uses EBX tell MASM to generate extra prologue and epilogue code to save EBX at the start and restore EBX when the function does a ret instruction. The generated instructions would look something like:
0000 _foo:
0000 55 push ebp
0001 8B EC mov ebp,esp
0003 53 push ebx
0004 8B 45 08 mov eax,0x8[ebp]
0007 8B 5D 0C mov ebx,0xc[ebp]
000A 03 C3 add eax,ebx
000C 50 push eax
000D 8B 45 08 mov eax,0x8[ebp]
0010 F7 E3 mul ebx
0012 50 push eax
0013 68 00 00 00 00 push tstStr
0018 E8 00 00 00 00 call _printf
001D 83 C4 0C add esp,0x0000000c
0020 5B pop ebx
0021 C9 leave
0022 C3 ret
That's indeed a bit pointless, isn't it?
Linkers are often pretty dumb things. They need to be told that an object file requires printf. Linkers can't figure that out from a missing printf symbol, stupidly enough.
The C++ compiler will tell the linker that it needs printf when you write extern int __stdcall printf(const char*, ...);. Or, and that's the normal way, the compiler will tell the linker so when you actually call printf. But your C++ code doesn't call it!
Assemblers are also pretty dumb. Your assembler clearly fails to tell the linker that it needs printf from C++.
The general solution is not to do complex things in assembly. That's just not what assembly is good for. Calls from C to assembly generally work well, calls the other way are problematic.

namespace in debug flags of in-class defined friend functions

I'm dealing with a class that defines a friend function in the class without outside declaration
namespace our_namespace {
template <typename T>
struct our_container {
friend our_container set_union(our_container const &, our_container const &) {
// meaningless for the example here, just a valid definition
// no valid semantics
return our_container{};
}
};
} // namespace our_namespace
As discussed (e.g. here or here) the function set_union is not in the our_namespace namespace but will be found by argument dependent lookup:
auto foo(std::vector<our_namespace::our_container<float>> in) {
// works:
return set_union(in[0], in[1]);
}
I noticed however that in the debug flags set_union appears to be in the our_namespace namespace
mov rdi, qword ptr [rbp - 40] # 8-byte Reload
mov rsi, rax
call our_namespace::set_union(our_namespace::our_container<float> const&, our_namespace::our_container<float> const&)
add rsp, 48
pop rbp
ret
our_namespace::set_union(our_namespace::our_container<float> const&, our_namespace::our_container<float> const&): # #our_namespace::set_union(our_namespace::our_container<float> const&, our_namespace::our_container<float> const&)
push rbp
mov rbp, rsp
mov qword ptr [rbp - 16], rdi
mov qword ptr [rbp - 24], rsi
pop rbp
ret
although I can't call it as our_namespace::set_union
auto foo(std::vector<our_namespace::our_container<float>> in) {
// fails:
return our_namespace::set_union(in[0], in[1]);
}
Any hints about how the debug information is to be understood?
EDIT: The set_union function body is only a strawdog example here to have a valid definition.
The C++ standard only defines compiler behavior in regards to the code compilation and behavior of the resulting program. It doesn't define all the aspects of code generation, and in particular, it doesn't define debug symbols.
So your compiler correctly (as per Standard) disallows calling the function through namespace it is not in. But since the function does exist and you should be able to debug it, it needs to put debug symbol somewhere. Enclosing namespace seems to be a reasonable choice.

How to pass operator << for string as parameter?

I want to do something like that:
vector<string> road_map;
// do some stuff
for_each(road_map.begin(), road_map.end(), bind(cout, &ostream::operator<<));
Note: I don't want to use lambda for this purpose, like that:
[](const string& str){ cout << str << endl; }
C++ compiler will create aux code for lambda. That's why I don't want to use lambda in such case. I suppose that there is more lightweight solution for such problem. Of course it is not critical, if there is not simple solution I just will use lambda.
This answer is mainly used to investigate the C++ compiler will create aux code for lambda claim.
You should note that there is no member function ostream::operator<< taking a std::string. There's only a free-standing function opertator<<(std::ostream&,const std::string&) defined in the <string> header.
I used gcc godbolt, you can see the example Live here
So I made a version using a lambda and a version using std::bind:
With bind you get
void funcBind(std::vector<std::string>& a)
{
using namespace std;
using func_t = std::ostream&(*)(std::ostream&, const std::string&);
func_t fptr = &operator<<; //select the right overload
std::for_each(
a.begin(),
a.end(),
std::bind(
fptr,
std::ref(std::cout),
std::placeholders::_1)
);
}
and this assembly on x86_64
push rbp
push rbx
sub rsp, 8
mov rbp, QWORD PTR [rdi+8]
mov rbx, QWORD PTR [rdi]
cmp rbx, rbp
je .L22
.L24:
mov rdx, QWORD PTR [rbx+8]
mov rsi, QWORD PTR [rbx]
mov edi, OFFSET FLAT:std::cout
add rbx, 32
call std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
cmp rbp, rbx
jne .L24
.L22:
add rsp, 8
pop rbx
pop rbp
ret
and with a lambda you get:
void funcLambda(std::vector<std::string>& a)
{
std::for_each(
a.begin(),
a.end(),
[](const std::string& b){std::cout << b;});
}
and this assembly
push rbp
push rbx
sub rsp, 8
mov rbp, QWORD PTR [rdi+8]
mov rbx, QWORD PTR [rdi]
cmp rbx, rbp
je .L27
.L29:
mov rdx, QWORD PTR [rbx+8]
mov rsi, QWORD PTR [rbx]
mov edi, OFFSET FLAT:std::cout
add rbx, 32
call std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
cmp rbp, rbx
jne .L29
.L27:
add rsp, 8
pop rbx
pop rbp
ret
so you don't actually see any difference with any appreciable level of optimization enable
You can use an ostream_iterator to generate the output.
#include <string>
#include <vector>
#include <iostream>
#include <algorithm>
#include <iterator>
int main()
{
std::vector<std::string> road_map{"ab", "cde"};
// do some stuff
std::copy(road_map.begin(), road_map.end(),
std::ostream_iterator<std::string>(std::cout, "\n"));
}

why object code generated for noexcept and throw() is same in c++11?

Code using noexcept .
//hello.cpp
class A{
public:
A(){}
~A(){}
};
void fun() noexcept{ //c++11 style
A a[10];
}
int main()
{
fun();
}
Code using throw() .
//hello1.cpp
class A{
public:
A(){}
~A(){}
};
void fun() throw(){//c++98 style
A a[10];
}
int main()
{
fun();
}
As per various online links and scott meyer's book "If, at runtime, an exception leaves fun, fun’s exception specification is violated. With the
C++98 exception specification, the call stack is unwound to f’s caller, and, after some
actions not relevant here, program execution is terminated. With the C++11 exception
specification, runtime behavior is slightly different: the stack is only possibly
unwound before program execution is terminated." He said code using noexcept is more optimized than code using throw() .
But when I have generated machine code for above program , i found code generated for both cases is exactly same .
$ g++ --std=c++11 hello1.cpp -O0 -S -o throw1.s
$ g++ --std=c++11 hello.cpp -O0 -S -o throw.s
diff is below .
$ diff throw.s throw1.s
1c1
< .file "hello.cpp"
---
> .file "hello1.cpp"
Machine code generated is as below for function "fun" for both cases .
.LFB1202:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %r12
pushq %rbx
subq $16, %rsp
.cfi_offset 12, -24
.cfi_offset 3, -32
leaq -32(%rbp), %rax
movl $9, %ebx
movq %rax, %r12
jmp .L5
.L6:
movq %r12, %rdi
call _ZN1AC1Ev
addq $1, %r12
subq $1, %rbx
.L5:
cmpq $-1, %rbx
jne .L6
leaq -32(%rbp), %rbx
addq $10, %rbx
.L8:
leaq -32(%rbp), %rax
cmpq %rax, %rbx
je .L4
subq $1, %rbx
movq %rbx, %rdi
call _ZN1AD1Ev
jmp .L8
.L4:
addq $16, %rsp
popq %rbx
popq %r12
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1202:
.size _Z3funv, .-_Z3funv
.globl main
.type main, #function
What is advantage of using noexcept when noexcept and throw are generating the same code ?
They are generating the same code because you are not throwing anything. Your test program is so simple that the compiler can trivially analyze it, determine that it is not throwing an exception, and in fact not doing anything at all! With optimizations enabled (-O1 and higher), the object code:
fun():
rep ret
main:
xor eax, eax
ret
shows that your test code is being optimized simply to the most trivial valid C++ application:
int main()
{
return 0;
}
If you want to really test the difference in object code generation for the two types of exception specifiers, you need to use a real (i.e., non-trivial) test program. Something that actually throws an exception, and where that throw cannot be analyzed out by a bit of compile-time analysis:
void fun(int args) throw() // C++98 style
{
if (args == 0)
{
throw "Not enough arguments!";
}
else
{
// do something
}
}
int main(int argc, char** argv)
{
fun(argc);
return 0;
}
In this code, an exception is conditionally thrown depending on the value of an input parameter (argc) passed to the main function. It is impossible for the compiler to know, at compile-time, what the value of this argument will be, so it cannot optimize out either this conditional check or the throwing of the exception. That forces it to generate exception-throwing and stack-unwinding code.
Now we can compare the resulting object code. Using GCC 5.3, with -O3 and -std=c++11, I get the following:
C++98 style (throw())
.LC0:
.string "Not enough arguments!"
fun(int):
test edi, edi
je .L9
rep ret
.L9:
push rax
mov edi, 8
call __cxa_allocate_exception
xor edx, edx
mov QWORD PTR [rax], OFFSET FLAT:.LC0
mov esi, OFFSET FLAT:typeinfo for char const*
mov rdi, rax
call __cxa_throw
add rdx, 1
mov rdi, rax
je .L4
call _Unwind_Resume
.L4:
call __cxa_call_unexpected
main:
sub rsp, 8
call fun(int)
xor eax, eax
add rsp, 8
ret
C++11 style (noexcept)
.LC0:
.string "Not enough arguments!"
fun(int) [clone .part.0]:
push rax
mov edi, 8
call __cxa_allocate_exception
xor edx, edx
mov QWORD PTR [rax], OFFSET FLAT:.LC0
mov esi, OFFSET FLAT:typeinfo for char const*
mov rdi, rax
call __cxa_throw
fun(int):
test edi, edi
je .L8
rep ret
.L8:
push rax
call fun(int) [clone .part.0]
main:
test edi, edi
je .L12
xor eax, eax
ret
.L12:
push rax
call fun(int) [clone .part.0]
Note that they are clearly different. Just as Meyers et al. have claimed, the C++98 style throw() specification, which indicates that a function does not throw, causes a standards-compliant compiler to emit code to unwind the stack and call std::unexpected when an exception is thrown from inside of that function. That is exactly what happens here. Because fun is marked throw() but in fact does throw, the object code shows the compiler emitting a call to __cxa_call_unexpected.
Clang is also standards-compliant here and does the same thing. I won't reproduce the object code, because it's longer and harder to follow (you can see it on Matt Godbolt's excellent site), but putting the C++98 style exception specification on the function causes the compiler to explicitly call std::terminate if the function throws in violation of its specification, whereas the C++11 style exception specification does not result in a call to std::terminate.

Inline Assembly GCD won't work

I've been writing a simple c++ program that uses Assembly to take the GCD of 2 numbers and output them as an example used in a tutorial I watched. I understand what it's doing, but I don't understand why it won't work.
EDIT: Should add that when it runs, it doesn't output anything at all.
#include <iostream>
using namespace std;
int gcd(int a, int b)
{
int result;
_asm
{
push ebp
mov ebp, esp
mov eax, a
mov ebx, b
looptop:
cmp eax, 0
je goback
cmp eax, ebx
jge modulo
xchg eax, ebx
modulo:
idiv ebx
mov eax, edx
jmp looptop
goback:
mov eax, ebx
mov esp, ebp
pop ebp
mov result, edx
}
return result;
}
int main()
{
cout << gcd(46,90) << endl;
return 0;
}
I'm running it on a 32bit Windows system, any help would be appreciated. When compiling, I get 4 errors:
warning C4731: 'gcd' : frame pointer register 'ebp' modified by inline assembly code
warning C4731: 'gcd' : frame pointer register 'ebp' modified by inline assembly code
warning C4731: 'main' : frame pointer register 'ebp' modified by inline assembly code
warning C4731: 'main' : frame pointer register 'ebp' modified by inline assembly code
The compiler will insert these or equivalent instructions for you at the beginning and end of the function:
push ebp
mov ebp, esp
...
mov esp, ebp
pop ebp
If you add them manually, you won't be able to access the function's parameters through ebp, which is why the compiler is issuing warnings.
Remove these 4 instructions.
Also, start using the debugger. Today.