c++ code compiled with -fsanitize=address crashes - c++

I used gcc 6.3.0 with address sanitizer to compile the following code:
#include <iostream>
// Increments the caller's int through the reference and returns the updated value.
int increment(int &x)
{
x++;
return x;
}
// Entry point: calls increment() on a local int and discards the returned value.
int main()
{
int x = 0;  // local variable, passed by reference below
increment(x);
return 0;
}
The code gets compiled and instrumented. Objdump (-S) of the compiled code:
000008d0 <_Z9incrementRi>:
int increment(int &x) {
8d0: 55 push %ebp
8d1: 89 e5 mov %esp,%ebp
8d3: 56 push %esi
8d4: 53 push %ebx
8d5: 83 ec 08 sub $0x8,%esp
8d8: e8 9f 01 00 00 call a7c <__x86.get_pc_thunk.cx>
8dd: 81 c1 b3 89 00 00 add $0x89b3,%ecx
++x;
8e3: 8b 45 08 mov 0x8(%ebp),%eax
8e6: 89 c2 mov %eax,%edx
8e8: c1 ea 03 shr $0x3,%edx
8eb: 81 c2 00 00 00 20 add $0x20000000,%edx
8f1: 0f b6 12 movzbl (%edx),%edx
8f4: 84 d2 test %dl,%dl
8f6: 0f 95 45 f7 setne -0x9(%ebp)
8fa: 89 c6 mov %eax,%esi
8fc: 83 e6 07 and $0x7,%esi
8ff: 8d 5e 03 lea 0x3(%esi),%ebx
902: 38 d3 cmp %dl,%bl
904: 0f 9d c2 setge %dl
907: 22 55 f7 and -0x9(%ebp),%dl
90a: 84 d2 test %dl,%dl
90c: 74 0b je 919 <_Z9incrementRi+0x49>
90e: 83 ec 04 sub $0x4,%esp
911: 50 push %eax
912: 89 cb mov %ecx,%ebx
914: e8 a0 07 00 00 call 10b9 <__asan_report_load4>
919: 8b 45 08 mov 0x8(%ebp),%eax
91c: 8b 00 mov (%eax),%eax
91e: 8d 50 01 lea 0x1(%eax),%edx
921: 8b 45 08 mov 0x8(%ebp),%eax
924: 89 10 mov %edx,(%eax)
}
926: 90 nop
927: 8d 65 f8 lea -0x8(%ebp),%esp
92a: 5b pop %ebx
92b: 5e pop %esi
92c: 5d pop %ebp
92d: c3 ret
0000092e <main>:
int main(void)
{
92e: 8d 4c 24 04 lea 0x4(%esp),%ecx
932: 83 e4 f8 and $0xfffffff8,%esp
935: ff 71 fc pushl -0x4(%ecx)
938: 55 push %ebp
939: 89 e5 mov %esp,%ebp
93b: 57 push %edi
93c: 56 push %esi
93d: 53 push %ebx
93e: 51 push %ecx
93f: 83 ec 60 sub $0x60,%esp
942: e8 39 01 00 00 call a80 <__x86.get_pc_thunk.bx>
947: 81 c3 49 89 00 00 add $0x8949,%ebx
94d: 8d 75 90 lea -0x70(%ebp),%esi
950: 89 f7 mov %esi,%edi
952: 8d 83 d0 05 00 00 lea 0x5d0(%ebx),%eax
958: 83 38 00 cmpl $0x0,(%eax)
95b: 74 13 je 970 <main+0x42>
95d: 83 ec 04 sub $0x4,%esp
960: 6a 60 push $0x60
962: e8 b4 02 00 00 call c1b <__asan_stack_malloc_1>
967: 83 c4 08 add $0x8,%esp
96a: 85 c0 test %eax,%eax
96c: 74 02 je 970 <main+0x42>
96e: 89 c6 mov %eax,%esi
970: 8d 46 60 lea 0x60(%esi),%eax
973: c7 06 b3 8a b5 41 movl $0x41b58ab3,(%esi)
979: 8d 93 d0 e8 ff ff lea -0x1730(%ebx),%edx
97f: 89 56 04 mov %edx,0x4(%esi)
982: 8d 93 9e 76 ff ff lea -0x8962(%ebx),%edx
988: 89 56 08 mov %edx,0x8(%esi)
98b: 89 f3 mov %esi,%ebx
98d: c1 eb 03 shr $0x3,%ebx
990: c7 83 00 00 00 20 f1 movl $0xf1f1f1f1,0x20000000(%ebx)
997: f1 f1 f1
99a: c7 83 04 00 00 20 04 movl $0xf4f4f404,0x20000004(%ebx)
9a1: f4 f4 f4
9a4: c7 83 08 00 00 20 f3 movl $0xf3f3f3f3,0x20000008(%ebx)
9ab: f3 f3 f3
int x = 0;
9ae: c7 40 c0 00 00 00 00 movl $0x0,-0x40(%eax)
increment(x);
9b5: 83 ec 04 sub $0x4,%esp
9b8: 83 e8 40 sub $0x40,%eax
9bb: 50 push %eax
9bc: e8 0f ff ff ff call 8d0 <_Z9incrementRi>
9c1: 83 c4 08 add $0x8,%esp
return 0;
9c4: b8 00 00 00 00 mov $0x0,%eax
{
9c9: 39 f7 cmp %esi,%edi
9cb: 74 26 je 9f3 <main+0xc5>
9cd: c7 06 0e 36 e0 45 movl $0x45e0360e,(%esi)
9d3: c7 83 00 00 00 20 f5 movl $0xf5f5f5f5,0x20000000(%ebx)
9da: f5 f5 f5
9dd: c7 83 04 00 00 20 f5 movl $0xf5f5f5f5,0x20000004(%ebx)
9e4: f5 f5 f5
9e7: c7 83 08 00 00 20 f5 movl $0xf5f5f5f5,0x20000008(%ebx)
9ee: f5 f5 f5
9f1: eb 1e jmp a11 <main+0xe3>
9f3: c7 83 00 00 00 20 00 movl $0x0,0x20000000(%ebx)
9fa: 00 00 00
9fd: c7 83 04 00 00 20 00 movl $0x0,0x20000004(%ebx)
a04: 00 00 00
a07: c7 83 08 00 00 20 00 movl $0x0,0x20000008(%ebx)
a0e: 00 00 00
}
a11: 8d 65 f0 lea -0x10(%ebp),%esp
a14: 59 pop %ecx
a15: 5b pop %ebx
a16: 5e pop %esi
a17: 5f pop %edi
a18: 5d pop %ebp
a19: 8d 61 fc lea -0x4(%ecx),%esp
a1c: c3 ret
Execution crashes on instrumented code at line:
990: c7 83 00 00 00 20 f1 movl $0xf1f1f1f1,0x20000000(%ebx)
before increment(int &x) function is called.
ASAN option "stack-use-after-return" was enabled.
The code was compiled with:
gcc -O0 -g -fsanitize=address main.cpp
If the integer variable x is defined as a global variable, the code doesn't get instrumented and the crash does not happen.
Before I posted my question, I found this question, that is very similar to my problem with address sanitizer.
So my question would be:
Why did the execution of the code crash at the mentioned line?
Is it possible that the instrumentation went wrong at some point?
Edit
GCC version and configure flags
Configured with:
../gcc-6.3.0/configure --prefix=/opt/V6.3.0 --target=i686-elf --with-pic
--with-newlib --enable-fully-dynamic-string --enable-languages=c,c++
--disable-initfini-array --disable-nls --disable-shared --disable-multilib
--disable-threads --disable-tls --disable-win32-registry --enable-sjlj-
exceptions --enable-frame-pointer --disable-__cxa_atexit --disable-libgomp
--disable-libquadmath --disable-libssp --disable-libada --disable-libitm
--disable-libstdcxx-verbose --disable-libstdcxx-visibility --with-default-
libstdcxx-abi=gcc4-compatible --without-headers
Thread model: single
gcc version 6.3.0 (GCC)

Related

How are non-static, non-virtual methods implemented in C++?

I wanted to know how methods are implemented in C++ "under the hood".
So, I have made a simple C++ program which has a class with 1 non static field and 1 non static, non virtual method.
Then I instantiated the class in the main function and called the method. I have used objdump -d option in order to see the CPU instructions of this program. I have a x86-64 processor.
Here's the code:
#include<stdio.h>
// Class with one public int field and one non-static, non-virtual member function.
class TestClass {
public:
int x;
// Returns the value of x plus 2; the object itself is not modified.
int xPlus2(){
return x + 2;
}
};
// Builds a TestClass with x = 5, calls xPlus2() on it and prints the result.
int main(){
TestClass tc1 = {5};  // initializes tc1.x to 5
int variable = tc1.xPlus2();
printf("%d \n", variable);
return 0;
}
Here are instructions for the method xPlus2:
0000000000402c30 <_ZN9TestClass6xPlus2Ev>:
402c30: 55 push %rbp
402c31: 48 89 e5 mov %rsp,%rbp
402c34: 48 89 4d 10 mov %rcx,0x10(%rbp)
402c38: 48 8b 45 10 mov 0x10(%rbp),%rax
402c3c: 8b 00 mov (%rax),%eax
402c3e: 83 c0 02 add $0x2,%eax
402c41: 5d pop %rbp
402c42: c3 retq
402c43: 90 nop
402c44: 90 nop
402c45: 90 nop
402c46: 90 nop
402c47: 90 nop
402c48: 90 nop
402c49: 90 nop
402c4a: 90 nop
402c4b: 90 nop
402c4c: 90 nop
402c4d: 90 nop
402c4e: 90 nop
402c4f: 90 nop
If I understand it correctly, these instructions can be replaced by just 3 instructions, because I believe that I don't need to use the stack, I think the compiler used it redundantly:
mov (%rcx), eax
add $2, eax
retq
and then maybe I still need lots of nop instructions for synchronization purposes or whatnot. If you look at the CPU instructions, it looks like the value of the x field is stored at the memory location that the rcx register holds. You will see the rest of the CPU instructions in a moment. It is a little bit hard for me to track what has happened here (especially what is going on with the call of the __main function); I don't even know which parts of the assembly are important to look at. The compiler produces the main function (as I expected), but it also produces a __main function which is called from main, and there are some weird functions in between those two as well.
Here are other parts of the assembly that I think may be interesting:
0000000000401550 <main>:
401550: 55 push %rbp
401551: 48 89 e5 mov %rsp,%rbp
401554: 48 83 ec 30 sub $0x30,%rsp
401558: e8 e3 00 00 00 callq 401640 <__main>
40155d: c7 45 f8 05 00 00 00 movl $0x5,-0x8(%rbp)
401564: 48 8d 45 f8 lea -0x8(%rbp),%rax
401568: 48 89 c1 mov %rax,%rcx
40156b: e8 c0 16 00 00 callq 402c30 <_ZN9TestClass6xPlus2Ev>
401570: 89 45 fc mov %eax,-0x4(%rbp)
401573: 8b 45 fc mov -0x4(%rbp),%eax
401576: 89 c2 mov %eax,%edx
401578: 48 8d 0d 81 2a 00 00 lea 0x2a81(%rip),%rcx # 404000 <.rdata>
40157f: e8 ec 14 00 00 callq 402a70 <printf>
401584: b8 00 00 00 00 mov $0x0,%eax
401589: 48 83 c4 30 add $0x30,%rsp
40158d: 5d pop %rbp
40158e: c3 retq
40158f: 90 nop
0000000000401590 <__do_global_dtors>:
401590: 48 83 ec 28 sub $0x28,%rsp
401594: 48 8b 05 75 1a 00 00 mov 0x1a75(%rip),%rax # 403010 <p.93846>
40159b: 48 8b 00 mov (%rax),%rax
40159e: 48 85 c0 test %rax,%rax
4015a1: 74 1d je 4015c0 <__do_global_dtors+0x30>
4015a3: ff d0 callq *%rax
4015a5: 48 8b 05 64 1a 00 00 mov 0x1a64(%rip),%rax # 403010 <p.93846>
4015ac: 48 8d 50 08 lea 0x8(%rax),%rdx
4015b0: 48 8b 40 08 mov 0x8(%rax),%rax
4015b4: 48 89 15 55 1a 00 00 mov %rdx,0x1a55(%rip) # 403010 <p.93846>
4015bb: 48 85 c0 test %rax,%rax
4015be: 75 e3 jne 4015a3 <__do_global_dtors+0x13>
4015c0: 48 83 c4 28 add $0x28,%rsp
4015c4: c3 retq
4015c5: 90 nop
4015c6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
4015cd: 00 00 00
00000000004015d0 <__do_global_ctors>:
4015d0: 56 push %rsi
4015d1: 53 push %rbx
4015d2: 48 83 ec 28 sub $0x28,%rsp
4015d6: 48 8b 0d 23 2d 00 00 mov 0x2d23(%rip),%rcx # 404300 <.refptr.__CTOR_LIST__>
4015dd: 48 8b 11 mov (%rcx),%rdx
4015e0: 83 fa ff cmp $0xffffffff,%edx
4015e3: 89 d0 mov %edx,%eax
4015e5: 74 39 je 401620 <__do_global_ctors+0x50>
4015e7: 85 c0 test %eax,%eax
4015e9: 74 20 je 40160b <__do_global_ctors+0x3b>
4015eb: 89 c2 mov %eax,%edx
4015ed: 83 e8 01 sub $0x1,%eax
4015f0: 48 8d 1c d1 lea (%rcx,%rdx,8),%rbx
4015f4: 48 29 c2 sub %rax,%rdx
4015f7: 48 8d 74 d1 f8 lea -0x8(%rcx,%rdx,8),%rsi
4015fc: 0f 1f 40 00 nopl 0x0(%rax)
401600: ff 13 callq *(%rbx)
401602: 48 83 eb 08 sub $0x8,%rbx
401606: 48 39 f3 cmp %rsi,%rbx
401609: 75 f5 jne 401600 <__do_global_ctors+0x30>
40160b: 48 8d 0d 7e ff ff ff lea -0x82(%rip),%rcx # 401590 <__do_global_dtors>
401612: 48 83 c4 28 add $0x28,%rsp
401616: 5b pop %rbx
401617: 5e pop %rsi
401618: e9 f3 fe ff ff jmpq 401510 <atexit>
40161d: 0f 1f 00 nopl (%rax)
401620: 31 c0 xor %eax,%eax
401622: eb 02 jmp 401626 <__do_global_ctors+0x56>
401624: 89 d0 mov %edx,%eax
401626: 44 8d 40 01 lea 0x1(%rax),%r8d
40162a: 4a 83 3c c1 00 cmpq $0x0,(%rcx,%r8,8)
40162f: 4c 89 c2 mov %r8,%rdx
401632: 75 f0 jne 401624 <__do_global_ctors+0x54>
401634: eb b1 jmp 4015e7 <__do_global_ctors+0x17>
401636: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
40163d: 00 00 00
0000000000401640 <__main>:
401640: 8b 05 ea 59 00 00 mov 0x59ea(%rip),%eax # 407030 <initialized>
401646: 85 c0 test %eax,%eax
401648: 74 06 je 401650 <__main+0x10>
40164a: c3 retq
40164b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
401650: c7 05 d6 59 00 00 01 movl $0x1,0x59d6(%rip) # 407030 <initialized>
401657: 00 00 00
40165a: e9 71 ff ff ff jmpq 4015d0 <__do_global_ctors>
40165f: 90 nop
I think what you are looking for are these instructions:
40155d: c7 45 f8 05 00 00 00 movl $0x5,-0x8(%rbp)
401564: 48 8d 45 f8 lea -0x8(%rbp),%rax
401568: 48 89 c1 mov %rax,%rcx
40156b: e8 c0 16 00 00 callq 402c30 <_ZN9TestClass6xPlus2Ev>
401570: 89 45 fc mov %eax,-0x4(%rbp)
These match with the code from main:
TestClass tc1 = {5};
int variable = tc1.xPlus2();
At address 40155d the field tc1.x is initialized with the value 5.
At address 401564 the pointer to tc1 is loaded into the register %rax
At address 401568 the pointer to tc1 is copied into the register %rcx
At address 40156b is the call of the method tc1.xPlus2()
At address 401570 the result is stored in variable
Your observations are mostly correct. rcx holds the this pointer to the object on which the method was called. x is stored in the first area of memory that the this pointer points to, so that is why rcx was dereferenced and the result added to. It is the responsibility of the caller to make sure that rcx is the address of the object before invoking the function. We can see main prepare rcx by setting it to an address in its stack frame. You are correct that the compiler produced inefficient code here and did not need to use the stack. Compiling with higher optimization levels -O1, -O2, or -O3 will likely fix that. These higher optimizations will probably get rid of the nops too, since they are used for function alignment. You can mostly ignore __main. It's used for libc initialization.

Adding #if defined preprocessor directives affects the alignment of class members

I have a class which has many members in it
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
#if defined(WEBRTC_TRANSCODE_CASE)
bool is_screen_share_;
#endif
and I have defined WEBRTC_TRANSCODE_CASE in build.gn
defines += ["WEBRTC_TRANSCODE_CASE"]
so is_screen_share_ is visible in the class. I compiled this code with RelWithDebInfo and got a shared library called liba.so. Then I removed the preprocessor directives
bool is_perf_adaption_avalaible_;
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
bool is_screen_share_
and compiled the code with RelWithDebInfo to get libb.so. When I executed the command diff <(objdump -d liba.so) <(objdump -d libb.so) I got
1714248c1714248
< 7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
---
> 7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
It seems there is an 8-byte difference, but why? I just don't understand. Here is part of liba.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C
Here is part of libb.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C.
Any help will be appreciated!

What does assembly code of function "do_compare" exactly do?

The do_compare function is in the libstdc++ library. It basically checks two strings and returns -1, 1, or 0 accordingly.
Here is the C++ code:
// Compares the character sequences delimited by __lo1/__hi1 and __lo2/__hi2.
// Both sequences are copied into strings and then compared one
// nul-terminated segment at a time with _M_compare. Returns the first
// non-zero _M_compare result; otherwise 0 when both copies are exhausted
// at the same time, -1 when only the first is exhausted, and 1 when only
// the second is exhausted.
template<typename _CharT>
int
collate<_CharT>::
do_compare(const _CharT* __lo1, const _CharT* __hi1,
const _CharT* __lo2, const _CharT* __hi2) const
{
// strcoll assumes zero-terminated strings so we make a copy
// and then put a zero at the end.
const string_type __one(__lo1, __hi1);
const string_type __two(__lo2, __hi2);
// __p/__q walk the copies; __pend/__qend mark the end of each copy's data.
const _CharT* __p = __one.c_str();
const _CharT* __pend = __one.data() + __one.length();
const _CharT* __q = __two.c_str();
const _CharT* __qend = __two.data() + __two.length();
// strcoll stops when it sees a nul character so we break
// the strings into zero-terminated substrings and pass those
// to strcoll.
for (;;)
{
const int __res = _M_compare(__p, __q);
if (__res)
return __res;
// Current segments compared equal: advance both cursors to the nul
// that ends each segment.
__p += char_traits<_CharT>::length(__p);
__q += char_traits<_CharT>::length(__q);
if (__p == __pend && __q == __qend)
return 0;
else if (__p == __pend)
return -1;
else if (__q == __qend)
return 1;
// Neither copy is exhausted: step over the nul and compare the next segments.
__p++;
__q++;
}
}
I have to put the entire assembly code of do_compare to show my problem, sorry:
0000000000101c40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4>:
101c40: 41 57 push %r15
101c42: 41 56 push %r14
101c44: 49 89 fe mov %rdi,%r14
101c47: 48 89 f7 mov %rsi,%rdi
101c4a: 48 89 d6 mov %rdx,%rsi
101c4d: 41 55 push %r13
101c4f: 41 54 push %r12
101c51: 55 push %rbp
101c52: 4c 89 c5 mov %r8,%rbp
101c55: 53 push %rbx
101c56: 48 89 cb mov %rcx,%rbx
101c59: 48 83 ec 38 sub $0x38,%rsp
101c5d: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
101c64: 00 00
101c66: 48 89 44 24 28 mov %rax,0x28(%rsp)
101c6b: 31 c0 xor %eax,%eax
101c6d: 4c 8d 6c 24 27 lea 0x27(%rsp),%r13
101c72: 4c 89 ea mov %r13,%rdx
101c75: 4c 89 6c 24 18 mov %r13,0x18(%rsp)
101c7a: e8 f1 a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c7f: 4c 89 ea mov %r13,%rdx
101c82: 48 89 ee mov %rbp,%rsi
101c85: 48 89 df mov %rbx,%rdi
101c88: 49 89 c7 mov %rax,%r15
101c8b: 48 89 44 24 08 mov %rax,0x8(%rsp)
101c90: e8 db a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c95: 4d 8b 67 e8 mov -0x18(%r15),%r12
101c99: 4c 8b 68 e8 mov -0x18(%rax),%r13
101c9d: 48 89 c5 mov %rax,%rbp
101ca0: 48 89 44 24 10 mov %rax,0x10(%rsp)
101ca5: 4c 89 fb mov %r15,%rbx
101ca8: 4d 01 fc add %r15,%r12
101cab: 49 01 c5 add %rax,%r13
101cae: eb 32 jmp 101ce2 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xa2>
101cb0: 48 89 df mov %rbx,%rdi
101cb3: e8 98 87 f8 ff callq 8a450 <strlen#plt>
101cb8: 48 89 ef mov %rbp,%rdi
101cbb: 48 01 c3 add %rax,%rbx
101cbe: e8 8d 87 f8 ff callq 8a450 <strlen#plt>
101cc3: 48 01 c5 add %rax,%rbp
101cc6: 49 39 dc cmp %rbx,%r12
101cc9: 75 05 jne 101cd0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x90>
101ccb: 49 39 ed cmp %rbp,%r13
101cce: 74 27 je 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101cd0: 49 39 dc cmp %rbx,%r12
101cd3: 74 6b je 101d40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x100>
101cd5: 49 39 ed cmp %rbp,%r13
101cd8: 74 76 je 101d50 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x110>
101cda: 48 83 c3 01 add $0x1,%rbx
101cde: 48 83 c5 01 add $0x1,%rbp
101ce2: 48 89 ea mov %rbp,%rdx
101ce5: 48 89 de mov %rbx,%rsi
101ce8: 4c 89 f7 mov %r14,%rdi
101ceb: e8 20 8b f8 ff callq 8a810 <_ZNKSt7collateIcE10_M_compareEPKcS2_#plt>
101cf0: 41 89 c7 mov %eax,%r15d
101cf3: 85 c0 test %eax,%eax
101cf5: 74 b9 je 101cb0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x70>
101cf7: 48 8b 7c 24 10 mov 0x10(%rsp),%rdi
101cfc: 48 8b 1d 9d 08 28 00 mov 0x28089d(%rip),%rbx # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d03: 48 83 ef 18 sub $0x18,%rdi
101d07: 48 39 df cmp %rbx,%rdi
101d0a: 75 54 jne 101d60 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x120>
101d0c: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d11: 48 83 ef 18 sub $0x18,%rdi
101d15: 48 39 df cmp %rbx,%rdi
101d18: 75 56 jne 101d70 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x130>
101d1a: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx
101d1f: 64 48 33 0c 25 28 00 xor %fs:0x28,%rcx
101d26: 00 00
101d28: 44 89 f8 mov %r15d,%eax
101d2b: 75 4f jne 101d7c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x13c>
101d2d: 48 83 c4 38 add $0x38,%rsp
101d31: 5b pop %rbx
101d32: 5d pop %rbp
101d33: 41 5c pop %r12
101d35: 41 5d pop %r13
101d37: 41 5e pop %r14
101d39: 41 5f pop %r15
101d3b: c3 retq
101d3c: 0f 1f 40 00 nopl 0x0(%rax)
101d40: 41 bf ff ff ff ff mov $0xffffffff,%r15d
101d46: eb af jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d48: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d4f: 00
101d50: 41 bf 01 00 00 00 mov $0x1,%r15d
101d56: eb 9f jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d58: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d5f: 00
101d60: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d65: e8 96 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d6a: eb a0 jmp 101d0c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xcc>
101d6c: 0f 1f 40 00 nopl 0x0(%rax)
101d70: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d75: e8 86 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d7a: eb 9e jmp 101d1a <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xda>
101d7c: e8 7f 95 f8 ff callq 8b300 <__stack_chk_fail#plt>
101d81: 48 89 c3 mov %rax,%rbx
101d84: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d89: 48 83 ef 18 sub $0x18,%rdi
101d8d: 48 3b 3d 0c 08 28 00 cmp 0x28080c(%rip),%rdi # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d94: 74 0a je 101da0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x160>
101d96: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d9b: e8 60 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101da0: 48 89 df mov %rbx,%rdi
101da3: e8 e8 a1 f8 ff callq 8bf90 <_Unwind_Resume#plt>
101da8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101daf: 00
*******101db0: 53 push %rbx
101db1: 48 89 fb mov %rdi,%rbx
101db4: 48 8b 3f mov (%rdi),%rdi
101db7: 89 f0 mov %esi,%eax
101db9: 48 85 ff test %rdi,%rdi
101dbc: 74 05 je 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101dbe: 83 fe ff cmp $0xffffffff,%esi
101dc1: 74 05 je 101dc8 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x188>
101dc3: 5b pop %rbx
101dc4: c3 retq
101dc5: 0f 1f 00 nopl (%rax)
101dc8: 48 8b 47 10 mov 0x10(%rdi),%rax
101dcc: 48 3b 47 18 cmp 0x18(%rdi),%rax
101dd0: 73 0e jae 101de0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1a0>
101dd2: 0f b6 00 movzbl (%rax),%eax
101dd5: 5b pop %rbx
101dd6: c3 retq
101dd7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
101dde: 00 00
101de0: 48 8b 07 mov (%rdi),%rax
101de3: ff 50 48 callq *0x48(%rax)
101de6: 83 f8 ff cmp $0xffffffff,%eax
101de9: 75 d8 jne 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101deb: 48 c7 03 00 00 00 00 movq $0x0,(%rbx)
101df2: 5b pop %rbx
101df3: c3 retq
101df4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
101dfb: 00 00 00
101dfe: 66 90 xchg %ax,%ax
101e00: 55 push %rbp
101e01: 89 f5 mov %esi,%ebp
101e03: 53 push %rbx
101e04: 48 89 fb mov %rdi,%rbx
101e07: 48 83 ec 08 sub $0x8,%rsp
101e0b: e8 b0 88 f8 ff callq 8a6c0 <_ZNKSt5ctypeIcE13_M_widen_initEv#plt>
101e10: 48 8b 03 mov (%rbx),%rax
101e13: 48 8b 40 30 mov 0x30(%rax),%rax
101e17: 48 3b 05 7a 11 28 00 cmp 0x28117a(%rip),%rax # 382f98 <_ZNKSt5ctypeIcE8do_widenEc##GLIBCXX_3.4+0x2e2c48>
101e1e: 75 10 jne 101e30 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1f0>
101e20: 48 83 c4 08 add $0x8,%rsp
101e24: 89 e8 mov %ebp,%eax
101e26: 5b pop %rbx
101e27: 5d pop %rbp
101e28: c3 retq
101e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
101e30: 48 83 c4 08 add $0x8,%rsp
101e34: 40 0f be f5 movsbl %bpl,%esi
101e38: 48 89 df mov %rbx,%rdi
101e3b: 5b pop %rbx
101e3c: 5d pop %rbp
101e3d: ff e0 jmpq *%rax
101e3f: 90 nop
It seems to me that the assembly code not only performs the C++ code logic but also adds other logic.
As an example, the function _M_extract_int in libstdc++, which converts a char to an int, calls this code as follows:
callq 0x101db0
The instruction address 0x101db0 is in the middle of the assembly code. The code section from 0x101db0 to 0x101dbc seems to have nothing to do with the above C++ code. Really confused about what is going on here...

Member initializer list, pointer initialization without argument

In a large framework which used to use many smart pointers and now uses raw pointers, I come across situations like this quite often:
// Holds a single raw pointer member.
class A {
public:
int* m;
// m appears in the member initializer list with empty parentheses (no argument).
A() : m() {}
};
The reason is because int* m used to be a smart pointer and so the initializer list called a default constructor. Now that int* m is a raw pointer I am not certain if this is equivalent to:
// Holds a single raw pointer member.
class A {
public:
int* m;
// m is explicitly initialized to the null pointer.
A() : m(nullptr) {}
};
Without the explicit nullptr is A::m still initialized to zero? A look at no optimization objdump -d makes it appear to be yes but I am not certain. The reason I feel that the answer is yes is due to this line in the objdump -d (I posted more of the objdump -d below):
400644: 48 c7 00 00 00 00 00 movq $0x0,(%rax)
Little program that tries to find undefined behavior:
// Test subject: a class whose raw pointer member is initialized with empty
// parentheses. The surrounding program checks whether this form leaves m as
// a null pointer, so the initializer deliberately has no explicit nullptr.
class A {
public:
int* m;
// Empty parentheses value-initialize m; for a pointer this yields the null pointer.
A() : m() {}
};
// Default-constructs 1,000,000 A objects and returns how many of them have a
// non-null m.
int main() {
A buf[1000000];  // every element is constructed by A's default constructor
unsigned int count = 0;
for (unsigned int i = 0; i < 1000000; ++i) {
count += buf[i].m ? 1 : 0;  // adds 1 for each element whose m is not null
}
return count;
}
Compilation, execution, and return value:
g++ -std=c++14 -O0 foo.cpp
./a.out; echo $?
0
Relevant assembly sections from objdump -d:
00000000004005b8 <main>:
4005b8: 55 push %rbp
4005b9: 48 89 e5 mov %rsp,%rbp
4005bc: 41 54 push %r12
4005be: 53 push %rbx
4005bf: 48 81 ec 10 12 7a 00 sub $0x7a1210,%rsp
4005c6: 48 8d 85 e0 ed 85 ff lea -0x7a1220(%rbp),%rax
4005cd: bb 3f 42 0f 00 mov $0xf423f,%ebx
4005d2: 49 89 c4 mov %rax,%r12
4005d5: eb 10 jmp 4005e7 <main+0x2f>
4005d7: 4c 89 e7 mov %r12,%rdi
4005da: e8 59 00 00 00 callq 400638 <_ZN1AC1Ev>
4005df: 49 83 c4 08 add $0x8,%r12
4005e3: 48 83 eb 01 sub $0x1,%rbx
4005e7: 48 83 fb ff cmp $0xffffffffffffffff,%rbx
4005eb: 75 ea jne 4005d7 <main+0x1f>
4005ed: c7 45 ec 00 00 00 00 movl $0x0,-0x14(%rbp)
4005f4: c7 45 e8 00 00 00 00 movl $0x0,-0x18(%rbp)
4005fb: eb 23 jmp 400620 <main+0x68>
4005fd: 8b 45 e8 mov -0x18(%rbp),%eax
400600: 48 8b 84 c5 e0 ed 85 mov -0x7a1220(%rbp,%rax,8),%rax
400607: ff
400608: 48 85 c0 test %rax,%rax
40060b: 74 07 je 400614 <main+0x5c>
40060d: b8 01 00 00 00 mov $0x1,%eax
400612: eb 05 jmp 400619 <main+0x61>
400614: b8 00 00 00 00 mov $0x0,%eax
400619: 01 45 ec add %eax,-0x14(%rbp)
40061c: 83 45 e8 01 addl $0x1,-0x18(%rbp)
400620: 81 7d e8 3f 42 0f 00 cmpl $0xf423f,-0x18(%rbp)
400627: 76 d4 jbe 4005fd <main+0x45>
400629: 8b 45 ec mov -0x14(%rbp),%eax
40062c: 48 81 c4 10 12 7a 00 add $0x7a1210,%rsp
400633: 5b pop %rbx
400634: 41 5c pop %r12
400636: 5d pop %rbp
400637: c3 retq
0000000000400638 <_ZN1AC1Ev>:
400638: 55 push %rbp
400639: 48 89 e5 mov %rsp,%rbp
40063c: 48 89 7d f8 mov %rdi,-0x8(%rbp)
400640: 48 8b 45 f8 mov -0x8(%rbp),%rax
400644: 48 c7 00 00 00 00 00 movq $0x0,(%rax)
40064b: 5d pop %rbp
40064c: c3 retq
40064d: 0f 1f 00 nopl (%rax)
Empty () initializer stands for default-initialization in C++98 and for value-initialization in C++03 and later. For scalar types (including pointers) value-initialization/default-initialization leads to zero-initialization.
Which means that in your case m() and m(nullptr) will have exactly the same effect: in both cases m is initialized as a null pointer. In C++ it was like that since the beginning of standardized times.

SSE2 movddup Not Moving Values

Can someone explain to me why the output of this program is [nan, nan]? The code is supposed to load the value of d into the high and low 64 bits of the XMM1 register and then move the contents of XMM1 into a. Because a is not initialized to a set of specific values, D initializes each element to nan. If the movupd instruction was not in the objdump, I would understand the result, but the instruction is there. Thoughts?
import std.stdio;
// Intended to copy d into both 64-bit halves of XMM1 and then store the
// register into the two-element array a before printing a.
void main()
{
enum double d = 1.0 / cast(double)2;
double[] a = new double[2];
auto aptr = a.ptr;  // pointer to the first element of a
// NOTE(review): the disassembly shows the movupd storing to a slot in main's
// stack frame (-0x30(%rbp)); presumably the compiler treats [aptr] as the
// variable aptr itself rather than the memory it points to - confirm.
asm
{
movddup XMM1, d;
movupd [aptr], XMM1;
}
writeln(a);
}
Here is the objdump of the main function:
0000000000426b88 <_Dmain>:
426b88: 55 push %rbp
426b89: 48 8b ec mov %rsp,%rbp
426b8c: 48 83 ec 50 sub $0x50,%rsp
426b90: f2 48 0f 10 05 77 81 rex.W movsd 0x28177(%rip),%xmm0
426b97: 02 00
426b99: f2 48 0f 11 45 b0 rex.W movsd %xmm0,-0x50(%rbp)
426b9f: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426ba6: 00 00 00
426ba9: f2 48 0f 10 05 66 81 rex.W movsd 0x28166(%rip),%xmm0
426bb0: 02 00
426bb2: 48 8d 7d c0 lea -0x40(%rbp),%rdi
426bb6: e8 65 d1 00 00 callq 433d20 <_memsetDouble>
426bbb: f2 48 0f 10 0d 4c 81 rex.W movsd 0x2814c(%rip),%xmm1
426bc2: 02 00
426bc4: f2 48 0f 11 4d c0 rex.W movsd %xmm1,-0x40(%rbp)
426bca: f2 48 0f 10 15 3d 81 rex.W movsd 0x2813d(%rip),%xmm2
426bd1: 02 00
426bd3: f2 48 0f 11 55 c8 rex.W movsd %xmm2,-0x38(%rbp)
426bd9: 48 8d 45 c0 lea -0x40(%rbp),%rax
426bdd: 48 89 45 d0 mov %rax,-0x30(%rbp)
426be1: 48 8d 55 e0 lea -0x20(%rbp),%rdx
426be5: 48 b8 02 00 00 00 00 movabs $0x2,%rax
426bec: 00 00 00
426bef: 48 89 c1 mov %rax,%rcx
426bf2: 49 89 d0 mov %rdx,%r8
426bf5: 51 push %rcx
426bf6: 41 50 push %r8
426bf8: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426bff: 00 00 00
426c02: 48 bf c0 84 65 00 00 movabs $0x6584c0,%rdi
426c09: 00 00 00
426c0c: e8 87 ce 00 00 callq 433a98 <_d_arrayliteralTX>
426c11: 48 89 45 f0 mov %rax,-0x10(%rbp)
426c15: f2 48 0f 10 05 02 81 rex.W movsd 0x28102(%rip),%xmm0
426c1c: 02 00
426c1e: f2 48 0f 11 00 rex.W movsd %xmm0,(%rax)
426c23: f2 48 0f 10 0d f4 80 rex.W movsd 0x280f4(%rip),%xmm1
426c2a: 02 00
426c2c: 48 8b 45 f0 mov -0x10(%rbp),%rax
426c30: f2 48 0f 11 48 08 rex.W movsd %xmm1,0x8(%rax)
426c36: 48 8b 55 f0 mov -0x10(%rbp),%rdx
426c3a: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426c41: 00 00 00
426c44: 41 58 pop %r8
426c46: 59 pop %rcx
426c47: 48 bf 08 00 00 00 00 movabs $0x8,%rdi
426c4e: 00 00 00
426c51: e8 8e 95 00 00 callq 4301e4 <_d_arraycopy>
426c56: f2 0f 12 4d b0 movddup -0x50(%rbp),%xmm1
426c5b: 66 0f 11 4d d0 movupd %xmm1,-0x30(%rbp)
426c60: ff 75 c8 pushq -0x38(%rbp)
426c63: ff 75 c0 pushq -0x40(%rbp)
426c66: e8 09 00 00 00 callq 426c74 <_D3std5stdio16__T7writelnTG2dZ7writelnFG2dZv>
426c6b: 48 83 c4 10 add $0x10,%rsp
426c6f: 31 c0 xor %eax,%eax
426c71: c9 leaveq
426c72: c3 retq
426c73: 90 nop
I looked into it, and apparently the compiler decides that by movupd [aptr], XMM1 you really mean movupd aptr, XMM1. Loading aptr into a register beforehand (mov aptr, RAX; movupd [RAX], XMM1) will make it work.
You should probably file a bug report.