The do_compare function is in the libstdc++ library. It basically checks two strings and returns -1, 1, or 0 accordingly.
Here is the C++ code:
// Three-way comparison of the ranges [__lo1, __hi1) and [__lo2, __hi2).
// Returns the first non-zero value produced by _M_compare; if every piece
// compares equal, returns 0 when both ranges end together, -1 when the first
// range ends earlier and 1 when the second one does.
template<typename _CharT>
int
collate<_CharT>::
do_compare(const _CharT* __lo1, const _CharT* __hi1,
           const _CharT* __lo2, const _CharT* __hi2) const
{
  // strcoll assumes zero-terminated strings, so work on private copies
  // that are guaranteed to carry a terminator.
  const string_type __lhs(__lo1, __hi1);
  const string_type __rhs(__lo2, __hi2);

  const _CharT* __lcur = __lhs.c_str();
  const _CharT* const __lend = __lhs.data() + __lhs.length();
  const _CharT* __rcur = __rhs.c_str();
  const _CharT* const __rend = __rhs.data() + __rhs.length();

  // strcoll stops at a nul character, so the copies are compared one
  // zero-terminated piece at a time.
  while (true)
    {
      const int __cmp = _M_compare(__lcur, __rcur);
      if (__cmp != 0)
        return __cmp;

      // Advance each cursor to the terminator of the piece just compared.
      __lcur += char_traits<_CharT>::length(__lcur);
      __rcur += char_traits<_CharT>::length(__rcur);

      const bool __ldone = __lcur == __lend;
      const bool __rdone = __rcur == __rend;
      if (__ldone && __rdone)
        return 0;
      if (__ldone)
        return -1;
      if (__rdone)
        return 1;

      // Both cursors sit on an embedded nul: step over it and go on.
      ++__lcur;
      ++__rcur;
    }
}
I have to put the entire assembly code of do_compare to show my problem, sorry:
0000000000101c40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4>:
101c40: 41 57 push %r15
101c42: 41 56 push %r14
101c44: 49 89 fe mov %rdi,%r14
101c47: 48 89 f7 mov %rsi,%rdi
101c4a: 48 89 d6 mov %rdx,%rsi
101c4d: 41 55 push %r13
101c4f: 41 54 push %r12
101c51: 55 push %rbp
101c52: 4c 89 c5 mov %r8,%rbp
101c55: 53 push %rbx
101c56: 48 89 cb mov %rcx,%rbx
101c59: 48 83 ec 38 sub $0x38,%rsp
101c5d: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
101c64: 00 00
101c66: 48 89 44 24 28 mov %rax,0x28(%rsp)
101c6b: 31 c0 xor %eax,%eax
101c6d: 4c 8d 6c 24 27 lea 0x27(%rsp),%r13
101c72: 4c 89 ea mov %r13,%rdx
101c75: 4c 89 6c 24 18 mov %r13,0x18(%rsp)
101c7a: e8 f1 a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c7f: 4c 89 ea mov %r13,%rdx
101c82: 48 89 ee mov %rbp,%rsi
101c85: 48 89 df mov %rbx,%rdi
101c88: 49 89 c7 mov %rax,%r15
101c8b: 48 89 44 24 08 mov %rax,0x8(%rsp)
101c90: e8 db a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c95: 4d 8b 67 e8 mov -0x18(%r15),%r12
101c99: 4c 8b 68 e8 mov -0x18(%rax),%r13
101c9d: 48 89 c5 mov %rax,%rbp
101ca0: 48 89 44 24 10 mov %rax,0x10(%rsp)
101ca5: 4c 89 fb mov %r15,%rbx
101ca8: 4d 01 fc add %r15,%r12
101cab: 49 01 c5 add %rax,%r13
101cae: eb 32 jmp 101ce2 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xa2>
101cb0: 48 89 df mov %rbx,%rdi
101cb3: e8 98 87 f8 ff callq 8a450 <strlen#plt>
101cb8: 48 89 ef mov %rbp,%rdi
101cbb: 48 01 c3 add %rax,%rbx
101cbe: e8 8d 87 f8 ff callq 8a450 <strlen#plt>
101cc3: 48 01 c5 add %rax,%rbp
101cc6: 49 39 dc cmp %rbx,%r12
101cc9: 75 05 jne 101cd0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x90>
101ccb: 49 39 ed cmp %rbp,%r13
101cce: 74 27 je 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101cd0: 49 39 dc cmp %rbx,%r12
101cd3: 74 6b je 101d40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x100>
101cd5: 49 39 ed cmp %rbp,%r13
101cd8: 74 76 je 101d50 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x110>
101cda: 48 83 c3 01 add $0x1,%rbx
101cde: 48 83 c5 01 add $0x1,%rbp
101ce2: 48 89 ea mov %rbp,%rdx
101ce5: 48 89 de mov %rbx,%rsi
101ce8: 4c 89 f7 mov %r14,%rdi
101ceb: e8 20 8b f8 ff callq 8a810 <_ZNKSt7collateIcE10_M_compareEPKcS2_#plt>
101cf0: 41 89 c7 mov %eax,%r15d
101cf3: 85 c0 test %eax,%eax
101cf5: 74 b9 je 101cb0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x70>
101cf7: 48 8b 7c 24 10 mov 0x10(%rsp),%rdi
101cfc: 48 8b 1d 9d 08 28 00 mov 0x28089d(%rip),%rbx # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d03: 48 83 ef 18 sub $0x18,%rdi
101d07: 48 39 df cmp %rbx,%rdi
101d0a: 75 54 jne 101d60 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x120>
101d0c: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d11: 48 83 ef 18 sub $0x18,%rdi
101d15: 48 39 df cmp %rbx,%rdi
101d18: 75 56 jne 101d70 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x130>
101d1a: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx
101d1f: 64 48 33 0c 25 28 00 xor %fs:0x28,%rcx
101d26: 00 00
101d28: 44 89 f8 mov %r15d,%eax
101d2b: 75 4f jne 101d7c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x13c>
101d2d: 48 83 c4 38 add $0x38,%rsp
101d31: 5b pop %rbx
101d32: 5d pop %rbp
101d33: 41 5c pop %r12
101d35: 41 5d pop %r13
101d37: 41 5e pop %r14
101d39: 41 5f pop %r15
101d3b: c3 retq
101d3c: 0f 1f 40 00 nopl 0x0(%rax)
101d40: 41 bf ff ff ff ff mov $0xffffffff,%r15d
101d46: eb af jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d48: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d4f: 00
101d50: 41 bf 01 00 00 00 mov $0x1,%r15d
101d56: eb 9f jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d58: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d5f: 00
101d60: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d65: e8 96 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d6a: eb a0 jmp 101d0c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xcc>
101d6c: 0f 1f 40 00 nopl 0x0(%rax)
101d70: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d75: e8 86 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d7a: eb 9e jmp 101d1a <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xda>
101d7c: e8 7f 95 f8 ff callq 8b300 <__stack_chk_fail#plt>
101d81: 48 89 c3 mov %rax,%rbx
101d84: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d89: 48 83 ef 18 sub $0x18,%rdi
101d8d: 48 3b 3d 0c 08 28 00 cmp 0x28080c(%rip),%rdi # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d94: 74 0a je 101da0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x160>
101d96: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d9b: e8 60 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101da0: 48 89 df mov %rbx,%rdi
101da3: e8 e8 a1 f8 ff callq 8bf90 <_Unwind_Resume#plt>
101da8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101daf: 00
*******101db0: 53 push %rbx
101db1: 48 89 fb mov %rdi,%rbx
101db4: 48 8b 3f mov (%rdi),%rdi
101db7: 89 f0 mov %esi,%eax
101db9: 48 85 ff test %rdi,%rdi
101dbc: 74 05 je 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101dbe: 83 fe ff cmp $0xffffffff,%esi
101dc1: 74 05 je 101dc8 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x188>
101dc3: 5b pop %rbx
101dc4: c3 retq
101dc5: 0f 1f 00 nopl (%rax)
101dc8: 48 8b 47 10 mov 0x10(%rdi),%rax
101dcc: 48 3b 47 18 cmp 0x18(%rdi),%rax
101dd0: 73 0e jae 101de0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1a0>
101dd2: 0f b6 00 movzbl (%rax),%eax
101dd5: 5b pop %rbx
101dd6: c3 retq
101dd7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
101dde: 00 00
101de0: 48 8b 07 mov (%rdi),%rax
101de3: ff 50 48 callq *0x48(%rax)
101de6: 83 f8 ff cmp $0xffffffff,%eax
101de9: 75 d8 jne 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101deb: 48 c7 03 00 00 00 00 movq $0x0,(%rbx)
101df2: 5b pop %rbx
101df3: c3 retq
101df4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
101dfb: 00 00 00
101dfe: 66 90 xchg %ax,%ax
101e00: 55 push %rbp
101e01: 89 f5 mov %esi,%ebp
101e03: 53 push %rbx
101e04: 48 89 fb mov %rdi,%rbx
101e07: 48 83 ec 08 sub $0x8,%rsp
101e0b: e8 b0 88 f8 ff callq 8a6c0 <_ZNKSt5ctypeIcE13_M_widen_initEv#plt>
101e10: 48 8b 03 mov (%rbx),%rax
101e13: 48 8b 40 30 mov 0x30(%rax),%rax
101e17: 48 3b 05 7a 11 28 00 cmp 0x28117a(%rip),%rax # 382f98 <_ZNKSt5ctypeIcE8do_widenEc##GLIBCXX_3.4+0x2e2c48>
101e1e: 75 10 jne 101e30 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1f0>
101e20: 48 83 c4 08 add $0x8,%rsp
101e24: 89 e8 mov %ebp,%eax
101e26: 5b pop %rbx
101e27: 5d pop %rbp
101e28: c3 retq
101e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
101e30: 48 83 c4 08 add $0x8,%rsp
101e34: 40 0f be f5 movsbl %bpl,%esi
101e38: 48 89 df mov %rbx,%rdi
101e3b: 5b pop %rbx
101e3c: 5d pop %rbp
101e3d: ff e0 jmpq *%rax
101e3f: 90 nop
It seems to me that the assembly code not only performs the C++ code logic but also adds other logic.
As an example, the function _M_extract_int in libstdc++, which converts a char to an int, calls this function as follows:
callq 0x101db0
The instruction address 0x101db0 is in the middle of the assembly code. The code section from 0x101db0 to 0x101dbc seems to have nothing to do with the above C++ code. Really confused about what is going on here...
Related
In a case where I get this error:
error: assuming signed overflow does not occur when simplifying conditional
I looked at the assembly and the if() uses:
d34: 48 83 fa 01 cmp $0x1,%rdx
d38: 7e 54 jle d8e <main+0x3ae>
Interestingly enough, %rdx is defined as an unsigned (std::size_t) and the number $0x1 is also defined as an unsigned (2UL in the original). So why would g++ decide to use jle instead of jbe?
Note: Just in case, I tried with if(colons > 1UL) ... and that did not help. Same error, same results in assembly.
C++ code to reproduce the error:
#include <algorithm>
#include <string>
#include <iostream>
// Minimal reproducer for the -Wstrict-overflow diagnostic: counts the ':'
// characters in the first command-line argument and reports on std::cerr
// whether there are at least two of them.
int main(int argc, char * argv[])
{
// NOTE(review): argv[1] is used without checking argc; when the program is
// run without an argument this is a null pointer.
std::string const in(argv[1]);
// std::count returns the iterator's (signed) difference_type; the value is
// converted to the unsigned std::size_t here.
std::size_t const colons(std::count(in.begin(), in.end(), ':'));
// This is the conditional the diagnostic is reported for.
if(colons >= 2UL)
{
std::cerr << "2 or more colons...\n";
}
else
{
std::cerr << "no or just one colon.\n";
}
return 0;
}
Command line used to reproduce the error:
g++ -Werror=strict-overflow -std=c++17 -O3 -o a a.cpp
To compile anyway, just don't use the -Werror=strict-overflow option.
The complete result (this is a bit of a killer since the std::count() gets overly optimized for speed):
00000000000009e0 <main>:
9e0: 41 55 push %r13
9e2: 41 54 push %r12
9e4: 55 push %rbp
9e5: 53 push %rbx
9e6: 48 83 ec 38 sub $0x38,%rsp
9ea: 4c 8b 66 08 mov 0x8(%rsi),%r12
9ee: 48 89 e3 mov %rsp,%rbx
9f1: 4c 8d 6b 10 lea 0x10(%rbx),%r13
9f5: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
9fc: 00 00
9fe: 48 89 44 24 28 mov %rax,0x28(%rsp)
a03: 31 c0 xor %eax,%eax
a05: 4d 85 e4 test %r12,%r12
a08: 4c 89 2c 24 mov %r13,(%rsp)
a0c: 0f 84 e8 03 00 00 je dfa <main+0x41a>
a12: 4c 89 e7 mov %r12,%rdi
a15: e8 16 ff ff ff callq 930 <strlen#plt>
a1a: 48 83 f8 0f cmp $0xf,%rax
a1e: 48 89 c5 mov %rax,%rbp
a21: 0f 87 7c 03 00 00 ja da3 <main+0x3c3>
a27: 48 83 f8 01 cmp $0x1,%rax
a2b: 0f 84 4f 03 00 00 je d80 <main+0x3a0>
a31: 48 85 c0 test %rax,%rax
a34: 0f 85 cc 03 00 00 jne e06 <main+0x426>
a3a: 48 8b 04 24 mov (%rsp),%rax
a3e: 48 89 6c 24 08 mov %rbp,0x8(%rsp)
a43: c6 04 28 00 movb $0x0,(%rax,%rbp,1)
a47: 48 8b 04 24 mov (%rsp),%rax
a4b: 48 8b 54 24 08 mov 0x8(%rsp),%rdx
a50: 48 8d 34 10 lea (%rax,%rdx,1),%rsi
a54: 48 39 f0 cmp %rsi,%rax
a57: 0f 84 31 03 00 00 je d8e <main+0x3ae>
a5d: 48 89 c1 mov %rax,%rcx
a60: 49 89 f1 mov %rsi,%r9
a63: 48 83 ea 01 sub $0x1,%rdx
a67: 48 f7 d9 neg %rcx
a6a: 49 29 c1 sub %rax,%r9
a6d: 41 ba 12 00 00 00 mov $0x12,%r10d
a73: 83 e1 0f and $0xf,%ecx
a76: 48 8d 78 01 lea 0x1(%rax),%rdi
a7a: 4c 8d 41 0f lea 0xf(%rcx),%r8
a7e: 49 83 f8 12 cmp $0x12,%r8
a82: 4d 0f 42 c2 cmovb %r10,%r8
a86: 4c 39 c2 cmp %r8,%rdx
a89: 0f 82 4b 03 00 00 jb dda <main+0x3fa>
a8f: 48 85 c9 test %rcx,%rcx
a92: 0f 84 52 03 00 00 je dea <main+0x40a>
a98: 45 31 d2 xor %r10d,%r10d
a9b: 80 38 3a cmpb $0x3a,(%rax)
a9e: 41 0f 94 c2 sete %r10b
aa2: 48 83 f9 01 cmp $0x1,%rcx
aa6: 0f 84 34 01 00 00 je be0 <main+0x200>
aac: 80 78 01 3a cmpb $0x3a,0x1(%rax)
ab0: 75 04 jne ab6 <main+0xd6>
ab2: 49 83 c2 01 add $0x1,%r10
ab6: 48 83 f9 02 cmp $0x2,%rcx
aba: 48 8d 78 02 lea 0x2(%rax),%rdi
abe: 0f 84 1c 01 00 00 je be0 <main+0x200>
ac4: 80 78 02 3a cmpb $0x3a,0x2(%rax)
ac8: 75 04 jne ace <main+0xee>
aca: 49 83 c2 01 add $0x1,%r10
ace: 48 83 f9 03 cmp $0x3,%rcx
ad2: 48 8d 78 03 lea 0x3(%rax),%rdi
ad6: 0f 84 04 01 00 00 je be0 <main+0x200>
adc: 80 78 03 3a cmpb $0x3a,0x3(%rax)
ae0: 75 04 jne ae6 <main+0x106>
ae2: 49 83 c2 01 add $0x1,%r10
ae6: 48 83 f9 04 cmp $0x4,%rcx
aea: 48 8d 78 04 lea 0x4(%rax),%rdi
aee: 0f 84 ec 00 00 00 je be0 <main+0x200>
af4: 80 78 04 3a cmpb $0x3a,0x4(%rax)
af8: 75 04 jne afe <main+0x11e>
afa: 49 83 c2 01 add $0x1,%r10
afe: 48 83 f9 05 cmp $0x5,%rcx
b02: 48 8d 78 05 lea 0x5(%rax),%rdi
b06: 0f 84 d4 00 00 00 je be0 <main+0x200>
b0c: 80 78 05 3a cmpb $0x3a,0x5(%rax)
b10: 75 04 jne b16 <main+0x136>
b12: 49 83 c2 01 add $0x1,%r10
b16: 48 83 f9 06 cmp $0x6,%rcx
b1a: 48 8d 78 06 lea 0x6(%rax),%rdi
b1e: 0f 84 bc 00 00 00 je be0 <main+0x200>
b24: 80 78 06 3a cmpb $0x3a,0x6(%rax)
b28: 0f 84 9a 02 00 00 je dc8 <main+0x3e8>
b2e: 48 83 f9 07 cmp $0x7,%rcx
b32: 48 8d 78 07 lea 0x7(%rax),%rdi
b36: 0f 84 a4 00 00 00 je be0 <main+0x200>
b3c: 80 78 07 3a cmpb $0x3a,0x7(%rax)
b40: 0f 84 8b 02 00 00 je dd1 <main+0x3f1>
b46: 48 83 f9 08 cmp $0x8,%rcx
b4a: 48 8d 78 08 lea 0x8(%rax),%rdi
b4e: 0f 84 8c 00 00 00 je be0 <main+0x200>
b54: 80 78 08 3a cmpb $0x3a,0x8(%rax)
b58: 75 04 jne b5e <main+0x17e>
b5a: 49 83 c2 01 add $0x1,%r10
b5e: 48 83 f9 09 cmp $0x9,%rcx
b62: 48 8d 78 09 lea 0x9(%rax),%rdi
b66: 74 78 je be0 <main+0x200>
b68: 80 78 09 3a cmpb $0x3a,0x9(%rax)
b6c: 75 04 jne b72 <main+0x192>
b6e: 49 83 c2 01 add $0x1,%r10
b72: 48 83 f9 0a cmp $0xa,%rcx
b76: 48 8d 78 0a lea 0xa(%rax),%rdi
b7a: 74 64 je be0 <main+0x200>
b7c: 80 78 0a 3a cmpb $0x3a,0xa(%rax)
b80: 75 04 jne b86 <main+0x1a6>
b82: 49 83 c2 01 add $0x1,%r10
b86: 48 83 f9 0b cmp $0xb,%rcx
b8a: 48 8d 78 0b lea 0xb(%rax),%rdi
b8e: 74 50 je be0 <main+0x200>
b90: 80 78 0b 3a cmpb $0x3a,0xb(%rax)
b94: 75 04 jne b9a <main+0x1ba>
b96: 49 83 c2 01 add $0x1,%r10
b9a: 48 83 f9 0c cmp $0xc,%rcx
b9e: 48 8d 78 0c lea 0xc(%rax),%rdi
ba2: 74 3c je be0 <main+0x200>
ba4: 80 78 0c 3a cmpb $0x3a,0xc(%rax)
ba8: 75 04 jne bae <main+0x1ce>
baa: 49 83 c2 01 add $0x1,%r10
bae: 48 83 f9 0d cmp $0xd,%rcx
bb2: 48 8d 78 0d lea 0xd(%rax),%rdi
bb6: 74 28 je be0 <main+0x200>
bb8: 80 78 0d 3a cmpb $0x3a,0xd(%rax)
bbc: 75 04 jne bc2 <main+0x1e2>
bbe: 49 83 c2 01 add $0x1,%r10
bc2: 48 83 f9 0f cmp $0xf,%rcx
bc6: 48 8d 78 0e lea 0xe(%rax),%rdi
bca: 75 14 jne be0 <main+0x200>
bcc: 80 78 0e 3a cmpb $0x3a,0xe(%rax)
bd0: 0f 84 0b 02 00 00 je de1 <main+0x401>
bd6: 48 8d 78 0f lea 0xf(%rax),%rdi
bda: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
be0: 49 29 c9 sub %rcx,%r9
be3: 66 45 0f ef c0 pxor %xmm8,%xmm8
be8: 66 0f ef e4 pxor %xmm4,%xmm4
bec: 4d 89 c8 mov %r9,%r8
bef: 66 0f ef db pxor %xmm3,%xmm3
bf3: 48 01 c8 add %rcx,%rax
bf6: 66 0f ef d2 pxor %xmm2,%xmm2
bfa: 49 c1 e8 04 shr $0x4,%r8
bfe: 66 0f 6f 35 5a 04 00 movdqa 0x45a(%rip),%xmm6 # 1060 <_IO_stdin_used+0x70>
c05: 00
c06: 31 c9 xor %ecx,%ecx
c08: 66 0f 6f 2d 60 04 00 movdqa 0x460(%rip),%xmm5 # 1070 <_IO_stdin_used+0x80>
c0f: 00
c10: 66 0f 6f cc movdqa %xmm4,%xmm1
c14: 66 44 0f 6f da movdqa %xmm2,%xmm11
c19: 66 0f 6f 00 movdqa (%rax),%xmm0
c1d: 48 83 c1 01 add $0x1,%rcx
c21: 48 83 c0 10 add $0x10,%rax
c25: 49 39 c8 cmp %rcx,%r8
c28: 66 0f 74 c6 pcmpeqb %xmm6,%xmm0
c2c: 66 0f db c5 pand %xmm5,%xmm0
c30: 66 0f 64 c8 pcmpgtb %xmm0,%xmm1
c34: 66 0f 6f f8 movdqa %xmm0,%xmm7
c38: 66 0f 60 f9 punpcklbw %xmm1,%xmm7
c3c: 66 0f 68 c1 punpckhbw %xmm1,%xmm0
c40: 66 0f 6f cb movdqa %xmm3,%xmm1
c44: 66 44 0f 6f d7 movdqa %xmm7,%xmm10
c49: 66 0f 65 cf pcmpgtw %xmm7,%xmm1
c4d: 66 44 0f 6f c8 movdqa %xmm0,%xmm9
c52: 66 44 0f 61 d1 punpcklwd %xmm1,%xmm10
c57: 66 0f 69 f9 punpckhwd %xmm1,%xmm7
c5b: 66 0f 6f cb movdqa %xmm3,%xmm1
c5f: 66 0f 65 c8 pcmpgtw %xmm0,%xmm1
c63: 66 45 0f 66 da pcmpgtd %xmm10,%xmm11
c68: 66 44 0f 61 c9 punpcklwd %xmm1,%xmm9
c6d: 66 0f 69 c1 punpckhwd %xmm1,%xmm0
c71: 66 41 0f 6f ca movdqa %xmm10,%xmm1
c76: 66 45 0f 6a d3 punpckhdq %xmm11,%xmm10
c7b: 66 41 0f 62 cb punpckldq %xmm11,%xmm1
c80: 66 41 0f d4 c8 paddq %xmm8,%xmm1
c85: 66 44 0f 6f c2 movdqa %xmm2,%xmm8
c8a: 66 41 0f d4 ca paddq %xmm10,%xmm1
c8f: 66 44 0f 6f d7 movdqa %xmm7,%xmm10
c94: 66 44 0f 66 c7 pcmpgtd %xmm7,%xmm8
c99: 66 41 0f 6a f8 punpckhdq %xmm8,%xmm7
c9e: 66 45 0f 62 d0 punpckldq %xmm8,%xmm10
ca3: 66 45 0f 6f c1 movdqa %xmm9,%xmm8
ca8: 66 41 0f d4 ca paddq %xmm10,%xmm1
cad: 66 0f d4 cf paddq %xmm7,%xmm1
cb1: 66 0f 6f fa movdqa %xmm2,%xmm7
cb5: 66 41 0f 66 f9 pcmpgtd %xmm9,%xmm7
cba: 66 44 0f 62 c7 punpckldq %xmm7,%xmm8
cbf: 66 44 0f 6a cf punpckhdq %xmm7,%xmm9
cc4: 66 0f 6f fa movdqa %xmm2,%xmm7
cc8: 66 41 0f d4 c8 paddq %xmm8,%xmm1
ccd: 66 0f 66 f8 pcmpgtd %xmm0,%xmm7
cd1: 66 44 0f 6f c0 movdqa %xmm0,%xmm8
cd6: 66 41 0f d4 c9 paddq %xmm9,%xmm1
cdb: 66 44 0f 62 c7 punpckldq %xmm7,%xmm8
ce0: 66 0f 6a c7 punpckhdq %xmm7,%xmm0
ce4: 66 41 0f d4 c8 paddq %xmm8,%xmm1
ce9: 66 0f d4 c8 paddq %xmm0,%xmm1
ced: 66 44 0f 6f c1 movdqa %xmm1,%xmm8
cf2: 0f 87 18 ff ff ff ja c10 <main+0x230>
cf8: 66 0f 73 d9 08 psrldq $0x8,%xmm1
cfd: 4c 89 c9 mov %r9,%rcx
d00: 66 41 0f d4 c8 paddq %xmm8,%xmm1
d05: 66 48 0f 7e ca movq %xmm1,%rdx
d0a: 48 83 e1 f0 and $0xfffffffffffffff0,%rcx
d0e: 48 8d 04 0f lea (%rdi,%rcx,1),%rax
d12: 4c 01 d2 add %r10,%rdx
d15: 49 39 c9 cmp %rcx,%r9
d18: 74 1a je d34 <main+0x354>
d1a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
d20: 31 c9 xor %ecx,%ecx
d22: 80 38 3a cmpb $0x3a,(%rax)
d25: 0f 94 c1 sete %cl
d28: 48 83 c0 01 add $0x1,%rax
d2c: 48 01 ca add %rcx,%rdx
d2f: 48 39 c6 cmp %rax,%rsi
d32: 75 ec jne d20 <main+0x340>
# Area of interest:
d34: 48 83 fa 01 cmp $0x1,%rdx
d38: 7e 54 jle d8e <main+0x3ae>
d3a: 48 8d 35 e7 02 00 00 lea 0x2e7(%rip),%rsi # 1028 <_IO_stdin_used+0x38>
d41: 48 8d 3d d8 12 20 00 lea 0x2012d8(%rip),%rdi # 202020 <_ZSt4cerr##GLIBCXX_3.4>
d48: e8 33 fc ff ff callq 980 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc#plt>
d4d: 48 8b 3c 24 mov (%rsp),%rdi
d51: 48 83 c3 10 add $0x10,%rbx
d55: 48 39 df cmp %rbx,%rdi
d58: 74 05 je d5f <main+0x37f>
d5a: e8 11 fc ff ff callq 970 <_ZdlPv#plt>
d5f: 31 c0 xor %eax,%eax
d61: 48 8b 5c 24 28 mov 0x28(%rsp),%rbx
d66: 64 48 33 1c 25 28 00 xor %fs:0x28,%rbx
d6d: 00 00
d6f: 0f 85 80 00 00 00 jne df5 <main+0x415>
d75: 48 83 c4 38 add $0x38,%rsp
d79: 5b pop %rbx
d7a: 5d pop %rbp
d7b: 41 5c pop %r12
d7d: 41 5d pop %r13
d7f: c3 retq
d80: 41 0f b6 04 24 movzbl (%r12),%eax
d85: 88 44 24 10 mov %al,0x10(%rsp)
d89: e9 ac fc ff ff jmpq a3a <main+0x5a>
d8e: 48 8d 35 a8 02 00 00 lea 0x2a8(%rip),%rsi # 103d <_IO_stdin_used+0x4d>
d95: 48 8d 3d 84 12 20 00 lea 0x201284(%rip),%rdi # 202020 <_ZSt4cerr##GLIBCXX_3.4>
d9c: e8 df fb ff ff callq 980 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc#plt>
da1: eb aa jmp d4d <main+0x36d>
da3: 48 8d 78 01 lea 0x1(%rax),%rdi
da7: e8 e4 fb ff ff callq 990 <_Znwm#plt>
dac: 48 89 6c 24 10 mov %rbp,0x10(%rsp)
db1: 48 89 04 24 mov %rax,(%rsp)
db5: 48 89 ea mov %rbp,%rdx
db8: 4c 89 e6 mov %r12,%rsi
dbb: 48 89 c7 mov %rax,%rdi
dbe: e8 8d fb ff ff callq 950 <memcpy#plt>
dc3: e9 72 fc ff ff jmpq a3a <main+0x5a>
dc8: 49 83 c2 01 add $0x1,%r10
dcc: e9 5d fd ff ff jmpq b2e <main+0x14e>
dd1: 49 83 c2 01 add $0x1,%r10
dd5: e9 6c fd ff ff jmpq b46 <main+0x166>
dda: 31 d2 xor %edx,%edx
ddc: e9 3f ff ff ff jmpq d20 <main+0x340>
de1: 49 83 c2 01 add $0x1,%r10
de5: e9 ec fd ff ff jmpq bd6 <main+0x1f6>
dea: 48 89 c7 mov %rax,%rdi
ded: 45 31 d2 xor %r10d,%r10d
df0: e9 eb fd ff ff jmpq be0 <main+0x200>
df5: e8 a6 fb ff ff callq 9a0 <__stack_chk_fail#plt>
dfa: 48 8d 3d f7 01 00 00 lea 0x1f7(%rip),%rdi # ff8 <_IO_stdin_used+0x8>
e01: e8 3a fb ff ff callq 940 <_ZSt19__throw_logic_errorPKc#plt>
e06: 4c 89 e8 mov %r13,%rax
e09: eb aa jmp db5 <main+0x3d5>
e0b: 48 8b 3c 24 mov (%rsp),%rdi
e0f: 48 83 c3 10 add $0x10,%rbx
e13: 48 89 c5 mov %rax,%rbp
e16: 48 39 df cmp %rbx,%rdi
e19: 74 05 je e20 <main+0x440>
e1b: e8 50 fb ff ff callq 970 <_ZdlPv#plt>
e20: 48 89 ef mov %rbp,%rdi
e23: e8 98 fb ff ff callq 9c0 <_Unwind_Resume#plt>
e28: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
e2f: 00
For those interested, you may fix the issue by using signed numbers as in:
#include <type_traits>
...
if(static_cast<std::make_signed_t<decltype(colons)>>(colons) >= 2LL)
...
or wrap the if() statement around #pragma like so:
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-overflow"
if(colons >= 2LL)
#pragma GCC diagnostic pop
But this is clearly not the question here.
std::count is defined as: https://github.com/gcc-mirror/gcc/blob/16e2427f50c208dfe07d07f18009969502c25dc8/libstdc%2B%2B-v3/include/bits/stl_algo.h#L4045
/**
* @brief Count the number of copies of a value in a sequence.
* @ingroup non_mutating_algorithms
* @param __first An input iterator.
* @param __last An input iterator.
* @param __value The value to be counted.
* @return The number of iterators @c i in the range @p [__first,__last)
* for which @c *i == @p __value
*
* The count is returned as the iterator's @c difference_type and is
* computed by forwarding to @c std::__count_if.
*/
template<typename _InputIterator, typename _Tp>
_GLIBCXX20_CONSTEXPR
inline typename iterator_traits<_InputIterator>::difference_type
count(_InputIterator __first, _InputIterator __last, const _Tp& __value)
{
// concept requirements
__glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
__glibcxx_function_requires(_EqualOpConcept<
typename iterator_traits<_InputIterator>::value_type, _Tp>)
__glibcxx_requires_valid_range(__first, __last);
// Delegate to the predicate-based counter with an "equals __value" predicate.
return std::__count_if(__first, __last,
__gnu_cxx::__ops::__iter_equals_val(__value));
}
Then https://github.com/gcc-mirror/gcc/blob/16e2427f50c208dfe07d07f18009969502c25dc8/libstdc%2B%2B-v3/include/bits/stl_algobase.h#L2118 :
// Counts the iterators in [__first, __last) for which __pred returns true.
// Note that __pred is called with the iterator itself, not the element it
// refers to.  The tally is kept in the iterator's difference_type.
template<typename _InputIterator, typename _Predicate>
_GLIBCXX20_CONSTEXPR
typename iterator_traits<_InputIterator>::difference_type
__count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred)
{
  typedef typename iterator_traits<_InputIterator>::difference_type
    _Distance;

  _Distance __n = 0;
  while (__first != __last)
    {
      if (__pred(__first))
        ++__n;
      ++__first;
    }
  return __n;
}
__n has type iterator_traits<_InputIterator>::difference_type, which here is ptrdiff_t, a signed type. Doing ++__n could overflow, but signed overflow is undefined behavior, so the compiler is allowed to assume it never happens. Since __n starts at 0 and is only ever incremented, std::count() therefore cannot return a negative value. For a non-negative value the signed and unsigned comparisons give the same answer, so the compiler is free to use jle instead of jbe.
I wanted to know how methods are implemented in C++ "under the hood".
So, I have made a simple C++ program which has a class with 1 non-static field and 1 non-static, non-virtual method.
Then I instantiated the class in the main function and called the method. I used objdump with the -d option in order to see the CPU instructions of this program. I have an x86-64 processor.
Here's the code:
#include<stdio.h>
/// Minimal class used to inspect how a non-static, non-virtual member
/// function is compiled.
class TestClass {
public:
    int x;          ///< The only non-static data member.

    int xPlus2();   ///< Returns the value of x increased by 2.
};

// Defined outside the class body but declared inline, matching the linkage
// an in-class definition would have.
inline int TestClass::xPlus2()
{
    return this->x + 2;
}
// Creates one local TestClass object, calls its method once and prints the
// returned value.
int main(){
// Aggregate initialization: the field x receives the value 5.
TestClass tc1 = {5};
// Calls the member function on tc1; it returns x + 2.
int variable = tc1.xPlus2();
printf("%d \n", variable);
return 0;
}
Here are instructions for the method xPlus2:
0000000000402c30 <_ZN9TestClass6xPlus2Ev>:
402c30: 55 push %rbp
402c31: 48 89 e5 mov %rsp,%rbp
402c34: 48 89 4d 10 mov %rcx,0x10(%rbp)
402c38: 48 8b 45 10 mov 0x10(%rbp),%rax
402c3c: 8b 00 mov (%rax),%eax
402c3e: 83 c0 02 add $0x2,%eax
402c41: 5d pop %rbp
402c42: c3 retq
402c43: 90 nop
402c44: 90 nop
402c45: 90 nop
402c46: 90 nop
402c47: 90 nop
402c48: 90 nop
402c49: 90 nop
402c4a: 90 nop
402c4b: 90 nop
402c4c: 90 nop
402c4d: 90 nop
402c4e: 90 nop
402c4f: 90 nop
If I understand it correctly, these instructions can be replaced by just 3 instructions, because I believe that I don't need to use the stack, I think the compiler used it redundantly:
mov (%rcx), %eax
add $2, %eax
retq
and then maybe I still need lots of nop instructions for synchronization purposes or whatnot. If you look at the CPU instructions, it looks like the value of the x field is stored at the memory location whose address the rcx register holds. You will see the rest of the CPU instructions in a moment. It is a little bit hard for me to track what has happened here (especially what is going on with the call of the __main function); I don't even know which parts of the assembly are important to look at. The compiler produces the main function (as I expected), but it also produced a __main function which is called from main, and there are some weird functions in between those two as well.
Here are other parts of the assembly that I think may be interesting:
0000000000401550 <main>:
401550: 55 push %rbp
401551: 48 89 e5 mov %rsp,%rbp
401554: 48 83 ec 30 sub $0x30,%rsp
401558: e8 e3 00 00 00 callq 401640 <__main>
40155d: c7 45 f8 05 00 00 00 movl $0x5,-0x8(%rbp)
401564: 48 8d 45 f8 lea -0x8(%rbp),%rax
401568: 48 89 c1 mov %rax,%rcx
40156b: e8 c0 16 00 00 callq 402c30 <_ZN9TestClass6xPlus2Ev>
401570: 89 45 fc mov %eax,-0x4(%rbp)
401573: 8b 45 fc mov -0x4(%rbp),%eax
401576: 89 c2 mov %eax,%edx
401578: 48 8d 0d 81 2a 00 00 lea 0x2a81(%rip),%rcx # 404000 <.rdata>
40157f: e8 ec 14 00 00 callq 402a70 <printf>
401584: b8 00 00 00 00 mov $0x0,%eax
401589: 48 83 c4 30 add $0x30,%rsp
40158d: 5d pop %rbp
40158e: c3 retq
40158f: 90 nop
0000000000401590 <__do_global_dtors>:
401590: 48 83 ec 28 sub $0x28,%rsp
401594: 48 8b 05 75 1a 00 00 mov 0x1a75(%rip),%rax # 403010 <p.93846>
40159b: 48 8b 00 mov (%rax),%rax
40159e: 48 85 c0 test %rax,%rax
4015a1: 74 1d je 4015c0 <__do_global_dtors+0x30>
4015a3: ff d0 callq *%rax
4015a5: 48 8b 05 64 1a 00 00 mov 0x1a64(%rip),%rax # 403010 <p.93846>
4015ac: 48 8d 50 08 lea 0x8(%rax),%rdx
4015b0: 48 8b 40 08 mov 0x8(%rax),%rax
4015b4: 48 89 15 55 1a 00 00 mov %rdx,0x1a55(%rip) # 403010 <p.93846>
4015bb: 48 85 c0 test %rax,%rax
4015be: 75 e3 jne 4015a3 <__do_global_dtors+0x13>
4015c0: 48 83 c4 28 add $0x28,%rsp
4015c4: c3 retq
4015c5: 90 nop
4015c6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
4015cd: 00 00 00
00000000004015d0 <__do_global_ctors>:
4015d0: 56 push %rsi
4015d1: 53 push %rbx
4015d2: 48 83 ec 28 sub $0x28,%rsp
4015d6: 48 8b 0d 23 2d 00 00 mov 0x2d23(%rip),%rcx # 404300 <.refptr.__CTOR_LIST__>
4015dd: 48 8b 11 mov (%rcx),%rdx
4015e0: 83 fa ff cmp $0xffffffff,%edx
4015e3: 89 d0 mov %edx,%eax
4015e5: 74 39 je 401620 <__do_global_ctors+0x50>
4015e7: 85 c0 test %eax,%eax
4015e9: 74 20 je 40160b <__do_global_ctors+0x3b>
4015eb: 89 c2 mov %eax,%edx
4015ed: 83 e8 01 sub $0x1,%eax
4015f0: 48 8d 1c d1 lea (%rcx,%rdx,8),%rbx
4015f4: 48 29 c2 sub %rax,%rdx
4015f7: 48 8d 74 d1 f8 lea -0x8(%rcx,%rdx,8),%rsi
4015fc: 0f 1f 40 00 nopl 0x0(%rax)
401600: ff 13 callq *(%rbx)
401602: 48 83 eb 08 sub $0x8,%rbx
401606: 48 39 f3 cmp %rsi,%rbx
401609: 75 f5 jne 401600 <__do_global_ctors+0x30>
40160b: 48 8d 0d 7e ff ff ff lea -0x82(%rip),%rcx # 401590 <__do_global_dtors>
401612: 48 83 c4 28 add $0x28,%rsp
401616: 5b pop %rbx
401617: 5e pop %rsi
401618: e9 f3 fe ff ff jmpq 401510 <atexit>
40161d: 0f 1f 00 nopl (%rax)
401620: 31 c0 xor %eax,%eax
401622: eb 02 jmp 401626 <__do_global_ctors+0x56>
401624: 89 d0 mov %edx,%eax
401626: 44 8d 40 01 lea 0x1(%rax),%r8d
40162a: 4a 83 3c c1 00 cmpq $0x0,(%rcx,%r8,8)
40162f: 4c 89 c2 mov %r8,%rdx
401632: 75 f0 jne 401624 <__do_global_ctors+0x54>
401634: eb b1 jmp 4015e7 <__do_global_ctors+0x17>
401636: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
40163d: 00 00 00
0000000000401640 <__main>:
401640: 8b 05 ea 59 00 00 mov 0x59ea(%rip),%eax # 407030 <initialized>
401646: 85 c0 test %eax,%eax
401648: 74 06 je 401650 <__main+0x10>
40164a: c3 retq
40164b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
401650: c7 05 d6 59 00 00 01 movl $0x1,0x59d6(%rip) # 407030 <initialized>
401657: 00 00 00
40165a: e9 71 ff ff ff jmpq 4015d0 <__do_global_ctors>
40165f: 90 nop
I think what you are looking for are these instructions:
40155d: c7 45 f8 05 00 00 00 movl $0x5,-0x8(%rbp)
401564: 48 8d 45 f8 lea -0x8(%rbp),%rax
401568: 48 89 c1 mov %rax,%rcx
40156b: e8 c0 16 00 00 callq 402c30 <_ZN9TestClass6xPlus2Ev>
401570: 89 45 fc mov %eax,-0x4(%rbp)
These match with the code from main:
TestClass tc1 = {5};
int variable = tc1.xPlus2();
At address 40155d the field tc1.x is initialized with the value 5.
At address 401564 the pointer to tc1 is loaded into the register %rax
At address 401568 the pointer to tc1 is copied into the register %rcx
At address 40156b is the call of the method tc1.xPlus2()
At address 401570 the result is stored in variable
Your observations are mostly correct. rcx holds the this pointer to the object on which the method was called. x is stored at the start of the memory that the this pointer points to, which is why rcx was dereferenced and 2 was added to the loaded value. It is the responsibility of the caller to make sure that rcx is the address of the object before invoking the function. We can see main prepare rcx by setting it to an address in its stack frame. You are correct that the compiler produced inefficient code here and did not need to use the stack. Compiling with higher optimization levels -O1, -O2, or -O3 will likely fix that. The nops are padding used for function alignment, so they are not something you need to write yourself. You can mostly ignore __main. It performs one-time start-up initialization: in the listing above it calls __do_global_ctors, which runs the global constructors and registers __do_global_dtors with atexit.
I have a class which has many members in it
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
#if defined(WEBRTC_TRANSCODE_CASE)
bool is_screen_share_;
#endif
and I have defined WEBRTC_TRANSCODE_CASE in build.gn
defines += ["WEBRTC_TRANSCODE_CASE"]
so the is_screen_share_ is visible in the class, I compile this code with RelWithDebInfo and got a shared library called liba.so, then I remove the preprocessor directives
bool is_perf_adaption_avalaible_;
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
bool is_screen_share_
and compile the code with RelWithDebInfo got libb.so, when I execute the command diff <(objdump -d liba.so) <(objdump -d libb.so) and got
1714248c1714248
< 7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
---
> 7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
The two values differ by 8 bytes (0xfb8 versus 0xfc0), but why? I just don't understand. Here is part of liba.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C
it's part of libb.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C.
Any help will be appreciated!
I used gcc 6.3.0 with address sanitizer to compile the following code:
#include <iostream>
// Adds one to the referenced integer in place and hands back the updated value.
int increment(int &x)
{
    return ++x;
}
// Program entry point: creates a local int, passes it by reference to
// increment(), and exits with status 0.
int main()
{
// x is a local variable of main; increment() receives a reference to it.
int x = 0;
// The value returned by increment() is discarded; only the in-place
// update of x takes effect.
increment(x);
return 0;
}
The code gets compiled and instrumented. Objdump (-S) of the compiled code:
000008d0 <_Z9incrementRi>:
int increment(int &x) {
8d0: 55 push %ebp
8d1: 89 e5 mov %esp,%ebp
8d3: 56 push %esi
8d4: 53 push %ebx
8d5: 83 ec 08 sub $0x8,%esp
8d8: e8 9f 01 00 00 call a7c <__x86.get_pc_thunk.cx>
8dd: 81 c1 b3 89 00 00 add $0x89b3,%ecx
++x;
8e3: 8b 45 08 mov 0x8(%ebp),%eax
8e6: 89 c2 mov %eax,%edx
8e8: c1 ea 03 shr $0x3,%edx
8eb: 81 c2 00 00 00 20 add $0x20000000,%edx
8f1: 0f b6 12 movzbl (%edx),%edx
8f4: 84 d2 test %dl,%dl
8f6: 0f 95 45 f7 setne -0x9(%ebp)
8fa: 89 c6 mov %eax,%esi
8fc: 83 e6 07 and $0x7,%esi
8ff: 8d 5e 03 lea 0x3(%esi),%ebx
902: 38 d3 cmp %dl,%bl
904: 0f 9d c2 setge %dl
907: 22 55 f7 and -0x9(%ebp),%dl
90a: 84 d2 test %dl,%dl
90c: 74 0b je 919 <_Z9incrementRi+0x49>
90e: 83 ec 04 sub $0x4,%esp
911: 50 push %eax
912: 89 cb mov %ecx,%ebx
914: e8 a0 07 00 00 call 10b9 <__asan_report_load4>
919: 8b 45 08 mov 0x8(%ebp),%eax
91c: 8b 00 mov (%eax),%eax
91e: 8d 50 01 lea 0x1(%eax),%edx
921: 8b 45 08 mov 0x8(%ebp),%eax
924: 89 10 mov %edx,(%eax)
}
926: 90 nop
927: 8d 65 f8 lea -0x8(%ebp),%esp
92a: 5b pop %ebx
92b: 5e pop %esi
92c: 5d pop %ebp
92d: c3 ret
0000092e <main>:
int main(void)
{
92e: 8d 4c 24 04 lea 0x4(%esp),%ecx
932: 83 e4 f8 and $0xfffffff8,%esp
935: ff 71 fc pushl -0x4(%ecx)
938: 55 push %ebp
939: 89 e5 mov %esp,%ebp
93b: 57 push %edi
93c: 56 push %esi
93d: 53 push %ebx
93e: 51 push %ecx
93f: 83 ec 60 sub $0x60,%esp
942: e8 39 01 00 00 call a80 <__x86.get_pc_thunk.bx>
947: 81 c3 49 89 00 00 add $0x8949,%ebx
94d: 8d 75 90 lea -0x70(%ebp),%esi
950: 89 f7 mov %esi,%edi
952: 8d 83 d0 05 00 00 lea 0x5d0(%ebx),%eax
958: 83 38 00 cmpl $0x0,(%eax)
95b: 74 13 je 970 <main+0x42>
95d: 83 ec 04 sub $0x4,%esp
960: 6a 60 push $0x60
962: e8 b4 02 00 00 call c1b <__asan_stack_malloc_1>
967: 83 c4 08 add $0x8,%esp
96a: 85 c0 test %eax,%eax
96c: 74 02 je 970 <main+0x42>
96e: 89 c6 mov %eax,%esi
970: 8d 46 60 lea 0x60(%esi),%eax
973: c7 06 b3 8a b5 41 movl $0x41b58ab3,(%esi)
979: 8d 93 d0 e8 ff ff lea -0x1730(%ebx),%edx
97f: 89 56 04 mov %edx,0x4(%esi)
982: 8d 93 9e 76 ff ff lea -0x8962(%ebx),%edx
988: 89 56 08 mov %edx,0x8(%esi)
98b: 89 f3 mov %esi,%ebx
98d: c1 eb 03 shr $0x3,%ebx
990: c7 83 00 00 00 20 f1 movl $0xf1f1f1f1,0x20000000(%ebx)
997: f1 f1 f1
99a: c7 83 04 00 00 20 04 movl $0xf4f4f404,0x20000004(%ebx)
9a1: f4 f4 f4
9a4: c7 83 08 00 00 20 f3 movl $0xf3f3f3f3,0x20000008(%ebx)
9ab: f3 f3 f3
int x = 0;
9ae: c7 40 c0 00 00 00 00 movl $0x0,-0x40(%eax)
increment(x);
9b5: 83 ec 04 sub $0x4,%esp
9b8: 83 e8 40 sub $0x40,%eax
9bb: 50 push %eax
9bc: e8 0f ff ff ff call 8d0 <_Z9incrementRi>
9c1: 83 c4 08 add $0x8,%esp
return 0;
9c4: b8 00 00 00 00 mov $0x0,%eax
{
9c9: 39 f7 cmp %esi,%edi
9cb: 74 26 je 9f3 <main+0xc5>
9cd: c7 06 0e 36 e0 45 movl $0x45e0360e,(%esi)
9d3: c7 83 00 00 00 20 f5 movl $0xf5f5f5f5,0x20000000(%ebx)
9da: f5 f5 f5
9dd: c7 83 04 00 00 20 f5 movl $0xf5f5f5f5,0x20000004(%ebx)
9e4: f5 f5 f5
9e7: c7 83 08 00 00 20 f5 movl $0xf5f5f5f5,0x20000008(%ebx)
9ee: f5 f5 f5
9f1: eb 1e jmp a11 <main+0xe3>
9f3: c7 83 00 00 00 20 00 movl $0x0,0x20000000(%ebx)
9fa: 00 00 00
9fd: c7 83 04 00 00 20 00 movl $0x0,0x20000004(%ebx)
a04: 00 00 00
a07: c7 83 08 00 00 20 00 movl $0x0,0x20000008(%ebx)
a0e: 00 00 00
}
a11: 8d 65 f0 lea -0x10(%ebp),%esp
a14: 59 pop %ecx
a15: 5b pop %ebx
a16: 5e pop %esi
a17: 5f pop %edi
a18: 5d pop %ebp
a19: 8d 61 fc lea -0x4(%ecx),%esp
a1c: c3 ret
Execution crashes in the instrumented code at this instruction:
990: c7 83 00 00 00 20 f1 movl $0xf1f1f1f1,0x20000000(%ebx)
before the increment(int &x) function is called.
ASAN option "stack-use-after-return" was enabled.
The code was compiled with:
gcc -O0 -g -fsanitize=address main.cpp
If the integer variable x is defined as a global variable, the code doesn't get instrumented and the crash does not happen.
Before I posted my question, I found this question, which is very similar to my problem with the address sanitizer.
So my question would be:
Why did the execution of the code crash at the mentioned line?
Is it possible that the instrumentation went wrong at some point?
Edit
GCC version and configure flags
Configured with:
../gcc-6.3.0/configure --prefix=/opt/V6.3.0 --target=i686-elf --with-pic
--with-newlib --enable-fully-dynamic-string --enable-languages=c,c++
--disable-initfini-array --disable-nls --disable-shared --disable-multilib
--disable-threads --disable-tls --disable-win32-registry --enable-sjlj-
exceptions --enable-frame-pointer --disable-__cxa_atexit --disable-libgomp
--disable-libquadmath --disable-libssp --disable-libada --disable-libitm
--disable-libstdcxx-verbose --disable-libstdcxx-visibility --with-default-
libstdcxx-abi=gcc4-compatible --without-headers
Thread model: single
gcc version 6.3.0 (GCC)
Can someone explain to me why the output of this program is [nan, nan]? The code is supposed to load the value of d into both the high and low 64 bits of the XMM1 register and then store the contents of XMM1 into a. Because a is not initialized to a set of specific values, D initializes each element to nan. If the movupd instruction were not in the objdump, I would understand the result, but the instruction is there. Thoughts?
import std.stdio;
// Entry point: attempts to fill both elements of a two-element double array
// with the constant d from an inline asm block, then prints the array.
void main()
{
// Compile-time constant equal to 0.5.
enum double d = 1.0 / cast(double)2;
// Two-element array of double; no explicit element values are assigned here.
double[] a = new double[2];
// Pointer to the first element of a.
auto aptr = a.ptr;
// Intent: duplicate d into both 64-bit halves of XMM1, then store all of
// XMM1 to the memory that aptr points at (i.e. a[0] and a[1]).
// NOTE(review): the reported output is [nan, nan], so the movupd store
// apparently does not reach the array elements as written -- confirm against
// the generated code before relying on this pattern.
asm
{
movddup XMM1, d;
movupd [aptr], XMM1;
}
writeln(a);
}
Here is the objdump of the main function:
0000000000426b88 <_Dmain>:
426b88: 55 push %rbp
426b89: 48 8b ec mov %rsp,%rbp
426b8c: 48 83 ec 50 sub $0x50,%rsp
426b90: f2 48 0f 10 05 77 81 rex.W movsd 0x28177(%rip),%xmm0
426b97: 02 00
426b99: f2 48 0f 11 45 b0 rex.W movsd %xmm0,-0x50(%rbp)
426b9f: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426ba6: 00 00 00
426ba9: f2 48 0f 10 05 66 81 rex.W movsd 0x28166(%rip),%xmm0
426bb0: 02 00
426bb2: 48 8d 7d c0 lea -0x40(%rbp),%rdi
426bb6: e8 65 d1 00 00 callq 433d20 <_memsetDouble>
426bbb: f2 48 0f 10 0d 4c 81 rex.W movsd 0x2814c(%rip),%xmm1
426bc2: 02 00
426bc4: f2 48 0f 11 4d c0 rex.W movsd %xmm1,-0x40(%rbp)
426bca: f2 48 0f 10 15 3d 81 rex.W movsd 0x2813d(%rip),%xmm2
426bd1: 02 00
426bd3: f2 48 0f 11 55 c8 rex.W movsd %xmm2,-0x38(%rbp)
426bd9: 48 8d 45 c0 lea -0x40(%rbp),%rax
426bdd: 48 89 45 d0 mov %rax,-0x30(%rbp)
426be1: 48 8d 55 e0 lea -0x20(%rbp),%rdx
426be5: 48 b8 02 00 00 00 00 movabs $0x2,%rax
426bec: 00 00 00
426bef: 48 89 c1 mov %rax,%rcx
426bf2: 49 89 d0 mov %rdx,%r8
426bf5: 51 push %rcx
426bf6: 41 50 push %r8
426bf8: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426bff: 00 00 00
426c02: 48 bf c0 84 65 00 00 movabs $0x6584c0,%rdi
426c09: 00 00 00
426c0c: e8 87 ce 00 00 callq 433a98 <_d_arrayliteralTX>
426c11: 48 89 45 f0 mov %rax,-0x10(%rbp)
426c15: f2 48 0f 10 05 02 81 rex.W movsd 0x28102(%rip),%xmm0
426c1c: 02 00
426c1e: f2 48 0f 11 00 rex.W movsd %xmm0,(%rax)
426c23: f2 48 0f 10 0d f4 80 rex.W movsd 0x280f4(%rip),%xmm1
426c2a: 02 00
426c2c: 48 8b 45 f0 mov -0x10(%rbp),%rax
426c30: f2 48 0f 11 48 08 rex.W movsd %xmm1,0x8(%rax)
426c36: 48 8b 55 f0 mov -0x10(%rbp),%rdx
426c3a: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426c41: 00 00 00
426c44: 41 58 pop %r8
426c46: 59 pop %rcx
426c47: 48 bf 08 00 00 00 00 movabs $0x8,%rdi
426c4e: 00 00 00
426c51: e8 8e 95 00 00 callq 4301e4 <_d_arraycopy>
426c56: f2 0f 12 4d b0 movddup -0x50(%rbp),%xmm1
426c5b: 66 0f 11 4d d0 movupd %xmm1,-0x30(%rbp)
426c60: ff 75 c8 pushq -0x38(%rbp)
426c63: ff 75 c0 pushq -0x40(%rbp)
426c66: e8 09 00 00 00 callq 426c74 <_D3std5stdio16__T7writelnTG2dZ7writelnFG2dZv>
426c6b: 48 83 c4 10 add $0x10,%rsp
426c6f: 31 c0 xor %eax,%eax
426c71: c9 leaveq
426c72: c3 retq
426c73: 90 nop
I looked into it, and apparently the compiler decides that by movupd [aptr], XMM1 you really mean movupd aptr, XMM1. Loading aptr into a register beforehand (mov RAX, aptr; movupd [RAX], XMM1) will make it work.
You should probably file a bug report.