clang generated code for a simple factorial function - c++

Here is the code :
// Recursive factorial: returns 1 when n is 0, otherwise n * factorial(n - 1).
// All arithmetic is on unsigned int, so an overflowing product wraps around
// instead of being undefined behavior.
unsigned int factorial(unsigned int n) {
if (n == 0) return 1;
return n * factorial(n - 1);
}
// Entry point: returns factorial(0) as the program's exit status.
int main() {
return factorial(0);
}
I generate both gcc and clang assemblies using
g++ -g -O2 -c factorial.cpp (resp clang++)
objdump -d -M intel -S factorial.o
Here is what I get for gcc
factorial.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
if (n == 0)
0: 85 ff test edi,edi
2: b8 01 00 00 00 mov eax,0x1
7: 74 11 je 1a <_Z9factorialj+0x1a>
9: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
10: 0f af c7 imul eax,edi
13: 83 ef 01 sub edi,0x1
16: 75 f8 jne 10 <_Z9factorialj+0x10>
18: f3 c3 repz ret
{
return 1;
}
return n * factorial(n - 1);
}
1a: f3 c3 repz ret
Disassembly of section .text.startup:
0000000000000000 <main>:
if (n == 0)
0: b8 01 00 00 00 mov eax,0x1
5: c3 ret
and for clang
factorial.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
0: b8 01 00 00 00 mov eax,0x1
if (n == 0)
5: 85 ff test edi,edi
7: 0f 84 ba 01 00 00 je 1c7 <_Z9factorialj+0x1c7>
d: b8 01 00 00 00 mov eax,0x1
{
return 1;
}
return n * factorial(n - 1);
12: 83 ff 08 cmp edi,0x8
15: 0f 82 a5 01 00 00 jb 1c0 <_Z9factorialj+0x1c0>
1b: 41 89 f8 mov r8d,edi
1e: 41 83 e0 f8 and r8d,0xfffffff8
22: 89 fa mov edx,edi
24: 83 e2 f8 and edx,0xfffffff8
27: 0f 84 93 01 00 00 je 1c0 <_Z9factorialj+0x1c0>
2d: 8d 4f f8 lea ecx,[rdi-0x8]
30: 89 c8 mov eax,ecx
32: c1 e8 03 shr eax,0x3
35: 0f ba e1 03 bt ecx,0x3
39: 72 24 jb 5f <_Z9factorialj+0x5f>
3b: 66 0f 6e c7 movd xmm0,edi
3f: 66 0f 70 c8 00 pshufd xmm1,xmm0,0x0
44: 66 0f 6f 05 00 00 00 movdqa xmm0,XMMWORD PTR [rip+0x0] # 4c <_Z9factorialj+0x4c>
4b: 00
4c: 66 0f fe c1 paddd xmm0,xmm1
50: 66 0f fe 0d 00 00 00 paddd xmm1,XMMWORD PTR [rip+0x0] # 58 <_Z9factorialj+0x58>
57: 00
58: b9 08 00 00 00 mov ecx,0x8
5d: eb 0e jmp 6d <_Z9factorialj+0x6d>
5f: 66 0f 6f 05 00 00 00 movdqa xmm0,XMMWORD PTR [rip+0x0] # 67 <_Z9factorialj+0x67>
66: 00
67: 31 c9 xor ecx,ecx
69: 66 0f 6f c8 movdqa xmm1,xmm0
6d: 85 c0 test eax,eax
6f: 0f 84 d4 00 00 00 je 149 <_Z9factorialj+0x149>
75: 89 d0 mov eax,edx
77: 29 c8 sub eax,ecx
79: 89 fe mov esi,edi
7b: 29 ce sub esi,ecx
7d: 66 0f 6f 15 00 00 00 movdqa xmm2,XMMWORD PTR [rip+0x0] # 85 <_Z9factorialj+0x85>
84: 00
85: 66 0f 6f 1d 00 00 00 movdqa xmm3,XMMWORD PTR [rip+0x0] # 8d <_Z9factorialj+0x8d>
8c: 00
8d: 0f 1f 00 nop DWORD PTR [rax]
90: 66 0f 6e e6 movd xmm4,esi
94: 66 0f 70 e4 00 pshufd xmm4,xmm4,0x0
99: 66 0f 6f ec movdqa xmm5,xmm4
9d: 66 0f fe ea paddd xmm5,xmm2
a1: 66 0f fe e3 paddd xmm4,xmm3
a5: 66 0f 70 f5 f5 pshufd xmm6,xmm5,0xf5
aa: 66 0f f4 e8 pmuludq xmm5,xmm0
ae: 66 0f 70 ed e8 pshufd xmm5,xmm5,0xe8
b3: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
b8: 66 0f f4 c6 pmuludq xmm0,xmm6
bc: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
c1: 66 0f 62 e8 punpckldq xmm5,xmm0
c5: 66 0f 70 c4 f5 pshufd xmm0,xmm4,0xf5
ca: 66 0f f4 e1 pmuludq xmm4,xmm1
ce: 66 0f 70 e4 e8 pshufd xmm4,xmm4,0xe8
d3: 66 0f 70 c9 f5 pshufd xmm1,xmm1,0xf5
d8: 66 0f f4 c8 pmuludq xmm1,xmm0
dc: 66 0f 70 c1 e8 pshufd xmm0,xmm1,0xe8
e1: 66 0f 62 e0 punpckldq xmm4,xmm0
e5: 8d 4e f8 lea ecx,[rsi-0x8]
e8: 66 0f 6e c1 movd xmm0,ecx
ec: 66 0f 70 c8 00 pshufd xmm1,xmm0,0x0
f1: 66 0f 6f c1 movdqa xmm0,xmm1
f5: 66 0f fe c2 paddd xmm0,xmm2
f9: 66 0f fe cb paddd xmm1,xmm3
fd: 66 0f 70 f0 f5 pshufd xmm6,xmm0,0xf5
102: 66 0f f4 c5 pmuludq xmm0,xmm5
106: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
10b: 66 0f 70 ed f5 pshufd xmm5,xmm5,0xf5
110: 66 0f f4 ee pmuludq xmm5,xmm6
114: 66 0f 70 ed e8 pshufd xmm5,xmm5,0xe8
119: 66 0f 62 c5 punpckldq xmm0,xmm5
11d: 66 0f 70 e9 f5 pshufd xmm5,xmm1,0xf5
122: 66 0f f4 cc pmuludq xmm1,xmm4
126: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
12b: 66 0f 70 e4 f5 pshufd xmm4,xmm4,0xf5
130: 66 0f f4 e5 pmuludq xmm4,xmm5
134: 66 0f 70 e4 e8 pshufd xmm4,xmm4,0xe8
139: 66 0f 62 cc punpckldq xmm1,xmm4
13d: 83 c6 f0 add esi,0xfffffff0
140: 83 c0 f0 add eax,0xfffffff0
143: 0f 85 47 ff ff ff jne 90 <_Z9factorialj+0x90>
149: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
14e: 66 0f f4 c8 pmuludq xmm1,xmm0
152: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
157: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
15c: 66 0f f4 c2 pmuludq xmm0,xmm2
160: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
165: 66 0f 62 c8 punpckldq xmm1,xmm0
169: 66 0f 70 c1 4e pshufd xmm0,xmm1,0x4e
16e: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
173: 66 0f f4 c8 pmuludq xmm1,xmm0
177: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
17c: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
181: 66 0f f4 c2 pmuludq xmm0,xmm2
185: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
18a: 66 0f 62 c8 punpckldq xmm1,xmm0
18e: 66 0f 70 c1 e5 pshufd xmm0,xmm1,0xe5
193: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
198: 66 0f f4 c8 pmuludq xmm1,xmm0
19c: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
1a1: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
1a6: 66 0f f4 c2 pmuludq xmm0,xmm2
1aa: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
1af: 66 0f 62 c8 punpckldq xmm1,xmm0
1b3: 66 0f 7e c8 movd eax,xmm1
1b7: 39 fa cmp edx,edi
1b9: 74 0c je 1c7 <_Z9factorialj+0x1c7>
1bb: 44 29 c7 sub edi,r8d
1be: 66 90 xchg ax,ax
1c0: 0f af c7 imul eax,edi
if (n == 0)
1c3: ff cf dec edi
1c5: 75 f9 jne 1c0 <_Z9factorialj+0x1c0>
}
1c7: c3 ret
1c8: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
1cf: 00
00000000000001d0 <main>:
int main()
{
return factorial(0);
1d0: b8 01 00 00 00 mov eax,0x1
1d5: c3 ret
I understand that both compilers notice they can unroll the loop and precompute the value, but I don't get why clang generates all this code (I have no idea what it could possibly be doing, or why there is SSE stuff...).
Bonus question: while gcc precomputes the value up to factorial(7), clang computes it for any value.
Up to factorial(31) the values look fine; for factorial(32) and factorial(33) it returns 0x80000000, and for greater values it does xor eax, eax instead. What is this black magic?

Related

GDB: Follow exception flow

Consider the following MWE:
#include <stdexcept>
#include <iostream>
// Minimal example: throws a std::runtime_error inside a try block, handles it,
// and prints the final value of a.
int main() {
int a = 2;
try {
throw std::runtime_error("Example");
a = 4; // never reached: the throw above leaves the try block first
} catch (const std::exception &) {
a = 5; // runs when the exception thrown above is caught
}
std::cout << a << std::endl;
}
If I compile this with g++ -g -O0 test.cpp and then run the resulting binary through gdb via gdb ./a.out I can start stepping through the program. However, as soon as I reach the line that throws the exception inputting next or step does not lead me to the line a = 5 but instead immediately prints 5 to the console and then gdb tells me that the process has exited normally.
This is quite annoying as oftentimes it would actually be useful to figure out where the thrown exception is handled.
Is there a way to get gdb to follow the full program flow even when exceptions are involved?
My GDB version is GNU gdb (Ubuntu 12.1-0ubuntu1~22.04) 12.1
EDIT: To ensure that the thrown exception has not been optimized out of my binary, I checked the respective assembly via objdump, and if I interpret it correctly, throwing the exception is still part of the binary:
objdump -drwC -Mintel a.out
a.out: file format elf64-x86-64
Disassembly of section .init:
0000000000001000 <_init>:
1000: f3 0f 1e fa endbr64
1004: 48 83 ec 08 sub rsp,0x8
1008: 48 8b 05 d1 2f 00 00 mov rax,QWORD PTR [rip+0x2fd1] # 3fe0 <__gmon_start__#Base>
100f: 48 85 c0 test rax,rax
1012: 74 02 je 1016 <_init+0x16>
1014: ff d0 call rax
1016: 48 83 c4 08 add rsp,0x8
101a: c3 ret
Disassembly of section .plt:
0000000000001020 <.plt>:
1020: ff 35 2a 2f 00 00 push QWORD PTR [rip+0x2f2a] # 3f50 <_GLOBAL_OFFSET_TABLE_+0x8>
1026: f2 ff 25 2b 2f 00 00 bnd jmp QWORD PTR [rip+0x2f2b] # 3f58 <_GLOBAL_OFFSET_TABLE_+0x10>
102d: 0f 1f 00 nop DWORD PTR [rax]
1030: f3 0f 1e fa endbr64
1034: 68 00 00 00 00 push 0x0
1039: f2 e9 e1 ff ff ff bnd jmp 1020 <_init+0x20>
103f: 90 nop
1040: f3 0f 1e fa endbr64
1044: 68 01 00 00 00 push 0x1
1049: f2 e9 d1 ff ff ff bnd jmp 1020 <_init+0x20>
104f: 90 nop
1050: f3 0f 1e fa endbr64
1054: 68 02 00 00 00 push 0x2
1059: f2 e9 c1 ff ff ff bnd jmp 1020 <_init+0x20>
105f: 90 nop
1060: f3 0f 1e fa endbr64
1064: 68 03 00 00 00 push 0x3
1069: f2 e9 b1 ff ff ff bnd jmp 1020 <_init+0x20>
106f: 90 nop
1070: f3 0f 1e fa endbr64
1074: 68 04 00 00 00 push 0x4
1079: f2 e9 a1 ff ff ff bnd jmp 1020 <_init+0x20>
107f: 90 nop
1080: f3 0f 1e fa endbr64
1084: 68 05 00 00 00 push 0x5
1089: f2 e9 91 ff ff ff bnd jmp 1020 <_init+0x20>
108f: 90 nop
1090: f3 0f 1e fa endbr64
1094: 68 06 00 00 00 push 0x6
1099: f2 e9 81 ff ff ff bnd jmp 1020 <_init+0x20>
109f: 90 nop
10a0: f3 0f 1e fa endbr64
10a4: 68 07 00 00 00 push 0x7
10a9: f2 e9 71 ff ff ff bnd jmp 1020 <_init+0x20>
10af: 90 nop
10b0: f3 0f 1e fa endbr64
10b4: 68 08 00 00 00 push 0x8
10b9: f2 e9 61 ff ff ff bnd jmp 1020 <_init+0x20>
10bf: 90 nop
10c0: f3 0f 1e fa endbr64
10c4: 68 09 00 00 00 push 0x9
10c9: f2 e9 51 ff ff ff bnd jmp 1020 <_init+0x20>
10cf: 90 nop
10d0: f3 0f 1e fa endbr64
10d4: 68 0a 00 00 00 push 0xa
10d9: f2 e9 41 ff ff ff bnd jmp 1020 <_init+0x20>
10df: 90 nop
Disassembly of section .plt.got:
00000000000010e0 <__cxa_finalize#plt>:
10e0: f3 0f 1e fa endbr64
10e4: f2 ff 25 cd 2e 00 00 bnd jmp QWORD PTR [rip+0x2ecd] # 3fb8 <__cxa_finalize#GLIBC_2.2.5>
10eb: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
Disassembly of section .plt.sec:
00000000000010f0 <std::runtime_error::runtime_error(char const*)#plt>:
10f0: f3 0f 1e fa endbr64
10f4: f2 ff 25 65 2e 00 00 bnd jmp QWORD PTR [rip+0x2e65] # 3f60 <std::runtime_error::runtime_error(char const*)#GLIBCXX_3.4.21>
10fb: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001100 <__cxa_begin_catch#plt>:
1100: f3 0f 1e fa endbr64
1104: f2 ff 25 5d 2e 00 00 bnd jmp QWORD PTR [rip+0x2e5d] # 3f68 <__cxa_begin_catch#CXXABI_1.3>
110b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001110 <__cxa_allocate_exception#plt>:
1110: f3 0f 1e fa endbr64
1114: f2 ff 25 55 2e 00 00 bnd jmp QWORD PTR [rip+0x2e55] # 3f70 <__cxa_allocate_exception#CXXABI_1.3>
111b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001120 <__cxa_free_exception#plt>:
1120: f3 0f 1e fa endbr64
1124: f2 ff 25 4d 2e 00 00 bnd jmp QWORD PTR [rip+0x2e4d] # 3f78 <__cxa_free_exception#CXXABI_1.3>
112b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001130 <__cxa_atexit#plt>:
1130: f3 0f 1e fa endbr64
1134: f2 ff 25 45 2e 00 00 bnd jmp QWORD PTR [rip+0x2e45] # 3f80 <__cxa_atexit#GLIBC_2.2.5>
113b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001140 <std::ostream::operator<<(std::ostream& (*)(std::ostream&))#plt>:
1140: f3 0f 1e fa endbr64
1144: f2 ff 25 3d 2e 00 00 bnd jmp QWORD PTR [rip+0x2e3d] # 3f88 <std::ostream::operator<<(std::ostream& (*)(std::ostream&))#GLIBCXX_3.4>
114b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001150 <std::ios_base::Init::Init()#plt>:
1150: f3 0f 1e fa endbr64
1154: f2 ff 25 35 2e 00 00 bnd jmp QWORD PTR [rip+0x2e35] # 3f90 <std::ios_base::Init::Init()#GLIBCXX_3.4>
115b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001160 <__cxa_end_catch#plt>:
1160: f3 0f 1e fa endbr64
1164: f2 ff 25 2d 2e 00 00 bnd jmp QWORD PTR [rip+0x2e2d] # 3f98 <__cxa_end_catch#CXXABI_1.3>
116b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001170 <__cxa_throw#plt>:
1170: f3 0f 1e fa endbr64
1174: f2 ff 25 25 2e 00 00 bnd jmp QWORD PTR [rip+0x2e25] # 3fa0 <__cxa_throw#CXXABI_1.3>
117b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001180 <std::ostream::operator<<(int)#plt>:
1180: f3 0f 1e fa endbr64
1184: f2 ff 25 1d 2e 00 00 bnd jmp QWORD PTR [rip+0x2e1d] # 3fa8 <std::ostream::operator<<(int)#GLIBCXX_3.4>
118b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000001190 <_Unwind_Resume#plt>:
1190: f3 0f 1e fa endbr64
1194: f2 ff 25 15 2e 00 00 bnd jmp QWORD PTR [rip+0x2e15] # 3fb0 <_Unwind_Resume#GCC_3.0>
119b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
Disassembly of section .text:
00000000000011a0 <_start>:
11a0: f3 0f 1e fa endbr64
11a4: 31 ed xor ebp,ebp
11a6: 49 89 d1 mov r9,rdx
11a9: 5e pop rsi
11aa: 48 89 e2 mov rdx,rsp
11ad: 48 83 e4 f0 and rsp,0xfffffffffffffff0
11b1: 50 push rax
11b2: 54 push rsp
11b3: 45 31 c0 xor r8d,r8d
11b6: 31 c9 xor ecx,ecx
11b8: 48 8d 3d ca 00 00 00 lea rdi,[rip+0xca] # 1289 <main>
11bf: ff 15 03 2e 00 00 call QWORD PTR [rip+0x2e03] # 3fc8 <__libc_start_main#GLIBC_2.34>
11c5: f4 hlt
11c6: 66 2e 0f 1f 84 00 00 00 00 00 cs nop WORD PTR [rax+rax*1+0x0]
00000000000011d0 <deregister_tm_clones>:
11d0: 48 8d 3d 49 2e 00 00 lea rdi,[rip+0x2e49] # 4020 <__TMC_END__>
11d7: 48 8d 05 42 2e 00 00 lea rax,[rip+0x2e42] # 4020 <__TMC_END__>
11de: 48 39 f8 cmp rax,rdi
11e1: 74 15 je 11f8 <deregister_tm_clones+0x28>
11e3: 48 8b 05 ee 2d 00 00 mov rax,QWORD PTR [rip+0x2dee] # 3fd8 <_ITM_deregisterTMCloneTable#Base>
11ea: 48 85 c0 test rax,rax
11ed: 74 09 je 11f8 <deregister_tm_clones+0x28>
11ef: ff e0 jmp rax
11f1: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
11f8: c3 ret
11f9: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
0000000000001200 <register_tm_clones>:
1200: 48 8d 3d 19 2e 00 00 lea rdi,[rip+0x2e19] # 4020 <__TMC_END__>
1207: 48 8d 35 12 2e 00 00 lea rsi,[rip+0x2e12] # 4020 <__TMC_END__>
120e: 48 29 fe sub rsi,rdi
1211: 48 89 f0 mov rax,rsi
1214: 48 c1 ee 3f shr rsi,0x3f
1218: 48 c1 f8 03 sar rax,0x3
121c: 48 01 c6 add rsi,rax
121f: 48 d1 fe sar rsi,1
1222: 74 14 je 1238 <register_tm_clones+0x38>
1224: 48 8b 05 bd 2d 00 00 mov rax,QWORD PTR [rip+0x2dbd] # 3fe8 <_ITM_registerTMCloneTable#Base>
122b: 48 85 c0 test rax,rax
122e: 74 08 je 1238 <register_tm_clones+0x38>
1230: ff e0 jmp rax
1232: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
1238: c3 ret
1239: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
0000000000001240 <__do_global_dtors_aux>:
1240: f3 0f 1e fa endbr64
1244: 80 3d 05 2f 00 00 00 cmp BYTE PTR [rip+0x2f05],0x0 # 4150 <completed.0>
124b: 75 2b jne 1278 <__do_global_dtors_aux+0x38>
124d: 55 push rbp
124e: 48 83 3d 62 2d 00 00 00 cmp QWORD PTR [rip+0x2d62],0x0 # 3fb8 <__cxa_finalize#GLIBC_2.2.5>
1256: 48 89 e5 mov rbp,rsp
1259: 74 0c je 1267 <__do_global_dtors_aux+0x27>
125b: 48 8b 3d a6 2d 00 00 mov rdi,QWORD PTR [rip+0x2da6] # 4008 <__dso_handle>
1262: e8 79 fe ff ff call 10e0 <__cxa_finalize#plt>
1267: e8 64 ff ff ff call 11d0 <deregister_tm_clones>
126c: c6 05 dd 2e 00 00 01 mov BYTE PTR [rip+0x2edd],0x1 # 4150 <completed.0>
1273: 5d pop rbp
1274: c3 ret
1275: 0f 1f 00 nop DWORD PTR [rax]
1278: c3 ret
1279: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
0000000000001280 <frame_dummy>:
1280: f3 0f 1e fa endbr64
1284: e9 77 ff ff ff jmp 1200 <register_tm_clones>
0000000000001289 <main>:
1289: f3 0f 1e fa endbr64
128d: 55 push rbp
128e: 48 89 e5 mov rbp,rsp
1291: 41 55 push r13
1293: 41 54 push r12
1295: 53 push rbx
1296: 48 83 ec 18 sub rsp,0x18
129a: c7 45 d4 02 00 00 00 mov DWORD PTR [rbp-0x2c],0x2
12a1: bf 10 00 00 00 mov edi,0x10
12a6: e8 65 fe ff ff call 1110 <__cxa_allocate_exception#plt>
12ab: 48 89 c3 mov rbx,rax
12ae: 48 8d 05 4f 0d 00 00 lea rax,[rip+0xd4f] # 2004 <_IO_stdin_used+0x4>
12b5: 48 89 c6 mov rsi,rax
12b8: 48 89 df mov rdi,rbx
12bb: e8 30 fe ff ff call 10f0 <std::runtime_error::runtime_error(char const*)#plt>
12c0: 48 8b 05 09 2d 00 00 mov rax,QWORD PTR [rip+0x2d09] # 3fd0 <std::runtime_error::~runtime_error()#GLIBCXX_3.4>
12c7: 48 89 c2 mov rdx,rax
12ca: 48 8d 05 4f 2a 00 00 lea rax,[rip+0x2a4f] # 3d20 <typeinfo for std::runtime_error#GLIBCXX_3.4>
12d1: 48 89 c6 mov rsi,rax
12d4: 48 89 df mov rdi,rbx
12d7: e8 94 fe ff ff call 1170 <__cxa_throw#plt>
12dc: f3 0f 1e fa endbr64
12e0: 49 89 c5 mov r13,rax
12e3: 49 89 d4 mov r12,rdx
12e6: 48 89 df mov rdi,rbx
12e9: e8 32 fe ff ff call 1120 <__cxa_free_exception#plt>
12ee: 4c 89 e8 mov rax,r13
12f1: 4c 89 e2 mov rdx,r12
12f4: eb 04 jmp 12fa <main+0x71>
12f6: f3 0f 1e fa endbr64
12fa: 48 83 fa 01 cmp rdx,0x1
12fe: 74 08 je 1308 <main+0x7f>
1300: 48 89 c7 mov rdi,rax
1303: e8 88 fe ff ff call 1190 <_Unwind_Resume#plt>
1308: 48 89 c7 mov rdi,rax
130b: e8 f0 fd ff ff call 1100 <__cxa_begin_catch#plt>
1310: 48 89 45 d8 mov QWORD PTR [rbp-0x28],rax
1314: c7 45 d4 05 00 00 00 mov DWORD PTR [rbp-0x2c],0x5
131b: e8 40 fe ff ff call 1160 <__cxa_end_catch#plt>
1320: 8b 45 d4 mov eax,DWORD PTR [rbp-0x2c]
1323: 89 c6 mov esi,eax
1325: 48 8d 05 14 2d 00 00 lea rax,[rip+0x2d14] # 4040 <std::cout#GLIBCXX_3.4>
132c: 48 89 c7 mov rdi,rax
132f: e8 4c fe ff ff call 1180 <std::ostream::operator<<(int)#plt>
1334: 48 8b 15 85 2c 00 00 mov rdx,QWORD PTR [rip+0x2c85] # 3fc0 <std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)#GLIBCXX_3.4>
133b: 48 89 d6 mov rsi,rdx
133e: 48 89 c7 mov rdi,rax
1341: e8 fa fd ff ff call 1140 <std::ostream::operator<<(std::ostream& (*)(std::ostream&))#plt>
1346: b8 00 00 00 00 mov eax,0x0
134b: 48 83 c4 18 add rsp,0x18
134f: 5b pop rbx
1350: 41 5c pop r12
1352: 41 5d pop r13
1354: 5d pop rbp
1355: c3 ret
0000000000001356 <__static_initialization_and_destruction_0(int, int)>:
1356: f3 0f 1e fa endbr64
135a: 55 push rbp
135b: 48 89 e5 mov rbp,rsp
135e: 48 83 ec 10 sub rsp,0x10
1362: 89 7d fc mov DWORD PTR [rbp-0x4],edi
1365: 89 75 f8 mov DWORD PTR [rbp-0x8],esi
1368: 83 7d fc 01 cmp DWORD PTR [rbp-0x4],0x1
136c: 75 3b jne 13a9 <__static_initialization_and_destruction_0(int, int)+0x53>
136e: 81 7d f8 ff ff 00 00 cmp DWORD PTR [rbp-0x8],0xffff
1375: 75 32 jne 13a9 <__static_initialization_and_destruction_0(int, int)+0x53>
1377: 48 8d 05 d3 2d 00 00 lea rax,[rip+0x2dd3] # 4151 <std::__ioinit>
137e: 48 89 c7 mov rdi,rax
1381: e8 ca fd ff ff call 1150 <std::ios_base::Init::Init()#plt>
1386: 48 8d 05 7b 2c 00 00 lea rax,[rip+0x2c7b] # 4008 <__dso_handle>
138d: 48 89 c2 mov rdx,rax
1390: 48 8d 05 ba 2d 00 00 lea rax,[rip+0x2dba] # 4151 <std::__ioinit>
1397: 48 89 c6 mov rsi,rax
139a: 48 8b 05 4f 2c 00 00 mov rax,QWORD PTR [rip+0x2c4f] # 3ff0 <std::ios_base::Init::~Init()#GLIBCXX_3.4>
13a1: 48 89 c7 mov rdi,rax
13a4: e8 87 fd ff ff call 1130 <__cxa_atexit#plt>
13a9: 90 nop
13aa: c9 leave
13ab: c3 ret
00000000000013ac <_GLOBAL__sub_I_main>:
13ac: f3 0f 1e fa endbr64
13b0: 55 push rbp
13b1: 48 89 e5 mov rbp,rsp
13b4: be ff ff 00 00 mov esi,0xffff
13b9: bf 01 00 00 00 mov edi,0x1
13be: e8 93 ff ff ff call 1356 <__static_initialization_and_destruction_0(int, int)>
13c3: 5d pop rbp
13c4: c3 ret
Disassembly of section .fini:
00000000000013c8 <_fini>:
13c8: f3 0f 1e fa endbr64
13cc: 48 83 ec 08 sub rsp,0x8
13d0: 48 83 c4 08 add rsp,0x8
13d4: c3 ret

Why is g++ using jle instead of jbe for two unsigned numbers?

In a case where I get this error:
error: assuming signed overflow does not occur when simplifying conditional
I looked at the assembly and the if() uses:
d34: 48 83 fa 01 cmp $0x1,%rdx
d38: 7e 54 jle d8e <main+0x3ae>
Interestingly enough, %rdx is defined as an unsigned (std::size_t) and the number $0x1 is also defined as an unsigned (2UL in the original). So why would g++ decide to use jle instead of jbe?
Note: Just in case, I tried with if(colons > 1UL) ... and that did not help. Same error, same results in assembly.
C++ code to reproduce the error:
#include <algorithm>
#include <string>
#include <iostream>
// Counts the ':' characters in argv[1] and reports on std::cerr whether there
// are at least two of them. argv[1] is used without checking argc.
int main(int argc, char * argv[])
{
std::string const in(argv[1]);
// The result of std::count is stored in an unsigned std::size_t.
std::size_t const colons(std::count(in.begin(), in.end(), ':'));
if(colons >= 2UL)
{
std::cerr << "2 or more colons...\n";
}
else
{
std::cerr << "no or just one colon.\n";
}
return 0;
}
Command line used to reproduce the error:
g++ -Werror=strict-overflow -std=c++17 -O3 -o a a.cpp
To compile anyway, just don't use the -Werror=strict-overflow option.
The complete result (this is a bit of a killer since the std::count() gets overly optimized for speed):
00000000000009e0 <main>:
9e0: 41 55 push %r13
9e2: 41 54 push %r12
9e4: 55 push %rbp
9e5: 53 push %rbx
9e6: 48 83 ec 38 sub $0x38,%rsp
9ea: 4c 8b 66 08 mov 0x8(%rsi),%r12
9ee: 48 89 e3 mov %rsp,%rbx
9f1: 4c 8d 6b 10 lea 0x10(%rbx),%r13
9f5: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
9fc: 00 00
9fe: 48 89 44 24 28 mov %rax,0x28(%rsp)
a03: 31 c0 xor %eax,%eax
a05: 4d 85 e4 test %r12,%r12
a08: 4c 89 2c 24 mov %r13,(%rsp)
a0c: 0f 84 e8 03 00 00 je dfa <main+0x41a>
a12: 4c 89 e7 mov %r12,%rdi
a15: e8 16 ff ff ff callq 930 <strlen#plt>
a1a: 48 83 f8 0f cmp $0xf,%rax
a1e: 48 89 c5 mov %rax,%rbp
a21: 0f 87 7c 03 00 00 ja da3 <main+0x3c3>
a27: 48 83 f8 01 cmp $0x1,%rax
a2b: 0f 84 4f 03 00 00 je d80 <main+0x3a0>
a31: 48 85 c0 test %rax,%rax
a34: 0f 85 cc 03 00 00 jne e06 <main+0x426>
a3a: 48 8b 04 24 mov (%rsp),%rax
a3e: 48 89 6c 24 08 mov %rbp,0x8(%rsp)
a43: c6 04 28 00 movb $0x0,(%rax,%rbp,1)
a47: 48 8b 04 24 mov (%rsp),%rax
a4b: 48 8b 54 24 08 mov 0x8(%rsp),%rdx
a50: 48 8d 34 10 lea (%rax,%rdx,1),%rsi
a54: 48 39 f0 cmp %rsi,%rax
a57: 0f 84 31 03 00 00 je d8e <main+0x3ae>
a5d: 48 89 c1 mov %rax,%rcx
a60: 49 89 f1 mov %rsi,%r9
a63: 48 83 ea 01 sub $0x1,%rdx
a67: 48 f7 d9 neg %rcx
a6a: 49 29 c1 sub %rax,%r9
a6d: 41 ba 12 00 00 00 mov $0x12,%r10d
a73: 83 e1 0f and $0xf,%ecx
a76: 48 8d 78 01 lea 0x1(%rax),%rdi
a7a: 4c 8d 41 0f lea 0xf(%rcx),%r8
a7e: 49 83 f8 12 cmp $0x12,%r8
a82: 4d 0f 42 c2 cmovb %r10,%r8
a86: 4c 39 c2 cmp %r8,%rdx
a89: 0f 82 4b 03 00 00 jb dda <main+0x3fa>
a8f: 48 85 c9 test %rcx,%rcx
a92: 0f 84 52 03 00 00 je dea <main+0x40a>
a98: 45 31 d2 xor %r10d,%r10d
a9b: 80 38 3a cmpb $0x3a,(%rax)
a9e: 41 0f 94 c2 sete %r10b
aa2: 48 83 f9 01 cmp $0x1,%rcx
aa6: 0f 84 34 01 00 00 je be0 <main+0x200>
aac: 80 78 01 3a cmpb $0x3a,0x1(%rax)
ab0: 75 04 jne ab6 <main+0xd6>
ab2: 49 83 c2 01 add $0x1,%r10
ab6: 48 83 f9 02 cmp $0x2,%rcx
aba: 48 8d 78 02 lea 0x2(%rax),%rdi
abe: 0f 84 1c 01 00 00 je be0 <main+0x200>
ac4: 80 78 02 3a cmpb $0x3a,0x2(%rax)
ac8: 75 04 jne ace <main+0xee>
aca: 49 83 c2 01 add $0x1,%r10
ace: 48 83 f9 03 cmp $0x3,%rcx
ad2: 48 8d 78 03 lea 0x3(%rax),%rdi
ad6: 0f 84 04 01 00 00 je be0 <main+0x200>
adc: 80 78 03 3a cmpb $0x3a,0x3(%rax)
ae0: 75 04 jne ae6 <main+0x106>
ae2: 49 83 c2 01 add $0x1,%r10
ae6: 48 83 f9 04 cmp $0x4,%rcx
aea: 48 8d 78 04 lea 0x4(%rax),%rdi
aee: 0f 84 ec 00 00 00 je be0 <main+0x200>
af4: 80 78 04 3a cmpb $0x3a,0x4(%rax)
af8: 75 04 jne afe <main+0x11e>
afa: 49 83 c2 01 add $0x1,%r10
afe: 48 83 f9 05 cmp $0x5,%rcx
b02: 48 8d 78 05 lea 0x5(%rax),%rdi
b06: 0f 84 d4 00 00 00 je be0 <main+0x200>
b0c: 80 78 05 3a cmpb $0x3a,0x5(%rax)
b10: 75 04 jne b16 <main+0x136>
b12: 49 83 c2 01 add $0x1,%r10
b16: 48 83 f9 06 cmp $0x6,%rcx
b1a: 48 8d 78 06 lea 0x6(%rax),%rdi
b1e: 0f 84 bc 00 00 00 je be0 <main+0x200>
b24: 80 78 06 3a cmpb $0x3a,0x6(%rax)
b28: 0f 84 9a 02 00 00 je dc8 <main+0x3e8>
b2e: 48 83 f9 07 cmp $0x7,%rcx
b32: 48 8d 78 07 lea 0x7(%rax),%rdi
b36: 0f 84 a4 00 00 00 je be0 <main+0x200>
b3c: 80 78 07 3a cmpb $0x3a,0x7(%rax)
b40: 0f 84 8b 02 00 00 je dd1 <main+0x3f1>
b46: 48 83 f9 08 cmp $0x8,%rcx
b4a: 48 8d 78 08 lea 0x8(%rax),%rdi
b4e: 0f 84 8c 00 00 00 je be0 <main+0x200>
b54: 80 78 08 3a cmpb $0x3a,0x8(%rax)
b58: 75 04 jne b5e <main+0x17e>
b5a: 49 83 c2 01 add $0x1,%r10
b5e: 48 83 f9 09 cmp $0x9,%rcx
b62: 48 8d 78 09 lea 0x9(%rax),%rdi
b66: 74 78 je be0 <main+0x200>
b68: 80 78 09 3a cmpb $0x3a,0x9(%rax)
b6c: 75 04 jne b72 <main+0x192>
b6e: 49 83 c2 01 add $0x1,%r10
b72: 48 83 f9 0a cmp $0xa,%rcx
b76: 48 8d 78 0a lea 0xa(%rax),%rdi
b7a: 74 64 je be0 <main+0x200>
b7c: 80 78 0a 3a cmpb $0x3a,0xa(%rax)
b80: 75 04 jne b86 <main+0x1a6>
b82: 49 83 c2 01 add $0x1,%r10
b86: 48 83 f9 0b cmp $0xb,%rcx
b8a: 48 8d 78 0b lea 0xb(%rax),%rdi
b8e: 74 50 je be0 <main+0x200>
b90: 80 78 0b 3a cmpb $0x3a,0xb(%rax)
b94: 75 04 jne b9a <main+0x1ba>
b96: 49 83 c2 01 add $0x1,%r10
b9a: 48 83 f9 0c cmp $0xc,%rcx
b9e: 48 8d 78 0c lea 0xc(%rax),%rdi
ba2: 74 3c je be0 <main+0x200>
ba4: 80 78 0c 3a cmpb $0x3a,0xc(%rax)
ba8: 75 04 jne bae <main+0x1ce>
baa: 49 83 c2 01 add $0x1,%r10
bae: 48 83 f9 0d cmp $0xd,%rcx
bb2: 48 8d 78 0d lea 0xd(%rax),%rdi
bb6: 74 28 je be0 <main+0x200>
bb8: 80 78 0d 3a cmpb $0x3a,0xd(%rax)
bbc: 75 04 jne bc2 <main+0x1e2>
bbe: 49 83 c2 01 add $0x1,%r10
bc2: 48 83 f9 0f cmp $0xf,%rcx
bc6: 48 8d 78 0e lea 0xe(%rax),%rdi
bca: 75 14 jne be0 <main+0x200>
bcc: 80 78 0e 3a cmpb $0x3a,0xe(%rax)
bd0: 0f 84 0b 02 00 00 je de1 <main+0x401>
bd6: 48 8d 78 0f lea 0xf(%rax),%rdi
bda: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
be0: 49 29 c9 sub %rcx,%r9
be3: 66 45 0f ef c0 pxor %xmm8,%xmm8
be8: 66 0f ef e4 pxor %xmm4,%xmm4
bec: 4d 89 c8 mov %r9,%r8
bef: 66 0f ef db pxor %xmm3,%xmm3
bf3: 48 01 c8 add %rcx,%rax
bf6: 66 0f ef d2 pxor %xmm2,%xmm2
bfa: 49 c1 e8 04 shr $0x4,%r8
bfe: 66 0f 6f 35 5a 04 00 movdqa 0x45a(%rip),%xmm6 # 1060 <_IO_stdin_used+0x70>
c05: 00
c06: 31 c9 xor %ecx,%ecx
c08: 66 0f 6f 2d 60 04 00 movdqa 0x460(%rip),%xmm5 # 1070 <_IO_stdin_used+0x80>
c0f: 00
c10: 66 0f 6f cc movdqa %xmm4,%xmm1
c14: 66 44 0f 6f da movdqa %xmm2,%xmm11
c19: 66 0f 6f 00 movdqa (%rax),%xmm0
c1d: 48 83 c1 01 add $0x1,%rcx
c21: 48 83 c0 10 add $0x10,%rax
c25: 49 39 c8 cmp %rcx,%r8
c28: 66 0f 74 c6 pcmpeqb %xmm6,%xmm0
c2c: 66 0f db c5 pand %xmm5,%xmm0
c30: 66 0f 64 c8 pcmpgtb %xmm0,%xmm1
c34: 66 0f 6f f8 movdqa %xmm0,%xmm7
c38: 66 0f 60 f9 punpcklbw %xmm1,%xmm7
c3c: 66 0f 68 c1 punpckhbw %xmm1,%xmm0
c40: 66 0f 6f cb movdqa %xmm3,%xmm1
c44: 66 44 0f 6f d7 movdqa %xmm7,%xmm10
c49: 66 0f 65 cf pcmpgtw %xmm7,%xmm1
c4d: 66 44 0f 6f c8 movdqa %xmm0,%xmm9
c52: 66 44 0f 61 d1 punpcklwd %xmm1,%xmm10
c57: 66 0f 69 f9 punpckhwd %xmm1,%xmm7
c5b: 66 0f 6f cb movdqa %xmm3,%xmm1
c5f: 66 0f 65 c8 pcmpgtw %xmm0,%xmm1
c63: 66 45 0f 66 da pcmpgtd %xmm10,%xmm11
c68: 66 44 0f 61 c9 punpcklwd %xmm1,%xmm9
c6d: 66 0f 69 c1 punpckhwd %xmm1,%xmm0
c71: 66 41 0f 6f ca movdqa %xmm10,%xmm1
c76: 66 45 0f 6a d3 punpckhdq %xmm11,%xmm10
c7b: 66 41 0f 62 cb punpckldq %xmm11,%xmm1
c80: 66 41 0f d4 c8 paddq %xmm8,%xmm1
c85: 66 44 0f 6f c2 movdqa %xmm2,%xmm8
c8a: 66 41 0f d4 ca paddq %xmm10,%xmm1
c8f: 66 44 0f 6f d7 movdqa %xmm7,%xmm10
c94: 66 44 0f 66 c7 pcmpgtd %xmm7,%xmm8
c99: 66 41 0f 6a f8 punpckhdq %xmm8,%xmm7
c9e: 66 45 0f 62 d0 punpckldq %xmm8,%xmm10
ca3: 66 45 0f 6f c1 movdqa %xmm9,%xmm8
ca8: 66 41 0f d4 ca paddq %xmm10,%xmm1
cad: 66 0f d4 cf paddq %xmm7,%xmm1
cb1: 66 0f 6f fa movdqa %xmm2,%xmm7
cb5: 66 41 0f 66 f9 pcmpgtd %xmm9,%xmm7
cba: 66 44 0f 62 c7 punpckldq %xmm7,%xmm8
cbf: 66 44 0f 6a cf punpckhdq %xmm7,%xmm9
cc4: 66 0f 6f fa movdqa %xmm2,%xmm7
cc8: 66 41 0f d4 c8 paddq %xmm8,%xmm1
ccd: 66 0f 66 f8 pcmpgtd %xmm0,%xmm7
cd1: 66 44 0f 6f c0 movdqa %xmm0,%xmm8
cd6: 66 41 0f d4 c9 paddq %xmm9,%xmm1
cdb: 66 44 0f 62 c7 punpckldq %xmm7,%xmm8
ce0: 66 0f 6a c7 punpckhdq %xmm7,%xmm0
ce4: 66 41 0f d4 c8 paddq %xmm8,%xmm1
ce9: 66 0f d4 c8 paddq %xmm0,%xmm1
ced: 66 44 0f 6f c1 movdqa %xmm1,%xmm8
cf2: 0f 87 18 ff ff ff ja c10 <main+0x230>
cf8: 66 0f 73 d9 08 psrldq $0x8,%xmm1
cfd: 4c 89 c9 mov %r9,%rcx
d00: 66 41 0f d4 c8 paddq %xmm8,%xmm1
d05: 66 48 0f 7e ca movq %xmm1,%rdx
d0a: 48 83 e1 f0 and $0xfffffffffffffff0,%rcx
d0e: 48 8d 04 0f lea (%rdi,%rcx,1),%rax
d12: 4c 01 d2 add %r10,%rdx
d15: 49 39 c9 cmp %rcx,%r9
d18: 74 1a je d34 <main+0x354>
d1a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
d20: 31 c9 xor %ecx,%ecx
d22: 80 38 3a cmpb $0x3a,(%rax)
d25: 0f 94 c1 sete %cl
d28: 48 83 c0 01 add $0x1,%rax
d2c: 48 01 ca add %rcx,%rdx
d2f: 48 39 c6 cmp %rax,%rsi
d32: 75 ec jne d20 <main+0x340>
# Area of interest:
d34: 48 83 fa 01 cmp $0x1,%rdx
d38: 7e 54 jle d8e <main+0x3ae>
d3a: 48 8d 35 e7 02 00 00 lea 0x2e7(%rip),%rsi # 1028 <_IO_stdin_used+0x38>
d41: 48 8d 3d d8 12 20 00 lea 0x2012d8(%rip),%rdi # 202020 <_ZSt4cerr##GLIBCXX_3.4>
d48: e8 33 fc ff ff callq 980 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc#plt>
d4d: 48 8b 3c 24 mov (%rsp),%rdi
d51: 48 83 c3 10 add $0x10,%rbx
d55: 48 39 df cmp %rbx,%rdi
d58: 74 05 je d5f <main+0x37f>
d5a: e8 11 fc ff ff callq 970 <_ZdlPv#plt>
d5f: 31 c0 xor %eax,%eax
d61: 48 8b 5c 24 28 mov 0x28(%rsp),%rbx
d66: 64 48 33 1c 25 28 00 xor %fs:0x28,%rbx
d6d: 00 00
d6f: 0f 85 80 00 00 00 jne df5 <main+0x415>
d75: 48 83 c4 38 add $0x38,%rsp
d79: 5b pop %rbx
d7a: 5d pop %rbp
d7b: 41 5c pop %r12
d7d: 41 5d pop %r13
d7f: c3 retq
d80: 41 0f b6 04 24 movzbl (%r12),%eax
d85: 88 44 24 10 mov %al,0x10(%rsp)
d89: e9 ac fc ff ff jmpq a3a <main+0x5a>
d8e: 48 8d 35 a8 02 00 00 lea 0x2a8(%rip),%rsi # 103d <_IO_stdin_used+0x4d>
d95: 48 8d 3d 84 12 20 00 lea 0x201284(%rip),%rdi # 202020 <_ZSt4cerr##GLIBCXX_3.4>
d9c: e8 df fb ff ff callq 980 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc#plt>
da1: eb aa jmp d4d <main+0x36d>
da3: 48 8d 78 01 lea 0x1(%rax),%rdi
da7: e8 e4 fb ff ff callq 990 <_Znwm#plt>
dac: 48 89 6c 24 10 mov %rbp,0x10(%rsp)
db1: 48 89 04 24 mov %rax,(%rsp)
db5: 48 89 ea mov %rbp,%rdx
db8: 4c 89 e6 mov %r12,%rsi
dbb: 48 89 c7 mov %rax,%rdi
dbe: e8 8d fb ff ff callq 950 <memcpy#plt>
dc3: e9 72 fc ff ff jmpq a3a <main+0x5a>
dc8: 49 83 c2 01 add $0x1,%r10
dcc: e9 5d fd ff ff jmpq b2e <main+0x14e>
dd1: 49 83 c2 01 add $0x1,%r10
dd5: e9 6c fd ff ff jmpq b46 <main+0x166>
dda: 31 d2 xor %edx,%edx
ddc: e9 3f ff ff ff jmpq d20 <main+0x340>
de1: 49 83 c2 01 add $0x1,%r10
de5: e9 ec fd ff ff jmpq bd6 <main+0x1f6>
dea: 48 89 c7 mov %rax,%rdi
ded: 45 31 d2 xor %r10d,%r10d
df0: e9 eb fd ff ff jmpq be0 <main+0x200>
df5: e8 a6 fb ff ff callq 9a0 <__stack_chk_fail#plt>
dfa: 48 8d 3d f7 01 00 00 lea 0x1f7(%rip),%rdi # ff8 <_IO_stdin_used+0x8>
e01: e8 3a fb ff ff callq 940 <_ZSt19__throw_logic_errorPKc#plt>
e06: 4c 89 e8 mov %r13,%rax
e09: eb aa jmp db5 <main+0x3d5>
e0b: 48 8b 3c 24 mov (%rsp),%rdi
e0f: 48 83 c3 10 add $0x10,%rbx
e13: 48 89 c5 mov %rax,%rbp
e16: 48 39 df cmp %rbx,%rdi
e19: 74 05 je e20 <main+0x440>
e1b: e8 50 fb ff ff callq 970 <_ZdlPv#plt>
e20: 48 89 ef mov %rbp,%rdi
e23: e8 98 fb ff ff callq 9c0 <_Unwind_Resume#plt>
e28: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
e2f: 00
For those interested, you may fix the issue by using signed numbers as in:
#include <type_traits>
...
if(static_cast<std::make_signed_t<decltype(colons)>>(colons) >= 2LL)
...
or wrap the if() statement around #pragma like so:
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-overflow"
if(colons >= 2LL)
#pragma GCC diagnostic pop
But this is clearly not the question here.
std::count is defined as: https://github.com/gcc-mirror/gcc/blob/16e2427f50c208dfe07d07f18009969502c25dc8/libstdc%2B%2B-v3/include/bits/stl_algo.h#L4045
/**
* @brief Count the number of copies of a value in a sequence.
* @ingroup non_mutating_algorithms
* @param __first An input iterator.
* @param __last An input iterator.
* @param __value The value to be counted.
* @return The number of iterators @c i in the range @p [__first,__last)
* for which @c *i == @p __value
*/
template<typename _InputIterator, typename _Tp>
_GLIBCXX20_CONSTEXPR
inline typename iterator_traits<_InputIterator>::difference_type
count(_InputIterator __first, _InputIterator __last, const _Tp& __value)
{
// concept requirements
__glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
__glibcxx_function_requires(_EqualOpConcept<
typename iterator_traits<_InputIterator>::value_type, _Tp>)
__glibcxx_requires_valid_range(__first, __last);
return std::__count_if(__first, __last,
__gnu_cxx::__ops::__iter_equals_val(__value));
}
Then https://github.com/gcc-mirror/gcc/blob/16e2427f50c208dfe07d07f18009969502c25dc8/libstdc%2B%2B-v3/include/bits/stl_algobase.h#L2118 :
// Counts the iterators in [__first, __last) for which __pred returns true.
// The counter __n has the iterator's difference_type; it starts at 0 and is
// only ever incremented.
template<typename _InputIterator, typename _Predicate>
_GLIBCXX20_CONSTEXPR
typename iterator_traits<_InputIterator>::difference_type
__count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred)
{
typename iterator_traits<_InputIterator>::difference_type __n = 0;
for (; __first != __last; ++__first)
if (__pred(__first))
++__n;
return __n;
}
__n has type iterator_traits<_InputIterator>::difference_type, which is ptrdiff_t, a signed type. Doing ++__n could overflow that signed type, but that would be undefined behavior. Ergo, std::count() can't return a negative value, because that could only happen through undefined behavior. Since the value can't be negative, a signed comparison gives the same result as an unsigned one, so the compiler is free to use jle instead of jbe.

Adding #if defined preprocessor directives affects the alignment of class members

I have a class which has many members in it
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
#if defined(WEBRTC_TRANSCODE_CASE)
bool is_screen_share_;
#endif
and I have defined WEBRTC_TRANSCODE_CASE in build.gn
defines += ["WEBRTC_TRANSCODE_CASE"]
so is_screen_share_ is visible in the class. I compiled this code with RelWithDebInfo and got a shared library called liba.so. Then I removed the preprocessor directives:
bool is_perf_adaption_avalaible_;
int latest_encode_usage_ = 67;
int perf_target_framerate_ = 0;
std::map<uint8_t, uint16_t> pre_dlbitrate_;
bool is_screen_share_
and compiled the code with RelWithDebInfo to get libb.so. When I execute the command diff <(objdump -d liba.so) <(objdump -d libb.so), I get
1714248c1714248
< 7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
---
> 7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
The two immediates differ by 8 bytes (0xfb8 vs. 0xfc0), but why? I just don't understand. This is part of liba.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf b8 0f 00 00 mov $0xfb8,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C
it's part of libb.so's objdump:
0000000000722580 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11ContentTypeE>:
722580: 55 push %rbp
722581: 48 89 e5 mov %rsp,%rbp
722584: 41 57 push %r15
722586: 41 56 push %r14
722588: 41 55 push %r13
72258a: 41 54 push %r12
72258c: 49 89 d5 mov %rdx,%r13
72258f: 53 push %rbx
722590: 48 89 fb mov %rdi,%rbx
722593: bf 90 00 00 00 mov $0x90,%edi
722598: 49 89 cf mov %rcx,%r15
72259b: 41 89 f6 mov %esi,%r14d
72259e: 48 83 ec 28 sub $0x28,%rsp
7225a2: 44 89 45 bc mov %r8d,-0x44(%rbp)
7225a6: e8 e5 0a 61 00 callq d33090 <_Znwm>
7225ab: 4c 89 ee mov %r13,%rsi
7225ae: 48 89 c7 mov %rax,%rdi
7225b1: 49 89 c4 mov %rax,%r12
7225b4: e8 17 0a 00 00 callq 722fd0 <_ZN6webrtc20OveruseFrameDetectorC1EPNS_25CpuOveruseMetricsObserverE>
7225b9: bf c0 0f 00 00 mov $0xfc0,%edi
7225be: 4c 89 65 c8 mov %r12,-0x38(%rbp)
7225c2: e8 c9 0a 61 00 callq d33090 <_Znwm>
7225c7: 44 8b 45 bc mov -0x44(%rbp),%r8d
7225cb: 48 89 c7 mov %rax,%rdi
7225ce: 4c 89 f9 mov %r15,%rcx
7225d1: 4c 89 ea mov %r13,%rdx
7225d4: 44 89 f6 mov %r14d,%esi
7225d7: 49 89 c4 mov %rax,%r12
7225da: 45 89 c1 mov %r8d,%r9d
7225dd: 4c 8d 45 c8 lea -0x38(%rbp),%r8
7225e1: e8 3a 3f 00 00 callq 726520 <_ZN6webrtc18VideoStreamEncoderC1EjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsESt10unique_ptrINS_20OveruseFra
7225e6: 48 8b 7d c8 mov -0x38(%rbp),%rdi
7225ea: 48 85 ff test %rdi,%rdi
7225ed: 74 06 je 7225f5 <_ZN6webrtc24CreateVideoStreamEncoderEjPNS_26VideoStreamEncoderObserverERKNS_26VideoStreamEncoderSettingsENS_18VideoEncoderConfig11C.
Any help will be appreciated!

What does assembly code of function "do_compare" exactly do?

The do_compare function is in the libstdc++ library. It basically checks two strings and returns -1, 1, or 0 accordingly.
Here is the C++ code:
/**
* Compare the character ranges [__lo1,__hi1) and [__lo2,__hi2).
*
* Both ranges are copied into string_type objects and then compared one
* nul-terminated substring at a time with _M_compare.
*
* Returns the first non-zero _M_compare result. If every substring pair
* compares equal, returns 0 when both copies are exhausted together,
* -1 when the first copy is exhausted first, and 1 when the second is.
*/
template<typename _CharT>
int
collate<_CharT>::
do_compare(const _CharT* __lo1, const _CharT* __hi1,
const _CharT* __lo2, const _CharT* __hi2) const
{
// strcoll assumes zero-terminated strings so we make a copy
// and then put a zero at the end.
const string_type __one(__lo1, __hi1);
const string_type __two(__lo2, __hi2);
// __p/__q walk the copies; __pend/__qend mark the end of each copy's
// data (data() + length()).
const _CharT* __p = __one.c_str();
const _CharT* __pend = __one.data() + __one.length();
const _CharT* __q = __two.c_str();
const _CharT* __qend = __two.data() + __two.length();
// strcoll stops when it sees a nul character so we break
// the strings into zero-terminated substrings and pass those
// to strcoll.
for (;;)
{
// NOTE(review): _M_compare is presumably the strcoll wrapper the
// comments above refer to; its definition is not shown here.
const int __res = _M_compare(__p, __q);
if (__res)
return __res;
// Advance both cursors by the length of the substring just compared.
__p += char_traits<_CharT>::length(__p);
__q += char_traits<_CharT>::length(__q);
if (__p == __pend && __q == __qend)
return 0;
else if (__p == __pend)
return -1;
else if (__q == __qend)
return 1;
// Neither copy is exhausted: move one character further in each
// before comparing the next pair of substrings.
__p++;
__q++;
}
}
I have to put the entire assembly code of do_compare to show my problem, sorry:
0000000000101c40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_@@GLIBCXX_3.4>:
101c40: 41 57 push %r15
101c42: 41 56 push %r14
101c44: 49 89 fe mov %rdi,%r14
101c47: 48 89 f7 mov %rsi,%rdi
101c4a: 48 89 d6 mov %rdx,%rsi
101c4d: 41 55 push %r13
101c4f: 41 54 push %r12
101c51: 55 push %rbp
101c52: 4c 89 c5 mov %r8,%rbp
101c55: 53 push %rbx
101c56: 48 89 cb mov %rcx,%rbx
101c59: 48 83 ec 38 sub $0x38,%rsp
101c5d: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
101c64: 00 00
101c66: 48 89 44 24 28 mov %rax,0x28(%rsp)
101c6b: 31 c0 xor %eax,%eax
101c6d: 4c 8d 6c 24 27 lea 0x27(%rsp),%r13
101c72: 4c 89 ea mov %r13,%rdx
101c75: 4c 89 6c 24 18 mov %r13,0x18(%rsp)
101c7a: e8 f1 a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c7f: 4c 89 ea mov %r13,%rdx
101c82: 48 89 ee mov %rbp,%rsi
101c85: 48 89 df mov %rbx,%rdi
101c88: 49 89 c7 mov %rax,%r15
101c8b: 48 89 44 24 08 mov %rax,0x8(%rsp)
101c90: e8 db a2 f8 ff callq 8bf70 <_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag#plt>
101c95: 4d 8b 67 e8 mov -0x18(%r15),%r12
101c99: 4c 8b 68 e8 mov -0x18(%rax),%r13
101c9d: 48 89 c5 mov %rax,%rbp
101ca0: 48 89 44 24 10 mov %rax,0x10(%rsp)
101ca5: 4c 89 fb mov %r15,%rbx
101ca8: 4d 01 fc add %r15,%r12
101cab: 49 01 c5 add %rax,%r13
101cae: eb 32 jmp 101ce2 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xa2>
101cb0: 48 89 df mov %rbx,%rdi
101cb3: e8 98 87 f8 ff callq 8a450 <strlen#plt>
101cb8: 48 89 ef mov %rbp,%rdi
101cbb: 48 01 c3 add %rax,%rbx
101cbe: e8 8d 87 f8 ff callq 8a450 <strlen#plt>
101cc3: 48 01 c5 add %rax,%rbp
101cc6: 49 39 dc cmp %rbx,%r12
101cc9: 75 05 jne 101cd0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x90>
101ccb: 49 39 ed cmp %rbp,%r13
101cce: 74 27 je 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101cd0: 49 39 dc cmp %rbx,%r12
101cd3: 74 6b je 101d40 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x100>
101cd5: 49 39 ed cmp %rbp,%r13
101cd8: 74 76 je 101d50 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x110>
101cda: 48 83 c3 01 add $0x1,%rbx
101cde: 48 83 c5 01 add $0x1,%rbp
101ce2: 48 89 ea mov %rbp,%rdx
101ce5: 48 89 de mov %rbx,%rsi
101ce8: 4c 89 f7 mov %r14,%rdi
101ceb: e8 20 8b f8 ff callq 8a810 <_ZNKSt7collateIcE10_M_compareEPKcS2_#plt>
101cf0: 41 89 c7 mov %eax,%r15d
101cf3: 85 c0 test %eax,%eax
101cf5: 74 b9 je 101cb0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x70>
101cf7: 48 8b 7c 24 10 mov 0x10(%rsp),%rdi
101cfc: 48 8b 1d 9d 08 28 00 mov 0x28089d(%rip),%rbx # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d03: 48 83 ef 18 sub $0x18,%rdi
101d07: 48 39 df cmp %rbx,%rdi
101d0a: 75 54 jne 101d60 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x120>
101d0c: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d11: 48 83 ef 18 sub $0x18,%rdi
101d15: 48 39 df cmp %rbx,%rdi
101d18: 75 56 jne 101d70 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x130>
101d1a: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx
101d1f: 64 48 33 0c 25 28 00 xor %fs:0x28,%rcx
101d26: 00 00
101d28: 44 89 f8 mov %r15d,%eax
101d2b: 75 4f jne 101d7c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x13c>
101d2d: 48 83 c4 38 add $0x38,%rsp
101d31: 5b pop %rbx
101d32: 5d pop %rbp
101d33: 41 5c pop %r12
101d35: 41 5d pop %r13
101d37: 41 5e pop %r14
101d39: 41 5f pop %r15
101d3b: c3 retq
101d3c: 0f 1f 40 00 nopl 0x0(%rax)
101d40: 41 bf ff ff ff ff mov $0xffffffff,%r15d
101d46: eb af jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d48: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d4f: 00
101d50: 41 bf 01 00 00 00 mov $0x1,%r15d
101d56: eb 9f jmp 101cf7 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xb7>
101d58: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101d5f: 00
101d60: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d65: e8 96 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d6a: eb a0 jmp 101d0c <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xcc>
101d6c: 0f 1f 40 00 nopl 0x0(%rax)
101d70: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d75: e8 86 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101d7a: eb 9e jmp 101d1a <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0xda>
101d7c: e8 7f 95 f8 ff callq 8b300 <__stack_chk_fail#plt>
101d81: 48 89 c3 mov %rax,%rbx
101d84: 48 8b 7c 24 08 mov 0x8(%rsp),%rdi
101d89: 48 83 ef 18 sub $0x18,%rdi
101d8d: 48 3b 3d 0c 08 28 00 cmp 0x28080c(%rip),%rdi # 3825a0 <_ZNSs4_Rep20_S_empty_rep_storageE##GLIBCXX_3.4-0x57e0>
101d94: 74 0a je 101da0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x160>
101d96: 48 8b 74 24 18 mov 0x18(%rsp),%rsi
101d9b: e8 60 fe ff ff callq 101c00 <_ZNSt14codecvt_bynameIcc11__mbstate_tED0Ev##GLIBCXX_3.4+0x20>
101da0: 48 89 df mov %rbx,%rdi
101da3: e8 e8 a1 f8 ff callq 8bf90 <_Unwind_Resume#plt>
101da8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
101daf: 00
*******101db0: 53 push %rbx
101db1: 48 89 fb mov %rdi,%rbx
101db4: 48 8b 3f mov (%rdi),%rdi
101db7: 89 f0 mov %esi,%eax
101db9: 48 85 ff test %rdi,%rdi
101dbc: 74 05 je 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101dbe: 83 fe ff cmp $0xffffffff,%esi
101dc1: 74 05 je 101dc8 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x188>
101dc3: 5b pop %rbx
101dc4: c3 retq
101dc5: 0f 1f 00 nopl (%rax)
101dc8: 48 8b 47 10 mov 0x10(%rdi),%rax
101dcc: 48 3b 47 18 cmp 0x18(%rdi),%rax
101dd0: 73 0e jae 101de0 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1a0>
101dd2: 0f b6 00 movzbl (%rax),%eax
101dd5: 5b pop %rbx
101dd6: c3 retq
101dd7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
101dde: 00 00
101de0: 48 8b 07 mov (%rdi),%rax
101de3: ff 50 48 callq *0x48(%rax)
101de6: 83 f8 ff cmp $0xffffffff,%eax
101de9: 75 d8 jne 101dc3 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x183>
101deb: 48 c7 03 00 00 00 00 movq $0x0,(%rbx)
101df2: 5b pop %rbx
101df3: c3 retq
101df4: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
101dfb: 00 00 00
101dfe: 66 90 xchg %ax,%ax
101e00: 55 push %rbp
101e01: 89 f5 mov %esi,%ebp
101e03: 53 push %rbx
101e04: 48 89 fb mov %rdi,%rbx
101e07: 48 83 ec 08 sub $0x8,%rsp
101e0b: e8 b0 88 f8 ff callq 8a6c0 <_ZNKSt5ctypeIcE13_M_widen_initEv#plt>
101e10: 48 8b 03 mov (%rbx),%rax
101e13: 48 8b 40 30 mov 0x30(%rax),%rax
101e17: 48 3b 05 7a 11 28 00 cmp 0x28117a(%rip),%rax # 382f98 <_ZNKSt5ctypeIcE8do_widenEc##GLIBCXX_3.4+0x2e2c48>
101e1e: 75 10 jne 101e30 <_ZNKSt7collateIcE10do_compareEPKcS2_S2_S2_##GLIBCXX_3.4+0x1f0>
101e20: 48 83 c4 08 add $0x8,%rsp
101e24: 89 e8 mov %ebp,%eax
101e26: 5b pop %rbx
101e27: 5d pop %rbp
101e28: c3 retq
101e29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
101e30: 48 83 c4 08 add $0x8,%rsp
101e34: 40 0f be f5 movsbl %bpl,%esi
101e38: 48 89 df mov %rbx,%rdi
101e3b: 5b pop %rbx
101e3c: 5d pop %rbp
101e3d: ff e0 jmpq *%rax
101e3f: 90 nop
It seems to me that the assembly code not only performs the C++ code logic but also adds other logic.
As an example, the function _M_extract_int in libstdc++, which converts a char to an int, calls this function as follows:
callq 0x101db0
The instruction address 0x101db0 is in the middle of the assembly code. The code section from 0x101db0 to 0x101dbc seems to have nothing to do with the above C++ code. Really confused about what is going on here...

SSE2 movddup Not Moving Values

Can someone explain to me why the output of this program is [nan, nan]? The code is supposed to load the value of d into the high and low 64 bits of the XMM1 register and then move the contents of XMM1 into a. Because a is not initialized to a set of specific values, D initializes each element to nan. If the movupd instruction were not in the objdump, I would understand the result, but the instruction is there. Thoughts?
import std.stdio;
// Reproducer: tries to broadcast the constant d into both halves of XMM1
// and store the 16 bytes into the two-element array a, then prints a.
void main()
{
// Manifest constant: 1.0 / 2.
enum double d = 1.0 / cast(double)2;
// Two doubles with no explicit initializer.
double[] a = new double[2];
// Pointer to a's first element, used as the store target below.
auto aptr = a.ptr;
// Operands are destination-first: XMM1 <- d duplicated into both lanes,
// then an unaligned 128-bit store of XMM1 to the address operand [aptr].
// NOTE(review): the disassembly quoted after this program shows the
// store going to a stack slot (-0x30(%rbp)) rather than through a
// register holding the pointer value.
asm
{
movddup XMM1, d;
movupd [aptr], XMM1;
}
writeln(a);
}
Here is the objdump of the main function:
0000000000426b88 <_Dmain>:
426b88: 55 push %rbp
426b89: 48 8b ec mov %rsp,%rbp
426b8c: 48 83 ec 50 sub $0x50,%rsp
426b90: f2 48 0f 10 05 77 81 rex.W movsd 0x28177(%rip),%xmm0
426b97: 02 00
426b99: f2 48 0f 11 45 b0 rex.W movsd %xmm0,-0x50(%rbp)
426b9f: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426ba6: 00 00 00
426ba9: f2 48 0f 10 05 66 81 rex.W movsd 0x28166(%rip),%xmm0
426bb0: 02 00
426bb2: 48 8d 7d c0 lea -0x40(%rbp),%rdi
426bb6: e8 65 d1 00 00 callq 433d20 <_memsetDouble>
426bbb: f2 48 0f 10 0d 4c 81 rex.W movsd 0x2814c(%rip),%xmm1
426bc2: 02 00
426bc4: f2 48 0f 11 4d c0 rex.W movsd %xmm1,-0x40(%rbp)
426bca: f2 48 0f 10 15 3d 81 rex.W movsd 0x2813d(%rip),%xmm2
426bd1: 02 00
426bd3: f2 48 0f 11 55 c8 rex.W movsd %xmm2,-0x38(%rbp)
426bd9: 48 8d 45 c0 lea -0x40(%rbp),%rax
426bdd: 48 89 45 d0 mov %rax,-0x30(%rbp)
426be1: 48 8d 55 e0 lea -0x20(%rbp),%rdx
426be5: 48 b8 02 00 00 00 00 movabs $0x2,%rax
426bec: 00 00 00
426bef: 48 89 c1 mov %rax,%rcx
426bf2: 49 89 d0 mov %rdx,%r8
426bf5: 51 push %rcx
426bf6: 41 50 push %r8
426bf8: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426bff: 00 00 00
426c02: 48 bf c0 84 65 00 00 movabs $0x6584c0,%rdi
426c09: 00 00 00
426c0c: e8 87 ce 00 00 callq 433a98 <_d_arrayliteralTX>
426c11: 48 89 45 f0 mov %rax,-0x10(%rbp)
426c15: f2 48 0f 10 05 02 81 rex.W movsd 0x28102(%rip),%xmm0
426c1c: 02 00
426c1e: f2 48 0f 11 00 rex.W movsd %xmm0,(%rax)
426c23: f2 48 0f 10 0d f4 80 rex.W movsd 0x280f4(%rip),%xmm1
426c2a: 02 00
426c2c: 48 8b 45 f0 mov -0x10(%rbp),%rax
426c30: f2 48 0f 11 48 08 rex.W movsd %xmm1,0x8(%rax)
426c36: 48 8b 55 f0 mov -0x10(%rbp),%rdx
426c3a: 48 be 02 00 00 00 00 movabs $0x2,%rsi
426c41: 00 00 00
426c44: 41 58 pop %r8
426c46: 59 pop %rcx
426c47: 48 bf 08 00 00 00 00 movabs $0x8,%rdi
426c4e: 00 00 00
426c51: e8 8e 95 00 00 callq 4301e4 <_d_arraycopy>
426c56: f2 0f 12 4d b0 movddup -0x50(%rbp),%xmm1
426c5b: 66 0f 11 4d d0 movupd %xmm1,-0x30(%rbp)
426c60: ff 75 c8 pushq -0x38(%rbp)
426c63: ff 75 c0 pushq -0x40(%rbp)
426c66: e8 09 00 00 00 callq 426c74 <_D3std5stdio16__T7writelnTG2dZ7writelnFG2dZv>
426c6b: 48 83 c4 10 add $0x10,%rsp
426c6f: 31 c0 xor %eax,%eax
426c71: c9 leaveq
426c72: c3 retq
426c73: 90 nop
I looked into it, and apparently the compiler decides that by movupd [aptr], XMM1 you really mean movupd aptr, XMM1, i.e. it stores to the variable aptr itself instead of to the memory it points to. Loading aptr into a register beforehand (mov RAX, aptr; movupd [RAX], XMM1) will make it work.
You should probably file a bug report.