Windows 7: overshoot C++ std::this_thread::sleep_for - c++

Our code is written in C++ 11 (VS2012/Win 7-64bit). The C++ library provides a sleep_for function that we use. We observed that the C++ sleep_for sometimes shows a large overshoot. In other words we request to sleep for say 15 ms but the sleep turns out to be e.g. 100 ms. We see this when the load on the system is high.
My first reaction: “of course the sleeps "take longer" if there is a lot of load on the system and other threads are using the CPU”.
However the “funny” thing is that if we replace the sleep_for by a Windows API “Sleep” call then we do not see this behavior. I also saw that the sleep_for function, under the hood, makes a call to the Windows API Sleep function.
The documentation for sleep_for states:
The function blocks the calling thread for at least the time that's specified by Rel_time. This function does not throw any exceptions.
So technically the function is working. However we did not expect to see a difference between C++ sleep_for and the regular Sleep(Ex) function.
Can somebody explain this behavior?

There is quite a bit of additional code executed if using sleep_for vs SleepEx.
For example calling SleepEx(15) generates the following assembly in debug mode (Visual Studio 2015):
; Call site for SleepEx(15, false): two pushed arguments, one indirect call.
; 9 : SleepEx(15, false);
mov esi, esp ; remember ESP so it can be compared after the call
push 0 ; second argument (false)
push 15 ; 0000000fH
call DWORD PTR __imp__SleepEx#8
cmp esi, esp ; ESP is expected to be back at its saved value here
call __RTC_CheckEsp
By contrast this code
const std::chrono::milliseconds duration(15);
std::this_thread::sleep_for(duration);
Generates the following:
; Call site for sleep_for: first builds a temporary duration from the
; constant 15, then passes the result to sleep_for.
; 9 : std::this_thread::sleep_for(std::chrono::milliseconds(15));
mov DWORD PTR $T1[ebp], 15 ; 0000000fH
lea eax, DWORD PTR $T1[ebp]
push eax ; stack argument: address of the temporary holding 15
lea ecx, DWORD PTR $T2[ebp] ; ecx = address of the object to construct
call duration
push eax ; value returned by duration becomes sleep_for's argument
call sleep_for
add esp, 4 ; caller removes the single 4-byte argument
This calls into:
; Constructor that widens a 32-bit value to the 64-bit representation.
; In:  ecx = this, one stack argument (_Val) = pointer to the source value
; Out: eax = this; the callee pops its 4-byte argument (ret 4)
duration PROC ; std::chrono::duration<__int64,std::ratio<1,1000> >::duration<__int64,std::ratio<1,1000> ><int,void>, COMDAT
; _this$ = ecx
; 113 : { // construct from representation
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
push ecx ; preserve this: ecx is reused as the rep count below
; Fill the 204-byte local area (51 dwords) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
pop ecx
mov DWORD PTR _this$[ebp], ecx
; 112 : : _MyRep(static_cast<_Rep>(_Val))
mov eax, DWORD PTR __Val$[ebp]
mov eax, DWORD PTR [eax] ; load the 32-bit source value through the pointer
cdq ; sign-extend eax into edx:eax
mov ecx, DWORD PTR _this$[ebp]
mov DWORD PTR [ecx], eax ; low dword of the 64-bit member
mov DWORD PTR [ecx+4], edx ; high dword of the 64-bit member
; 114 : }
mov eax, DWORD PTR _this$[ebp] ; return this
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 4
duration ENDP
And calls into
; sleep_for: converts the relative duration to an xtime via to_xtime, copies
; the 16-byte result into the local _Tgt, then calls sleep_until(&_Tgt).
; In:  one stack argument (_Rel_time), passed on unchanged to to_xtime
; The caller removes the argument (ret 0).
sleep_for PROC ; std::this_thread::sleep_for<__int64,std::ratio<1,1000> >, COMDAT
; 151 : { // sleep for duration
push ebp
mov ebp, esp
sub esp, 268 ; 0000010cH
push ebx
push esi
push edi
; Fill the 268-byte local area (67 dwords) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-268]
mov ecx, 67 ; 00000043H
mov eax, -858993460 ; ccccccccH
rep stosd
; Store (security cookie XOR ebp) in the frame; it is checked before return.
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; 152 : stdext::threads::xtime _Tgt = _To_xtime(_Rel_time);
mov eax, DWORD PTR __Rel_time$[ebp]
push eax
lea ecx, DWORD PTR $T1[ebp] ; address of temporary $T1, pushed as the other argument
push ecx
call to_xtime
add esp, 8
; Copy 16 bytes from the pointer returned in eax into temporary $T2 ...
mov edx, DWORD PTR [eax]
mov DWORD PTR $T2[ebp], edx
mov ecx, DWORD PTR [eax+4]
mov DWORD PTR $T2[ebp+4], ecx
mov edx, DWORD PTR [eax+8]
mov DWORD PTR $T2[ebp+8], edx
mov eax, DWORD PTR [eax+12]
mov DWORD PTR $T2[ebp+12], eax
; ... and then again from $T2 into the named local _Tgt.
mov ecx, DWORD PTR $T2[ebp]
mov DWORD PTR __Tgt$[ebp], ecx
mov edx, DWORD PTR $T2[ebp+4]
mov DWORD PTR __Tgt$[ebp+4], edx
mov eax, DWORD PTR $T2[ebp+8]
mov DWORD PTR __Tgt$[ebp+8], eax
mov ecx, DWORD PTR $T2[ebp+12]
mov DWORD PTR __Tgt$[ebp+12], ecx
; 153 : sleep_until(&_Tgt);
lea eax, DWORD PTR __Tgt$[ebp]
push eax
call sleep_until
add esp, 4
; 154 : }
; eax and edx are saved around the stack-variable check, which receives
; ecx = ebp and edx = address of the $LN5 table below.
push edx
mov ecx, ebp
push eax
lea edx, DWORD PTR $LN5#sleep_for
call #_RTC_CheckStackVars#8
pop eax
pop edx
pop edi
pop esi
pop ebx
; Recover the cookie (undo the XOR with ebp) and have it verified.
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call #__security_check_cookie#4
add esp, 268 ; 0000010cH
cmp ebp, esp ; after releasing the locals ESP should equal EBP
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
npad 3
; Data passed to _RTC_CheckStackVars: a count of 1 and a pointer to one entry.
$LN5#sleep_for:
DD 1
DD $LN4#sleep_for
; Entry: -24 and 16 look like the ebp-relative offset and byte size of the
; variable (TODO confirm), followed by a pointer to its name.
$LN4#sleep_for:
DD -24 ; ffffffe8H
DD 16 ; 00000010H
DD $LN3#sleep_for
; NUL-terminated ASCII name: "_Tgt"
$LN3#sleep_for:
DB 95 ; 0000005fH
DB 84 ; 00000054H
DB 103 ; 00000067H
DB 116 ; 00000074H
DB 0
sleep_for ENDP
Some conversion happens:
; to_xtime: turns a relative duration into an xtime {sec, nsec}.
; A zero or negative duration yields {0, 0}; otherwise the result is
; system_clock::now() plus the duration, split into seconds and nanoseconds.
; Out: eax = pointer loaded from $T9, into which the 16-byte result is copied.
; NOTE: several call targets below carry simplified labels (many read
; "duration_ratio"); the trailing comment on each call names the real callee.
to_xtime PROC ; std::_To_xtime<__int64,std::ratio<1,1000> >, COMDAT
; 758 : { // convert duration to xtime
push ebp
mov ebp, esp
sub esp, 348 ; 0000015cH
push ebx
push esi
push edi
; Fill the 348-byte local area (87 dwords) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-348]
mov ecx, 87 ; 00000057H
mov eax, -858993460 ; ccccccccH
rep stosd
; Store (security cookie XOR ebp) in the frame; it is checked before return.
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; 759 : xtime _Xt;
; 760 : if (_Rel_time <= chrono::duration<_Rep, _Period>::zero())
lea eax, DWORD PTR $T7[ebp]
push eax
call duration_zero ; std::chrono::duration<__int64,std::ratio<1,1000> >::zero
add esp, 4
push eax
mov ecx, DWORD PTR __Rel_time$[ebp]
push ecx
call chronos_operator ; std::chrono::operator<=<__int64,std::ratio<1,1000>,__int64,std::ratio<1,1000> >
add esp, 8
; al holds the boolean result; zero means the duration is positive.
movzx edx, al
test edx, edx
je SHORT $LN2#To_xtime
; 761 : { // negative or zero relative time, return zero
; 762 : _Xt.sec = 0;
xorps xmm0, xmm0
movlpd QWORD PTR __Xt$[ebp], xmm0 ; zero the 8-byte sec field in one store
; 763 : _Xt.nsec = 0;
mov DWORD PTR __Xt$[ebp+8], 0
; 764 : }
; 765 : else
jmp $LN3#To_xtime
$LN2#To_xtime:
; 766 : { // positive relative time, convert
; 767 : chrono::nanoseconds _T0 =
; 768 : chrono::system_clock::now().time_since_epoch();
lea eax, DWORD PTR $T5[ebp]
push eax
lea ecx, DWORD PTR $T6[ebp]
push ecx
call system_clock_now ; std::chrono::system_clock::now
add esp, 4
mov ecx, eax
call time_since_ephoch ; std::chrono::time_point<std::chrono::system_clock,std::chrono::duration<__int64,std::ratio<1,10000000> > >::time_since_epoch
push eax
lea ecx, DWORD PTR __T0$8[ebp]
call duration ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,10000000>,void>
; 769 : _T0 += _Rel_time;
; _Rel_time is first converted into the temporary $T4, then added to _T0.
mov eax, DWORD PTR __Rel_time$[ebp]
push eax
lea ecx, DWORD PTR $T4[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1000>,void>
lea ecx, DWORD PTR $T4[ebp]
push ecx
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator+=
; 770 : _Xt.sec = chrono::duration_cast<chrono::seconds>(_T0).count();
lea eax, DWORD PTR __T0$8[ebp]
push eax
lea ecx, DWORD PTR $T3[ebp]
push ecx
call duration_cast ; std::chrono::duration_cast<std::chrono::duration<__int64,std::ratio<1,1> >,__int64,std::ratio<1,1000000000> >
add esp, 8
mov ecx, eax
call duration_count ; std::chrono::duration<__int64,std::ratio<1,1> >::count
; The 64-bit count comes back in edx:eax and is stored as _Xt.sec.
mov DWORD PTR __Xt$[ebp], eax
mov DWORD PTR __Xt$[ebp+4], edx
; 771 : _T0 -= chrono::seconds(_Xt.sec);
; Two temporaries are built ($T1, then $T2 from it) before the subtraction.
lea eax, DWORD PTR __Xt$[ebp]
push eax
lea ecx, DWORD PTR $T1[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1> >::duration<__int64,std::ratio<1,1> ><__int64,void>
push eax
lea ecx, DWORD PTR $T2[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1>,void>
lea ecx, DWORD PTR $T2[ebp]
push ecx
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator-=
; 772 : _Xt.nsec = (long)_T0.count();
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::count
mov DWORD PTR __Xt$[ebp+8], eax ; only the low dword is kept (the cast to long)
$LN3#To_xtime:
; 773 : }
; 774 : return (_Xt);
; Copy all 16 bytes of _Xt through the pointer held in $T9; the dword at
; offset 12 is copied too although no assignment to it appears above.
mov eax, DWORD PTR $T9[ebp]
mov ecx, DWORD PTR __Xt$[ebp]
mov DWORD PTR [eax], ecx
mov edx, DWORD PTR __Xt$[ebp+4]
mov DWORD PTR [eax+4], edx
mov ecx, DWORD PTR __Xt$[ebp+8]
mov DWORD PTR [eax+8], ecx
mov edx, DWORD PTR __Xt$[ebp+12]
mov DWORD PTR [eax+12], edx
mov eax, DWORD PTR $T9[ebp] ; return the same pointer
; 775 : }
; eax and edx are saved around the stack-variable check, which receives
; ecx = ebp and edx = address of the $LN8 table below.
push edx
mov ecx, ebp
push eax
lea edx, DWORD PTR $LN8#To_xtime
call #_RTC_CheckStackVars#8
pop eax
pop edx
pop edi
pop esi
pop ebx
; Recover the cookie (undo the XOR with ebp) and have it verified.
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call #__security_check_cookie#4
add esp, 348 ; 0000015cH
cmp ebp, esp ; after releasing the locals ESP should equal EBP
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
; Data passed to _RTC_CheckStackVars: a count of 2 and a pointer to the entries.
$LN8#To_xtime:
DD 2
DD $LN7#To_xtime
; Two entries of three dwords each; the first two values look like an
; ebp-relative offset and a byte size (TODO confirm), the third is a name pointer.
$LN7#To_xtime:
DD -24 ; ffffffe8H
DD 16 ; 00000010H
DD $LN5#To_xtime
DD -40 ; ffffffd8H
DD 8
DD $LN6#To_xtime
; NUL-terminated ASCII name: "_T0"
$LN6#To_xtime:
DB 95 ; 0000005fH
DB 84 ; 00000054H
DB 48 ; 00000030H
DB 0
; NUL-terminated ASCII name: "_Xt"
$LN5#To_xtime:
DB 95 ; 0000005fH
DB 88 ; 00000058H
DB 116 ; 00000074H
DB 0
to_xtime ENDP
Eventually the imported function gets called, the same one SleepEx has used.
; sleep_until: forwards its single pointer argument (_Abs_time) to the
; imported _Thrd_sleep. The caller removes the argument (ret 0).
sleep_until PROC ; std::this_thread::sleep_until, COMDAT
; 131 : { // sleep until _Abs_time
push ebp
mov ebp, esp
sub esp, 192 ; 000000c0H
push ebx
push esi
push edi
; Fill the 192-byte local area (48 dwords) with 0CCCCCCCCh.
lea edi, DWORD PTR [ebp-192]
mov ecx, 48 ; 00000030H
mov eax, -858993460 ; ccccccccH
rep stosd
; 132 : _Thrd_sleep(_Abs_time);
mov esi, esp ; remember ESP so it can be compared after the call
mov eax, DWORD PTR __Abs_time$[ebp]
push eax
call DWORD PTR __imp___Thrd_sleep
add esp, 4 ; caller removes the argument
cmp esi, esp
call __RTC_CheckEsp
; 133 : }
pop edi
pop esi
pop ebx
add esp, 192 ; 000000c0H
cmp ebp, esp ; after releasing the locals ESP should equal EBP
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
sleep_until ENDP
You should also be aware even SleepEx may not give 100% exact results as per the MSDN documentation https://msdn.microsoft.com/en-us/library/windows/desktop/ms686307(v=vs.85).aspx
This function causes a thread to relinquish the remainder of its time slice and become unrunnable for an interval based on the value of dwMilliseconds. The system clock "ticks" at a constant rate. If dwMilliseconds is less than the resolution of the system clock, the thread may sleep for less than the specified length of time. If dwMilliseconds is greater than one tick but less than two, the wait can be anywhere between one and two ticks, and so on. To increase the accuracy of the sleep interval, call the timeGetDevCaps function to determine the supported minimum timer resolution and the timeBeginPeriod function to set the timer resolution to its minimum. Use caution when calling timeBeginPeriod, as frequent calls can significantly affect the system clock, system power usage, and the scheduler. If you call timeBeginPeriod, call it one time early in the application and be sure to call the timeEndPeriod function at the very end of the application.

Related

C++ function not called directly?

Having such a simple c++ program
#include <iostream>
// Writes "Hello World: " followed by x and std::endl to std::cout.
void my_int_func(int x) {
std::cout << "Hello World: " << x << std::endl;
}
// Entry point: calls my_int_func once with the constant 54.
int main() {
my_int_func(54);
}
I'm getting the following code in the IDA (PRO 7.5 SP3) while disassembled (and also similar results in the "Assembly Output" in the MSVC)
the main function:
; main as shown by IDA: pushes the constant argument, calls my_int_func via
; its j_ jump stub, and returns 0 in eax.
_main proc near ; CODE XREF: _main_0↑j
push ebp
mov ebp, esp
sub esp, 0C0h
push ebx
push esi
push edi
__$EncStackInitStart_7:
mov edi, ebp
xor ecx, ecx ; count = 0, so the rep stosd below stores nothing
mov eax, 0CCCCCCCCh
rep stosd
__$EncStackInitEnd_7: ; x
push 36h ; '6'
; 36h = 54 decimal, the argument of my_int_func(54)
call j_?my_int_func##YAXH#Z ; my_int_func(int)
add esp, 4 ; caller removes the argument
xor eax, eax ; return value 0
pop edi
pop esi
pop ebx
add esp, 0C0h
cmp ebp, esp ; after releasing the locals ESP should equal EBP
call j___RTC_CheckEsp
mov esp, ebp
pop ebp
retn
_main endp
the code "under" the call j_?my_int_func##YAXH#Z line above:
; Jump stub: a single jmp to the real my_int_func. No frame is set up and
; nothing is pushed, so the callee sees the return address and argument
; that _main's call left on the stack.
j_?my_int_func##YAXH#Z proc near ; CODE XREF: _main+19↓p
x = dword ptr 4
jmp ?my_int_func##YAXH#Z ; my_int_func(int)
j_?my_int_func##YAXH#Z endp
the code of the my_int_func function itself:
; Body of my_int_func(int x): evaluates
;   std::cout << "Hello World: " << x << std::endl
; as three chained operator<< calls. The arguments of the later calls are
; pushed up front, before the first call is made.
x = dword ptr 8
push ebp
mov ebp, esp
sub esp, 0C0h
push ebx
push esi
push edi
__$EncStackInitStart_6:
mov edi, ebp
xor ecx, ecx ; count = 0, so the rep stosd below stores nothing
mov eax, 0CCCCCCCCh
rep stosd
__$EncStackInitEnd_6:
mov esi, esp ; ESP snapshot, compared after the third operator<< call
; Argument for the third call: address of std::endl.
push offset j_??$endl#DU?$char_traits#D#std###std##YAAAV?$basic_ostream#DU?$char_traits#D#std###0#AAV10##Z ; std::endl<char,std::char_traits<char>>(std::ostream &)
mov edi, esp ; ESP snapshot, compared after the second operator<< call
; Argument for the second call: the value of x.
mov eax, [ebp+x]
push eax
; Arguments for the first call: the string and the cout object.
push offset _Val ; "Hello World: "
mov ecx, ds:__imp_?cout#std##3V?$basic_ostream#DU?$char_traits#D#std###1#A ; std::ostream std::cout
push ecx ; _Ostr
call j_??$?6U?$char_traits#D#std###std##YAAAV?$basic_ostream#DU?$char_traits#D#std###0#AAV10#PBD#Z ; std::operator<<<std::char_traits<char>>(std::ostream &,char const *)
add esp, 8 ; caller removes the two arguments of the first call
mov ecx, eax ; returned stream becomes the object for the next call
call ds:__imp_??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#H#Z ; std::ostream::operator<<(int)
cmp edi, esp
call j___RTC_CheckEsp
mov ecx, eax ; returned stream becomes the object for the next call
call ds:__imp_??6?$basic_ostream#DU?$char_traits#D#std###std##QAEAAV01#P6AAAV01#AAV01##Z#Z ; std::ostream::operator<<(std::ostream & (*)(std::ostream &))
cmp esi, esp
call j___RTC_CheckEsp
pop edi
pop esi
pop ebx
add esp, 0C0h
cmp ebp, esp ; after releasing the locals ESP should equal EBP
call j___RTC_CheckEsp
mov esp, ebp
pop ebp
retn
?my_int_func##YAXH#Z endp
My question is: why is there extra code to call the my_int_func function? Why is my_int_func called indirectly through a jmp instruction, rather than directly by call'ing the my_int_func function address?
P.S.
I'm working on the MSVC Community 2019 on Win 10 if it matters.

How to call an assembly procedure from c++ code

I'd like to call, from my C++ code, a third-party text-output procedure for which I have a memory offset. Here is the assembly code of the function:
op_float_msg_ proc near ; initIntExtra_+628o
a3 = dword ptr -3Ch
_object = dword ptr -34h
a2 = byte ptr -30h
font = dword ptr -28h
outline_clr= dword ptr -24h
clr = dword ptr -20h
text = dword ptr -1Ch
push ebx
push ecx
push edx
push esi
push edi
push ebp
sub esp, 34h
mov ecx, eax
mov ebx, 65h
xor eax, eax
xor edx, edx
xor edi, edi
xor esi, esi
mov al, ds:_YellowColor
xor ebp, ebp
mov [esp+4Ch+clr], eax
xor eax, eax
mov [esp+4Ch+text], edx
mov al, ds:_colorTable ; Ś˝ÓşŰę
mov [esp+4Ch+font], ebx
mov [esp+4Ch+outline_clr], eax
loc_4592B6: ; op_float_msg_+BEj
mov eax, ecx ; a1
call interpretPopShort_ ; Get value type
mov word ptr [esp+esi+4Ch+a2], ax
mov eax, ecx ; a1
call interpretPopLong_ ; Get value
mov dx, word ptr [esp+esi+4Ch+a2]
mov [esp+ebp+4Ch+a3], eax
cmp dx, 9801h
jnz short loc_4592E9
mov ebx, eax ; a3
xor edx, edx
mov eax, ecx ; result
mov dx, word ptr [esp+esi+4Ch+a2] ; a2
call interpretDecStringRef_
loc_4592E9: ; op_float_msg_+57j
cmp edi, 1
jz short loc_459312
xor eax, eax
mov ax, word ptr [esp+esi+4Ch+a2]
and ah, 0F7h
cmp eax, 0C001h
jz short loc_459334
push edi
mov eax, [ecx]
push eax
push offset asc_50683C ; "script error: %s: invalid arg %d to flo"...
call interpretError_
add esp, 0Ch
jmp short loc_459334
; ---------------------------------------------------------------------------
loc_459312: ; op_float_msg_+6Cj
xor edx, edx
mov dx, word ptr [esp+esi+4Ch+a2] ; a2
mov eax, edx
and ah, 0F7h
cmp eax, 9001h
jnz short loc_459334
mov eax, ecx ; a1
mov ebx, [esp+ebp+4Ch+a3] ; a3
call interpretGetString_
mov [esp+4Ch+text], eax
loc_459334: ; op_float_msg_+7Dj ...
add ebp, 4
inc edi
add esi, 2
cmp edi, 3
jl loc_4592B6
mov edx, [esp+4Ch+_object]
test edx, edx
jnz short loc_459362
mov ebx, 1 ; type
mov edx, offset aFloat_msg ; "float_msg"
mov eax, ecx ; arg
call dbg_error_
jmp loc_459496
; ---------------------------------------------------------------------------
loc_459362: ; op_float_msg_+CAj
mov ebx, [esp+4Ch+text]
mov esi, edx
test ebx, ebx
jz short loc_459371
cmp byte ptr [ebx], 0
jnz short loc_459387
loc_459371: ; op_float_msg_+EAj
mov eax, esi ; result
call text_object_remove_
call tile_refresh_display_
add esp, 34h
pop ebp
pop edi
pop esi
pop edx
pop ecx
pop ebx
retn
Based on the code above, how can I pass the parameters to the function, especially the string to display? I know that it may take a little guessing, but can someone please help me try to deduce it?

GCC generated assembly

Why printf function causes the change of prologue?
C code_1:
#include <cstdio>
// code_1: stores 11 in a local and prints it with printf's "%d" format.
int main(){
int a = 11;
printf("%d", a);
}
GCC -m32 generated one:
.LC0:
.string "%d"
main:
lea ecx, [esp+4] // What's purpose of this three
and esp, -16 // lines?
push DWORD PTR [ecx-4] //
push ebp
mov ebp, esp
push ecx
sub esp, 20 // why sub 20?
mov DWORD PTR [ebp-12], 11
sub esp, 8
push DWORD PTR [ebp-12]
push OFFSET FLAT:.LC0
call printf
add esp, 16
mov eax, 0
mov ecx, DWORD PTR [ebp-4]
leave
lea esp, [ecx-4]
ret
C code_2:
#include <cstdio>
// code_2: only initializes the local; no function is called.
int main(){
int a = 11;
}
GCC -m32:
; 32-bit main for code_2 (no calls): frame-pointer prologue only, with no
; "and esp, -16" realignment sequence.
main:
push ebp
mov ebp, esp
sub esp, 16
; a = 11
mov DWORD PTR [ebp-4], 11
; return 0
mov eax, 0
leave
ret
What is the purpose of first three lines added in first code?
Please, explain first assembly code, if you can.
EDIT:
64-bit mode:
; 64-bit main for code_1: printf's arguments are passed in registers.
.LC0:
.string "%d"
main:
push rbp
mov rbp, rsp
sub rsp, 16
; a = 11
mov DWORD PTR [rbp-4], 11
; second argument (esi) = a
mov eax, DWORD PTR [rbp-4]
mov esi, eax
; first argument (edi) = address of the "%d" format string
mov edi, OFFSET FLAT:.LC0
; eax = 0 before the variadic call (no vector-register arguments)
mov eax, 0
call printf
; return 0
mov eax, 0
leave
ret
The insight is that the compiler keeps the stack aligned at function calls.
The alignment is 16 byte.
lea ecx, [esp+4] ;Save original ESP to ECX (ESP+4 actually)
and esp, -16 ;Align stack on 16 bytes (Lower esp)
push DWORD PTR [ecx-4] ;Push main return address (Stack at 16B + 4)
;My guess is to aid debugging tools that expect the RA
;to be at [ebp+04h]
push ebp
mov ebp, esp ;Prolog (Stack at 16B+8)
push ecx ;Save ECX (Original stack pointer) (Stack at 16B+12)
sub esp, 20 ;Reserve 20 bytes (Stack at 16B+0, ALIGNED AGAIN)
;4 for alignment + 1x16 for a variable (variable space is
;allocated in multiple of 16)
mov DWORD PTR [ebp-12], 11 ;a = 11
sub esp, 8 ;Stack at 16B+8 for later alignment
push DWORD PTR [ebp-12] ;a
push OFFSET FLAT:.LC0 ;"%d" (Stack at 16B)
call printf
add esp, 16 ;Remove args+pad from the stack (Stack at 16B)
mov eax, 0 ;Return 0
mov ecx, DWORD PTR [ebp-4] ;Restore ECX without the need to add to esp
leave ;Restore EBP
lea esp, [ecx-4] ;Restore original ESP
ret
I don't know why the compiler saves esp+4 in ecx instead of esp (esp+4 is the address of the first parameter of main).

Proprietary DLL returning empty files after 3h45m

I'm using a proprietary DLL (CP5200.dll) to communicate with 10 scrolling message LED signs. I'm using openFrameworks to generate and save images of the text I want to display, and then using the DLL to package the images into data the sign can process.
I call the following functions a few times a minute, and they return a file of 2-4kb , depending on image size, but at a certain point - around 3hrs 45 minutes after startup, they start returning files of 128 bytes, which result in a blank LED display when uploaded. I'm hypothesizing that there's a buffer inside the dll that doesn't get emptied, or something of the sort, but I can't make sense of the decompiled code.
Here are the functions:
int CP5200_Program_AddPicture(HOBJECT hObj, int nWinNo, const char* pPictFile, int nMode, int nEffect, int nSpeed, int nStay, int nCompress)
int CP5200_Program_SaveToFile(HOBJECT hObj, const char* pFilename)
Decompiled functions:
Exported fn(): CP5200_Program_AddImage - Ord:00C3h
:1000FD20 51 push ecx
:1000FD21 55 push ebp
:1000FD22 8B6C240C mov ebp, dword ptr [esp+0C]
:1000FD26 85ED test ebp, ebp
:1000FD28 7508 jne 1000FD32
:1000FD2A 83C8FF or eax, FFFFFFFF
:1000FD2D 5D pop ebp
:1000FD2E 59 pop ecx
:1000FD2F C23000 ret 0030
Function fully disassembled. I couldn't manage to decompile.
:1000FC50 ; Exported entry 15. CP5200_Program_AddPicture
:1000FC50
:1000FC50 ; ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦ S U B R O U T I N E ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦
:1000FC50
:1000FC50
:1000FC50 public CP5200_Program_AddPicture
:1000FC50 CP5200_Program_AddPicture proc near
:1000FC50
:1000FC50 arg_0 = dword ptr 8
:1000FC50 arg_4 = dword ptr 0Ch
:1000FC50 arg_8 = dword ptr 10h
:1000FC50 arg_C = dword ptr 14h
:1000FC50 arg_10 = dword ptr 18h
:1000FC50 arg_14 = dword ptr 1Ch
:1000FC50 arg_18 = dword ptr 20h
:1000FC50 arg_1C = dword ptr 24h
:1000FC50
:1000FC50 push ebx
:1000FC51 mov ebx, [esp+arg_0]
:1000FC55 test ebx, ebx
:1000FC57 jnz short loc_1000FC60
:1000FC59 or eax, 0FFFFFFFFh
:1000FC5C pop ebx
:1000FC5D retn 20h
:1000FC60 ; ---------------------------------------------------------------------------
:1000FC60
:1000FC60 loc_1000FC60: ; CODE XREF: CP5200_Program_AddPicture+7j
:1000FC60 push esi
:1000FC61 push edi
:1000FC62 mov edi, ebx
:1000FC64 mov esi, offset aCprogram ; "CProgram"
:1000FC69 mov ecx, 9
:1000FC6E xor eax, eax
:1000FC70 repe cmpsb
:1000FC72 jz short loc_1000FC79
:1000FC74 sbb eax, eax
:1000FC76 sbb eax, 0FFFFFFFFh
:1000FC79
:1000FC79 loc_1000FC79: ; CODE XREF: CP5200_Program_AddPicture+22j
:1000FC79 test eax, eax
:1000FC7B jz short loc_1000FC86
:1000FC7D pop edi
:1000FC7E pop esi
:1000FC7F or eax, 0FFFFFFFFh
:1000FC82 pop ebx
:1000FC83 retn 20h
:1000FC86 ; ---------------------------------------------------------------------------
:1000FC86
:1000FC86 loc_1000FC86: ; CODE XREF: CP5200_Program_AddPicture+2Bj
:1000FC86 mov esi, [esp+8+arg_4]
:1000FC8A test esi, esi
:1000FC8C jl short loc_1000FD07
:1000FC8E mov ecx, ebx
:1000FC90 call sub_10018020
:1000FC95 cmp esi, eax
:1000FC97 jge short loc_1000FD07
:1000FC99 push esi
:1000FC9A mov ecx, ebx
:1000FC9C call sub_10018030
:1000FCA1 push 3Eh
:1000FCA3 mov edi, eax
:1000FCA5 call ??2#YAPAXI#Z ; operator new(uint)
:1000FCAA add esp, 4
:1000FCAD test eax, eax
:1000FCAF jz short loc_1000FCBE
:1000FCB1 mov ecx, eax
:1000FCB3 call sub_100012E0
:1000FCB8 mov esi, eax
:1000FCBA test esi, esi
:1000FCBC jnz short loc_1000FCC9
:1000FCBE
:1000FCBE loc_1000FCBE: ; CODE XREF: CP5200_Program_AddPicture+5Fj
:1000FCBE pop edi
:1000FCBF pop esi
:1000FCC0 mov eax, 0FFFFFFFCh
:1000FCC5 pop ebx
:1000FCC6 retn 20h
:1000FCC9 ; ---------------------------------------------------------------------------
:1000FCC9
:1000FCC9 loc_1000FCC9: ; CODE XREF: CP5200_Program_AddPicture+6Cj
:1000FCC9 mov eax, [esp+8+arg_8]
:1000FCCD push eax
:1000FCCE lea ecx, [esi+25h]
:1000FCD1 call sub_100076A0
:1000FCD6 mov ecx, [esp+8+arg_C]
:1000FCDA mov edx, [esp+8+arg_10]
:1000FCDE mov eax, [esp+8+arg_14]
:1000FCE2 mov [esi+2Ah], ecx
:1000FCE5 mov ecx, [esp+8+arg_18]
:1000FCE9 mov [esi+2Eh], edx
:1000FCEC mov edx, [esp+8+arg_1C]
:1000FCF0 mov [esi+36h], ecx
:1000FCF3 push esi
:1000FCF4 mov ecx, edi
:1000FCF6 mov [esi+32h], eax
:1000FCF9 mov [esi+3Ah], edx
:1000FCFC call sub_100015A0
:1000FD01 pop edi
:1000FD02 pop esi
:1000FD03 pop ebx
:1000FD04 retn 20h
:1000FD0F CP5200_Program_AddPicture endp
Exported fn(): CP5200_Program_SaveToFile - Ord:0013h
:1000CE80 8B542404 mov edx, dword ptr [esp+04]
:1000CE84 85D2 test edx, edx
:1000CE86 741F je 1000CEA7
:1000CE88 56 push esi
:1000CE89 57 push edi
:1000CE8A 8BFA mov edi, edx
:1000CE8C BE8C060610 mov esi, 1006068C
:1000CE91 B909000000 mov ecx, 00000009
:1000CE96 33C0 xor eax, eax
:1000CE98 F3 repz
:1000CE99 A6 cmpsb
:1000CE9A 5F pop edi
:1000CE9B 5E pop esi
:1000CE9C 7405 je 1000CEA3
:1000CE9E 1BC0 sbb eax, eax
:1000CEA0 83D8FF sbb eax, FFFFFFFF
I'm writing in VS 2012, with openframeworks version of_v0.8.3_vs_release.
Does the decompiled code give any clues as to what's happening in the DLL that causes this behavior?

Why is __fastcall assembler code larger than __stdcall one in MS C++?

I have disassembled two different variations of Swap function (simple value-swap between two pointers).
1). __fastcall http://pastebin.com/ux5LMktz
2). __stdcall (function without explicit calling convention modifier will have a __stdcall by default, because of MS C++ compiler for Windows) http://pastebin.com/eGR6VUjX
As far as I know, __fastcall is implemented differently depending on the compiler, but basically it puts the first two arguments (left to right) into the ECX and EDX registers. The stack may also be used, but only if the arguments do not fit in those registers.
But in the listing at the first link you can see that the value is pushed into the ECX register, and there is no real difference between the two variations of the swap function.
And __fastcall variant does use:
00AA261F pop ecx
00AA2620 mov dword ptr [ebp-14h],edx
00AA2623 mov dword ptr [ebp-8],ecx
Which are not used in __stdcall version.
So it doesn't look more optimized (as __fastcall must be, by its definition).
I'm a newbie in ASM and calling conventions, so I'm asking for a piece of advice. Maybe __fastcall really is faster in my sample and I just don't see it?
Thanks!
Try turning on optimization, then comparing the results. Your fastcall version has many redundant operations because it's not optimized.
Here's output of VS 2010 with /Ox.
fastcall:
; fastcall swap: both pointer arguments arrive in registers, so no argument
; is read from the stack and nothing is popped on return (ret 0).
; _firstValue$ = ecx
; _secondValue$ = edx
?CallMe1##YIXPAH0#Z PROC ; CallMe1
mov eax, DWORD PTR [ecx] ; eax = *firstValue
push esi ; esi is used as scratch and restored before returning
mov esi, DWORD PTR [edx] ; esi = *secondValue
cmp eax, esi
je SHORT $LN1#CallMe1 ; equal values: skip both stores
mov DWORD PTR [ecx], esi ; *firstValue = old *secondValue
mov DWORD PTR [edx], eax ; *secondValue = old *firstValue
$LN1#CallMe1:
pop esi
ret 0
?CallMe1##YIXPAH0#Z ENDP ; CallMe1
stdcall:
; stdcall swap: both pointers are loaded from the stack, and the callee
; removes the 8 bytes of arguments itself (ret 8).
_firstValue$ = 8 ; size = 4
_secondValue$ = 12 ; size = 4
?CallMe2##YGXPAH0#Z PROC ; CallMe2
mov edx, DWORD PTR _firstValue$[esp-4] ; [esp+4]: first argument, no frame pointer
mov eax, DWORD PTR [edx] ; eax = *firstValue
push esi ; esi is used as scratch and restored before returning
mov esi, DWORD PTR _secondValue$[esp] ; [esp+12] now that esi has been pushed
mov ecx, DWORD PTR [esi] ; ecx = *secondValue
cmp eax, ecx
je SHORT $LN1#CallMe2 ; equal values: skip both stores
mov DWORD PTR [edx], ecx ; *firstValue = old *secondValue
mov DWORD PTR [esi], eax ; *secondValue = old *firstValue
$LN1#CallMe2:
pop esi
ret 8
?CallMe2##YGXPAH0#Z ENDP ; CallMe2
cdecl (what you mistakenly call stdcall in your example):
; cdecl swap: same instruction sequence as CallMe2 except for the return,
; which pops nothing (ret 0) and leaves the arguments on the stack.
_firstValue$ = 8 ; size = 4
_secondValue$ = 12 ; size = 4
?CallMe3##YAXPAH0#Z PROC ; CallMe3
mov edx, DWORD PTR _firstValue$[esp-4] ; [esp+4]: first argument, no frame pointer
mov eax, DWORD PTR [edx] ; eax = *firstValue
push esi ; esi is used as scratch and restored before returning
mov esi, DWORD PTR _secondValue$[esp] ; [esp+12] now that esi has been pushed
mov ecx, DWORD PTR [esi] ; ecx = *secondValue
cmp eax, ecx
je SHORT $LN1#CallMe3 ; equal values: skip both stores
mov DWORD PTR [edx], ecx ; *firstValue = old *secondValue
mov DWORD PTR [esi], eax ; *secondValue = old *firstValue
$LN1#CallMe3:
pop esi
ret 0
?CallMe3##YAXPAH0#Z ENDP ; CallMe3