Call not returning properly [X86_ASM] - c++

This is C++ using x86 inline assembly [Intel syntax]
Function:
DWORD *Call ( size_t lArgs, ... ){
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
inc edx
start:
cmp edx, lArgs
je end
push eax
push edx
push esi
mov esi, 0x04
imul esi, edx
mov ecx, esi
add ecx, _ret
push ecx
call dword ptr[ebp+esi] //Doesn't return to the next instruction, returns to the caller of the parent function.
pop ecx
mov [ecx], eax
pop eax
pop edx
pop esi
inc edx
jmp start
end:
mov eax, _ret
ret
}
}
The purpose of this function is to call multiple functions/addresses without calling them individually.
Why I'm having you debug it?
I have to start school for the day, and I need to have it done by evening.
Thanks alot, iDomo

Thank you for a complete compile-able example, it makes solving problems much easier.
According to your Call function signature, when the stack frame is set up, the lArgs is at ebp+8 , and the pointers start at ebp+C. And you have a few other issues. Here's a corrected version with some push/pop optimizations and cleanup, tested on MSVC 2010 (16.00.40219.01) :
DWORD *Call ( size_t lArgs, ... ) {
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
inc edx
push esi
start:
cmp edx, lArgs
; since you started counting at 1 instead of 0
; you need to stop *after* reaching lArgs
ja end
push edx
; you're trying to call [ebp+0xC+edx*4-4]
; a simpler way of expressing that - 4*edx + 8
; (4*edx is the same as edx << 2)
mov esi, edx
shl esi, 2
add esi, 0x8
call dword ptr[ebp+esi]
; and here you want to write the return value
; (which, btw, your printfs don't produce, so you'll get garbage)
; into _ret[edx*4-4] , which equals ret[esi - 0xC]
add esi, _ret
sub esi, 0xC
mov [esi], eax
pop edx
inc edx
jmp start
end:
pop esi
mov eax, _ret
; ret ; let the compiler clean up, because it created a stack frame and allocated space for the _ret pointer
}
}
And don't forget to delete[] the memory returned from this function after you're done.

I notice that, before calling, you push EAX, EDX, ESI, ECX (in order), but don't pop in the reverse order after returning. If the first CALL returns properly, but subsequent ones don't, that could be the issue.

Related

How to call an assembly procedure from c++ code

I'd like to call from my c++ code some 3-rd party text outputting procedure which i have an memory offset. Here is the assembly code of the function:
op_float_msg_ proc near ; initIntExtra_+628o
a3 = dword ptr -3Ch
_object = dword ptr -34h
a2 = byte ptr -30h
font = dword ptr -28h
outline_clr= dword ptr -24h
clr = dword ptr -20h
text = dword ptr -1Ch
push ebx
push ecx
push edx
push esi
push edi
push ebp
sub esp, 34h
mov ecx, eax
mov ebx, 65h
xor eax, eax
xor edx, edx
xor edi, edi
xor esi, esi
mov al, ds:_YellowColor
xor ebp, ebp
mov [esp+4Ch+clr], eax
xor eax, eax
mov [esp+4Ch+text], edx
mov al, ds:_colorTable ; Ś˝ÓşŰę
mov [esp+4Ch+font], ebx
mov [esp+4Ch+outline_clr], eax
loc_4592B6: ; op_float_msg_+BEj
mov eax, ecx ; a1
call interpretPopShort_ ; Get value type
mov word ptr [esp+esi+4Ch+a2], ax
mov eax, ecx ; a1
call interpretPopLong_ ; Get value
mov dx, word ptr [esp+esi+4Ch+a2]
mov [esp+ebp+4Ch+a3], eax
cmp dx, 9801h
jnz short loc_4592E9
mov ebx, eax ; a3
xor edx, edx
mov eax, ecx ; result
mov dx, word ptr [esp+esi+4Ch+a2] ; a2
call interpretDecStringRef_
loc_4592E9: ; op_float_msg_+57j
cmp edi, 1
jz short loc_459312
xor eax, eax
mov ax, word ptr [esp+esi+4Ch+a2]
and ah, 0F7h
cmp eax, 0C001h
jz short loc_459334
push edi
mov eax, [ecx]
push eax
push offset asc_50683C ; "script error: %s: invalid arg %d to flo"...
call interpretError_
add esp, 0Ch
jmp short loc_459334
; ---------------------------------------------------------------------------
loc_459312: ; op_float_msg_+6Cj
xor edx, edx
mov dx, word ptr [esp+esi+4Ch+a2] ; a2
mov eax, edx
and ah, 0F7h
cmp eax, 9001h
jnz short loc_459334
mov eax, ecx ; a1
mov ebx, [esp+ebp+4Ch+a3] ; a3
call interpretGetString_
mov [esp+4Ch+text], eax
loc_459334: ; op_float_msg_+7Dj ...
add ebp, 4
inc edi
add esi, 2
cmp edi, 3
jl loc_4592B6
mov edx, [esp+4Ch+_object]
test edx, edx
jnz short loc_459362
mov ebx, 1 ; type
mov edx, offset aFloat_msg ; "float_msg"
mov eax, ecx ; arg
call dbg_error_
jmp loc_459496
; ---------------------------------------------------------------------------
loc_459362: ; op_float_msg_+CAj
mov ebx, [esp+4Ch+text]
mov esi, edx
test ebx, ebx
jz short loc_459371
cmp byte ptr [ebx], 0
jnz short loc_459387
loc_459371: ; op_float_msg_+EAj
mov eax, esi ; result
call text_object_remove_
call tile_refresh_display_
add esp, 34h
pop ebp
pop edi
pop esi
pop edx
pop ecx
pop ebx
retn
Based on the code above how can I call pass the parameters to the function, especially the string to display? I know, that it's maybe a little guessing but can someone please help me try to deduce it?

How do I call a different subroutine using parameters passed into the current one (for use in recursion)?

I have two functions that take integers x and y read from input.
product returns x * y
power returns x ^ y, however it uses recursion and product to compute this. so x would be "base" and y is "exponent".
They called from C++:
int a, b, x, y;
a = product(x, y);
b = power(x, y);
and here is the asm. I got the product to work, however am having trouble with power because I am not sure of the syntax/method/convention to call product from it (and call itself for the recursion). EDIT: Recursion must be used.
global product
global power
section .text
product:
push ebp
mov ebp, esp
sub esp, 4
push edi
push esi
xor eax, eax
mov edi, [ebp+8]
mov esi, [ebp+12]
mov [ebp-4], edi
product_loop:
add [ebp-4], edi
mov eax, [ebp-4]
sub esi, 1
cmp esi, 1
jne product_loop
product_done:
pop esi
pop edi
mov esp, ebp
pop ebp
ret
power:
push ebp
mov ebp, esp
sub esp, 4
push edi
push esi
push ebx
xor eax, eax
mov edi, [ebp+8]
mov esi, [ebp+12]
;;;
check:
cmp esi, 1 ; if exp < 1
jl power_stop
recursion: ; else (PLEASE HELP!!!!!!!!)
; eax = call product (base, (power(base, exp-1))
power_stop:
mov eax, 1 ; return 1
power_done:
push ebx
pop esi
pop edi
mov esp, ebp
pop ebp
ret
EDIT: My solution!
power:
; Standard prologue
push ebp ; Save the old base pointer
mov ebp, esp ; Set new value of the base pointer
sub esp, 4 ; make room for 1 local variable result
push ebx ; this is exp-1
xor eax, eax ; Place zero in EAX. We will keep a running sum
mov eax, [ebp+12] ; exp
mov ebx, [ebp+8] ; base
cmp eax, 1 ; n >= 1
jge L1 ; if not, go do a recursive call
mov eax, 1 ; otherwise return 1
jmp L2
L1:
dec eax ; exp-1
push eax ; push argument 2: exp-1
push ebx ; push argument 1: base
call power ; do the call, result goes in eax: power(base, exp-1)
add esp, 8 ; get rid of arguments
push eax ; push argument 2: power(base, exponent-1)
push ebx ; push argument 1: base
call product ; product(base, power(base, exponent-1))
L2:
; Standard epilogue
pop ebx ; restore register
mov esp, ebp ; deallocate local variables
pop ebp ; Restore the callers base pointer.
ret ; Return to the caller.
You are using CDECL calling convention, so you have to first push the arguments in the stack in backward direction, then call the function and then clean the stack after the return.
push arg_last
push arg_first
call MyFunction
add esp, 8 ; the argument_count*argument_size
But here are some notes on your code:
Your function product does not return any value. Use mov eax, [ebp-4] immediately after product_done label.
Multiplication is much easy to be made by the instruction mul or imul. Using addition is the slowest possible way.
Computing the power by recursion is not the best idea. Use the following algorithm:
Y = 1;
if N=0 exit.
if N is odd -> Y = Y*x; N=N-1
if N is even -> Y = Y*Y; N=N/2
goto 2
Use SHR instruction in order to divide N by 2. Use test instrction in order to check odd/even number.
This way, you simply don't need to call product from power function.
If you're not sure how to write the assembly, you can generally write it in C++ and assemble it for clues - something like:
int power(int n, int exp)
{
return exp == 0 ? 1 :
exp == 1 ? n :
product(n, power(n, exp - 1));
}
Then you should just be able to use gcc -S or whatever your compiler's equivalent switch for assembly output is, or disassemble the machine code if you prefer.
For example, the function above, thrown in with int product(int x, int y) { return x * y; } and int main() { return product(3, 4); }, compiled with Microsoft's compiler ala cl /Fa power.cc:
; Listing generated by Microsoft (R) Optimizing Compiler Version 15.00.30729.01
TITLE C:\home\anthony\user\dev\power.cc
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC ?product##YAHHH#Z ; product
; Function compile flags: /Odtp
_TEXT SEGMENT
_x$ = 8 ; size = 4
_y$ = 12 ; size = 4
?product##YAHHH#Z PROC ; product
; File c:\home\anthony\user\dev\power.cc
; Line 1
push ebp
mov ebp, esp
mov eax, DWORD PTR _x$[ebp]
imul eax, DWORD PTR _y$[ebp]
pop ebp
ret 0
?product##YAHHH#Z ENDP ; product
_TEXT ENDS
PUBLIC ?power##YAHHH#Z ; power
; Function compile flags: /Odtp
_TEXT SEGMENT
tv73 = -8 ; size = 4
tv74 = -4 ; size = 4
_n$ = 8 ; size = 4
_exp$ = 12 ; size = 4
?power##YAHHH#Z PROC ; power
; Line 4
push ebp
mov ebp, esp
sub esp, 8
; Line 7
cmp DWORD PTR _exp$[ebp], 0
jne SHORT $LN5#power
mov DWORD PTR tv74[ebp], 1
jmp SHORT $LN6#power
$LN5#power:
cmp DWORD PTR _exp$[ebp], 1
jne SHORT $LN3#power
mov eax, DWORD PTR _n$[ebp]
mov DWORD PTR tv73[ebp], eax
jmp SHORT $LN4#power
$LN3#power:
mov ecx, DWORD PTR _exp$[ebp]
sub ecx, 1
push ecx
mov edx, DWORD PTR _n$[ebp]
push edx
call ?power##YAHHH#Z ; power
add esp, 8
push eax
mov eax, DWORD PTR _n$[ebp]
push eax
call ?product##YAHHH#Z ; product
add esp, 8
mov DWORD PTR tv73[ebp], eax
$LN4#power:
mov ecx, DWORD PTR tv73[ebp]
mov DWORD PTR tv74[ebp], ecx
$LN6#power:
mov eax, DWORD PTR tv74[ebp]
; Line 8
mov esp, ebp
pop ebp
ret 0
?power##YAHHH#Z ENDP ; power
_TEXT ENDS
PUBLIC _main
; Function compile flags: /Odtp
_TEXT SEGMENT
_main PROC
; Line 11
push ebp
mov ebp, esp
; Line 12
push 4
push 3
call ?power##YAHHH#Z ; power
add esp, 8
; Line 13
pop ebp
ret 0
_main ENDP
_TEXT ENDS
END
To walk you through this:
?power##YAHHH#Z PROC ; power
; Line 4
push ebp
mov ebp, esp
sub esp, 8
The above is the entry code for the power function - just adjusting the stack pointer to jump over the function arguments, which it will access below as _exp$[ebp] (that's exp) and _n$[ebp] (i.e. n).
; Line 7
cmp DWORD PTR _exp$[ebp], 0
jne SHORT $LN5#power
mov DWORD PTR tv74[ebp], 1
jmp SHORT $LN6#power
Basically, if exp is not equal to 0 we'll continue at label $LN5#power below, but if it is 0 then load 1 into the return value location on the stack at tv74[ebp] and jump to the function return instructions at $LN6#power.
$LN5#power:
cmp DWORD PTR _exp$[ebp], 1
jne SHORT $LN3#power
mov eax, DWORD PTR _n$[ebp]
mov DWORD PTR tv73[ebp], eax
jmp SHORT $LN4#power
Similar to the above - if exp is 1 then put n into eax and therefrom into the return value stack memory, then jump to the return instructions.
Now it starts to get interesting...
$LN3#power:
mov ecx, DWORD PTR _exp$[ebp]
sub ecx, 1
push ecx
Subtract 1 from exp and push in onto the stack...
mov edx, DWORD PTR _n$[ebp]
push edx
Also push n onto the stack...
call ?power##YAHHH#Z ; power
Recursively call the power function, which will use the two values pushes above.
add esp, 8
A stack adjustment after the function above returns.
push eax
Put the result of the recursive call - which the power return instructions leave in the eax register - onto the stack...
mov eax, DWORD PTR _n$[ebp]
push eax
Also push n onto the stack...
call ?product##YAHHH#Z ; product
Call the product function to multiple the value returned by the call to power above by n.
add esp, 8
mov DWORD PTR tv73[ebp], eax
Copy the result of product into a temporary address on the stack....
$LN4#power:
mov ecx, DWORD PTR tv73[ebp]
mov DWORD PTR tv74[ebp], ecx
Pick up the value from that tv73 temporary location and copy it into tv74...
$LN6#power:
mov eax, DWORD PTR tv74[ebp]
Finally, move the the product() result from tv74 into the eax register for convenient and fast access after the product call returns.
; Line 8
mov esp, ebp
pop ebp
ret 0
Clean up the stack and return.

Why is __fastcall assebmler code larger than __stdcall one in MS C++?

I have disassembled two different variations of Swap function (simple value-swap between two pointers).
1). __fastcall http://pastebin.com/ux5LMktz
2). __stdcall (function without explicit calling convention modifier will have a __stdcall by default, because of MS C++ compiler for Windows) http://pastebin.com/eGR6VUjX
As I know, __fastcall is implemented differently, depending on the compiler, but basically it puts the first two arguments (left to right) into ECX and EDX register. And there could be stack use, but if the arguments are too long.
But as for the link at 1-st option, you can see, that value is pushed into the ECX registry, and there is no real difference between two variations of swap function.
And __fastcall variant does use:
00AA261F pop ecx
00AA2620 mov dword ptr [ebp-14h],edx
00AA2623 mov dword ptr [ebp-8],ecx
Which are not used in __stdcall version.
So it doesn't look like more optimized (as __fasctcall must be , by its definition).
I'm a newbie in ASM language and calling convention, so I ask you for a piece of advice. Maybe __fastcall is faster exactly in my sample, but I don't see it, do I?
Thanks!
Try turning on optimization, then comparing the results. Your fastcall version has many redundant operations because it's not optimized.
Here's output of VS 2010 with /Ox.
fastcall:
; _firstValue$ = ecx
; _secondValue$ = edx
?CallMe1##YIXPAH0#Z PROC ; CallMe1
mov eax, DWORD PTR [ecx]
push esi
mov esi, DWORD PTR [edx]
cmp eax, esi
je SHORT $LN1#CallMe1
mov DWORD PTR [ecx], esi
mov DWORD PTR [edx], eax
$LN1#CallMe1:
pop esi
ret 0
?CallMe1##YIXPAH0#Z ENDP ; CallMe1
stdcall:
_firstValue$ = 8 ; size = 4
_secondValue$ = 12 ; size = 4
?CallMe2##YGXPAH0#Z PROC ; CallMe2
mov edx, DWORD PTR _firstValue$[esp-4]
mov eax, DWORD PTR [edx]
push esi
mov esi, DWORD PTR _secondValue$[esp]
mov ecx, DWORD PTR [esi]
cmp eax, ecx
je SHORT $LN1#CallMe2
mov DWORD PTR [edx], ecx
mov DWORD PTR [esi], eax
$LN1#CallMe2:
pop esi
ret 8
?CallMe2##YGXPAH0#Z ENDP ; CallMe2
cdecl (what you mistakenly call stdcall in your example):
_firstValue$ = 8 ; size = 4
_secondValue$ = 12 ; size = 4
?CallMe3##YAXPAH0#Z PROC ; CallMe3
mov edx, DWORD PTR _firstValue$[esp-4]
mov eax, DWORD PTR [edx]
push esi
mov esi, DWORD PTR _secondValue$[esp]
mov ecx, DWORD PTR [esi]
cmp eax, ecx
je SHORT $LN1#CallMe3
mov DWORD PTR [edx], ecx
mov DWORD PTR [esi], eax
$LN1#CallMe3:
pop esi
ret 0
?CallMe3##YAXPAH0#Z ENDP ; CallMe3

Access violation on 'ret' instruction

I've got this function, which consists mostly of inline asm.
long *toarrayl(int members, ...){
__asm{
push esp
mov eax, members
imul eax, 4
push eax
call malloc
mov edx, eax
mov edi, eax
xor ecx, ecx
xor esi, esi
loopx:
cmp ecx, members
je done
mov esi, 4
imul esi, ecx
add esi, ebp
mov eax, [esi+0xC]
mov [edi], eax
inc ecx
add edi, 4
jmp loopx
done:
mov eax, edx
pop esp
ret
}
}
And upon running, I get an access violation on the return instruction.
I'm using VC++ 6, and it can sometimes mean to point at the line above, so possible on 'pop esp'.
If you could help me out, it'd be great.
Thanks, iDomo.
You are failing to manage the stack pointer correctly. In particular, your call to malloc unbalances the stack, and your pop esp ends up popping the wrong value into esp. The access violation therefore occurs when you try to ret from an invalid stack (the CPU cannot read the return address). It's unclear why you are pushing and popping esp; that accomplishes nothing.
As you spotted, you should never use the instruction POP ESP - when you see that in the code, you know something extremely wrong has happened. Of course, calling malloc inside asseembler code is also rather a bad thing to do - you have for example forgotten to check if it returned NULL, so you may well crash. Stick that outside your assembler - and check for NULL, it's much easier to debug "Couldn't allocate memory at line 54 in file mycode.c" than "Somewhere in the assembler, we got a
Here's some suggestions for improvement, which should speed up your loop a bit:
long *toarrayl(int members, ...){
__asm{
mov eax, members
imul eax, 4
push eax
call malloc
add esp, 4
mov edx, eax
mov edi, eax
mov ecx, members
lea esi, [ebp+0xc]
loopx:
mov eax, [esi]
mov [edi], eax
add edi, 4
add esi, 4
dec ecx
jnz loopx
mov lret, eax
ret
}
}
Improvements: Remove multiply by four in every loop. Just increment esi instead. Use decrement on ecx, instead of increament, and load it up with members before the loop. This allows usage of just one jump in the loop, rather than two. Remove reduntant move from edx, to eax. Use eax directly.
I've figured out the answer on my own.
For those who have had this same, or alike problem:
The actual exception was occuring after the user code, when vc++ automatically pops/restores the registers to their states before the function was called. Since I miss-aligned the stack pointer when calling malloc, there was an access violation when poping from the stack. I wasn't able to see this in the editor because it wasn't my code, so it was just shown as the last of my code in the function.
To correct this, just add an add esp, (size of parameters for previous call) after the calls you make.
Fixed code:
long *toarrayl(int members, ...){
__asm{
mov eax, members
imul eax, 4
push eax
call malloc
add esp, 4
mov edx, eax
mov edi, eax
xor ecx, ecx
xor esi, esi
loopx:
cmp ecx, members
je done
mov esi, 4
imul esi, ecx
add esi, ebp
mov eax, [esi+0xC]
mov [edi], eax
inc ecx
add edi, 4
jmp loopx
done:
mov eax, edx
ret
}
//return (long*)0;
}
Optimized code:
long *toarrayl(int members, ...){
__asm{
mov eax, members
shl eax, 2
push eax
call malloc
add esp, 4
;cmp eax, 0
;je _error
mov edi, eax
mov ecx, members
lea esi, [ebp+0xC]
loopx:
mov edx, [esi]
mov [edi], edx
add edi, 4
add esi, 4
dec ecx
jnz loopx
}
}

How's __RTC_CheckEsp implemented?

__RTC_CheckEsp is a call that verifies the correctness of the esp, stack, register. It is called to ensure that the value of the esp was saved across a function call.
Anyone knows how it's implemented?
Well a little bit of inspection of the assembler gives it away
0044EE35 mov esi,esp
0044EE37 push 3039h
0044EE3C mov ecx,dword ptr [ebp-18h]
0044EE3F add ecx,70h
0044EE42 mov eax,dword ptr [ebp-18h]
0044EE45 mov edx,dword ptr [eax+70h]
0044EE48 mov eax,dword ptr [edx+0Ch]
0044EE4B call eax
0044EE4D cmp esi,esp
0044EE4F call #ILT+6745(__RTC_CheckEsp) (42BA5Eh)
There are 2 lines to note in this. First note at 0x44ee35 it stores the current value of esp to esi.
Then after the function call is completed it does a cmp between esp and esi. They should both be the same now. If they aren't then someone has either unwound the stack twice or not unwound it.
The _RTC_CheckEsp function looks like this:
_RTC_CheckEsp:
00475A60 jne esperror (475A63h)
00475A62 ret
esperror:
00475A63 push ebp
00475A64 mov ebp,esp
00475A66 sub esp,0
00475A69 push eax
00475A6A push edx
00475A6B push ebx
00475A6C push esi
00475A6D push edi
00475A6E mov eax,dword ptr [ebp+4]
00475A71 push 0
00475A73 push eax
00475A74 call _RTC_Failure (42C34Bh)
00475A79 add esp,8
00475A7C pop edi
00475A7D pop esi
00475A7E pop ebx
00475A7F pop edx
00475A80 pop eax
00475A81 mov esp,ebp
00475A83 pop ebp
00475A84 ret
As you can see the first thing it check is whether the result of the earlier comparison were "not equal" ie esi != esp. If thats the case then it jumps to the failure code. If they ARE the same then the function simply returns.
If you're any good at asm, maybe this helps:
jne (Jump if Not Equal) - jumps if the ZERO flag is NZ (NotZero)
_RTC_CheckEsp:
004C8690 jne esperror (4C8693h)
004C8692 ret
esperror:
004C8693 push ebp
004C8694 mov ebp,esp
004C8696 sub esp,0
004C8699 push eax
004C869A push edx
004C869B push ebx
004C869C push esi
004C869D push edi
004C869E mov eax,dword ptr [ebp+4]
004C86A1 push 0
004C86A3 push eax
004C86A4 call _RTC_Failure (4550F8h)
004C86A9 add esp,8
004C86AC pop edi
004C86AD pop esi
004C86AE pop ebx
004C86AF pop edx
004C86B0 pop eax
004C86B1 mov esp,ebp
004C86B3 pop ebp
004C86B4 ret
004C86B5 int 3
004C86B6 int 3
004C86B7 int 3
004C86B8 int 3
004C86B9 int 3
004C86BA int 3
004C86BB int 3
004C86BC int 3
004C86BD int 3
004C86BE int 3
004C86BF int 3