nasm function called from C ends up segfaulting - c++

I have to find min max values in array using only one conditional jump directive.
After compiling and linking the two files below I get a Segmentation Fault (core dumped), but I don't understand why that is.
Question: What is causing the segmentation fault?
main.cpp
#include <cstdio>
#include <time.h>
using namespace std;
extern "C" void minmax(int n, int * tab, int * max, int * min);
int main(){
const int rozmiar = 100000;
const int liczba_powtorzen = 10000;
int tab[rozmiar] = {1, 3, 3, -65, 3, 123, 4, 32, 342, 22, 11, 32, 44, 12, 324, 43};
tab[rozmiar-1] = -1000;
int min, max;
min = 99999;
max = -99999;
clock_t start, stop;
start = clock();
for(int i=0; i<liczba_powtorzen; i++){
minmax(rozmiar, tab, &max, &min);
}
printf("min = %d max = %d\n", min, max);
stop = clock();
printf("\n time = %f ( %d cykli)", (stop - start)*1.0/CLOCKS_PER_SEC, (stop - start));
return 0;
}
minmax.asm
global minmax ; required for linker and NASM
section .text ; start of the "CODE segment"
minmax:
push ebp
mov ebp, esp ; set up the EBP
push ecx ; save used registers
push esi
mov ecx, [ebp+8] ; array length n
mov esi, [ebp+12] ; array address
mov eax, [ebp+16] ;max
mov edi, [ebp+20] ; min
lp: add eax, [esi] ; fetch an array element
cmp eax, [esi]
jl max ; max<[esi] ->update max
cmp edi, [esi]
jg min ; min>[esi] ->update min
add esi, 4 ; move to another element
loop lp ; loop over all elements
max:
mov eax, esi
ret
min:
mov edi, esi
ret
pop esi ; restore used registers
pop ecx
pop ebp
ret ; return to caller

Long story, short:
You need to restore the stack before using ret.
Your asm implementation is faulty on many levels, but the reason for your segmentation fault is poor understanding of how ret works.
Invalid use of ret
ret does not bring you back to the last jump, it reads the value that is at the top of the stack, and returns to that address.
After you jump to either min: or max:, you call ret, where you should be jumping back to your loop.
This means that it will try to return back to the address at the top of the stack, which certainly isn't a valid address; you modified it upon entering the function.
push ebp
mov ebp, esp ; set up the EBP
push ecx ; save used registers
push esi ; note, this is where `ret` will try to go

I do not know what you're exactly trying to do, but the assembler function is written poor.
Try this:
push ebp
mov ebp, esp ; set up the EBP
push ecx ; save used registers
push esi
mov ecx, [ebp+8] ; array length n
mov esi, [ebp+12] ; array address
mov eax, 0x80000000
mov edi,[ebp+16]
mov [edi], eax
mov eax, 0x7fffffff
mov edi,[ebp+20]
mov [edi], eax
lp:
mov edi,[ebp+16]
lodsd
cmp [edi], eax
jg short _min_test
mov [edi], eax
_min_test:
mov edi,[ebp+20]
cmp [edi], eax
jl short _loop
mov [edi], eax
_loop:
loop lp
pop esi ; restore used registers
pop ecx
pop ebp
ret ; return to caller

Related

How do I convert C++ function to assembly(x86_64)?

This is my .CPP file
#include <iostream>
using namespace std;
extern "C" void KeysAsm(int arr[], int n, int thetha, int rho);
// Keep this and call it from assembler
extern "C"
void crim(int *xp, int *yp) {
int temp = *xp;
*xp = *yp;
*yp = temp+2;
}
// Translate this into Intel assembler
void KeysCpp(int arr[], int n, int thetha, int rho){
for (int i = 0; i < n - 1; i++) {
for (int j = 0; j < n - i - 1; j++) {
if (arr[j] > arr[j + 1]) {
crim(&arr[j], &arr[j + 1]);
}
}
arr[i]= arr[i] + thetha / rho * 2 - 4;
}
}
// Function to print an array
void printArray(int arr[], int size){
int i;
for (i = 0; i < size; i++)
cout << arr[i] << "\n";
cout << endl;
}
int main() {
int gamma1[]{
9,
270,
88,
-12,
456,
80,
45,
123,
427,
999
};
int gamma2[]{
900,
312,
542,
234,
234,
1,
566,
123,
427,
111
};
printf("Array:\n");
printArray(gamma1, 10);
KeysAsm(gamma1, 10, 5, 6);
printf("Array Result Asm:\n");
printArray(gamma1, 10);
KeysCpp(gamma2, 10, 5, 6);
printf("Array Result Cpp:\n");
printArray(gamma2, 10);
}
What I want to do is, convert the KeysCpp function into assembly language and call it from this very .CPP file. I want to keep the crim function as it is in .CPP, while only converting the KeysCpp.
Here is my .ASM file
PUBLIC KeysAsm
includelib kernel32.lib
_DATA SEGMENT
EXTERN crim:PROC
_DATA ENDS
_TEXT SEGMENT
KeysAsm PROC
push rbp
mov rbp, rsp
sub rsp, 40
mov QWORD PTR [rbp-24], rdi
mov DWORD PTR [rbp-28], esi
mov DWORD PTR [rbp-32], edx
mov DWORD PTR [rbp-36], ecx
mov DWORD PTR [rbp-4], 0
jmp L3
L3:
mov eax, DWORD PTR [rbp-28]
sub eax, 1
cmp DWORD PTR [rbp-4], eax
jl L7
L4:
mov eax, DWORD PTR [rbp-28]
sub eax, DWORD PTR [rbp-4]
sub eax, 1
cmp DWORD PTR [rbp-8], eax
jl L6
L5:
add DWORD PTR [rbp-8], 1
L6:
mov eax, DWORD PTR [rbp-8]
cdqe
lea rdx, [0+rax*4]
mov rax, QWORD PTR [rbp-24]
add rax, rdx
mov edx, DWORD PTR [rax]
mov eax, DWORD PTR [rbp-8]
cdqe
add rax, 1
lea rcx, [0+rax*4]
mov rax, QWORD PTR [rbp-24]
add rax, rcx
mov eax, DWORD PTR [rax]
cmp edx, eax
jle L5
mov eax, DWORD PTR [rbp-8]
cdqe
add rax, 1
lea rdx, [0+rax*4]
mov rax, QWORD PTR [rbp-24]
add rdx, rax
mov eax, DWORD PTR [rbp-8]
cdqe
lea rcx, [0+rax*4]
mov rax, QWORD PTR [rbp-24]
add rax, rcx
mov rsi, rdx
mov rdi, rax
call crim
L7:
mov DWORD PTR [rbp-8], 0
jmp L4
KeysAsm ENDP
_TEXT ENDS
END
I am using Visual Studio 2017 to run this project.
I am getting next error when I run this code.
Unhandled exception at 0x00007FF74B0E429C in MatrixMultiplication.exe: Stack cookie instrumentation code detected a stack-based buffer overrun. occurred
Your asm looks like it's expecting the x86-64 System V calling convention, with args in RDI, ESI, EDX, ECX. But you said you're compiling with Visual Studio, so the compiler-generated code will use the Windows x64 calling convention: RCX, EDX, R8D, R9D.
And when you call crim, it can use shadow space (32 bytes above its return address, which you didn't reserve space for).
It looks like you got this asm from un-optimized compiler output, probably from https://godbolt.org/z/ea4MPh81r using GCC for Linux, without using -mabi=ms to override the default -mabi=sysv when compiling for non-Windows targets. And then you modified it to make the loop infinite, with a jmp at the bottom instead of a ret? Maybe a different GCC version than 12.2 since the label numbers and code don't match exactly.
(The signs of being un-optimized compiler output are all the reloads from [rbp-whatever], and redoing sign-extension before using an int to index an array with cdqe. A human would know the int must be non-negative. And being GCC specifically, the numbered label like .L1: etc. where you just removed the ., and of heavily using RAX for as much as possible in a debug build. And choices like lea rdx, [0+rax*4] to copy-and-shift, and the exact syntax it used to print that instruction in Intel syntax match GCC.)
To compile a single function for Windows x64, isolate it and give the compiler only prototypes for anything it calls
extern "C" void crim(int *xp, int *yp); // prototype only
void KeysCpp(int arr[], int n, int thetha, int rho){
for (int i = 0; i < n - 1; i++) {
for (int j = 0; j < n - i - 1; j++) {
if (arr[j] > arr[j + 1]) {
crim(&arr[j], &arr[j + 1]);
}
}
arr[i]= arr[i] + thetha / rho * 2 - 4;
}
}
Then on Godbolt, use gcc -O3 -mabi=ms, or use MSVC which always targets Windows. https://godbolt.org/z/Mj5Gb54b5 shows both GCC and MSVC with optimization enabled.
KeysCpp(int*, int, int, int): ; demangled name
cmp edx, 1
jle .L11 ; "shrink wrap" optimization: early-out on n<=1 before saving regs
push r15 ; save some call-preserved regs
push r14
lea r14, [rcx+4] ; arr + 1
push r13
mov r13, rcx
Unfortunately GCC fails to hoist the thetha / rho * 2 - 4 loop-invariant, instead redoing idiv every time through the loop. Seems like an obvious optimization since those are local vars whose address hasn't been taken at all, and it keeps thetha (typo for theta?) and rho in registers. So MSVC is much more efficient here. Clang also misses this optimization.

Need assistance with loop to iteratively add to sum variable in Assembly

I'm trying to convert the following C++ code into Assembly (MASM, Irvine32):
const int SIZE = 10;
int numbers[SIZE] = {10,60,20,33,72,89,45,65,72,18};
int limit = 50;
int index = 0;
int sum = 0;
while( index < SIZE )
{
if( numbers[index] <= limit )
{
sum = sum + numbers[index]; // sum += array[index];
}
index++;
}
If anyone could clear up where I'm going wrong -- I'm getting errors at L1: it's just spouting out "+10". I believe it's because I cannot translate sum=sum+numbers[index] into Assembly. If anyone could help me do that it would be fantastic. My attempt at translating it (lines starting from "total: mov esi, offset numbers" to "inc index") is obviously incorrect.
.data
SYZ = 10
numbers DWORD 10, 60, 20, 33, 72, 89, 45, 65, 72, 18
limit DWORD 50
index DWORD 0
sum DWORD 0
.code
main PROC
mov eax, index
mov ebx, SYZ
top: cmp eax, ebx
jae next
jb total
total: mov esi, OFFSET numbers
mov ecx, limit
cmp [esi], ecx
jbe L1
L1: add eax, ebx
inc index
jmp top
next: mov edx, sum
call WriteInt
exit
main ENDP
END main
Your conditional branch that implements the if is wrong. It should look like:
top:
...
cmp [esi], ecx
ja L1 ; conditional jump *over* an ADD instruction
add eax, [esi] ; [esi] is array[index] if you increment ESI properly...
L1: inc index
jmp top
In your C++, you can see that if numbers[index] <= limit then you want to update the sum, otherwise just increment the index and go back to the "top"; aka recheck the stopping condition.
Your original asm code was doing a condition check and then continuing regardless of the result.
cmp [esi], ecx
jbe L1 ; jump or fall-through to L1, condition irrelevant
L1: add eax, ebx
The C++ equivalent of your original asm is:
if( numbers[index] <= limit )
{
}
sum += ebx;
index++;
I'm not sure if this will solve all of your problems, but it will definitely solve one of them.
.data
SYZ = 10
numbers DWORD 10, 60, 20, 33, 72, 89, 45, 65, 72, 18
limit DWORD 50
index DWORD 0
sum DWORD 0
.code
main PROC
mov eax, index
mov ebx, SYZ
mov esi, OFFSET numbers
mov ecx, limit
mov edx, 0
top: cmp eax, ebx
jae next
cmp [esi], ecx
ja L1
add edx, [esi]
L1: inc index
mov eax, index
add esi, 4
jmp top
next: mov sum, edx
mov eax, sum
call WriteInt

How do I call a different subroutine using parameters passed into the current one (for use in recursion)?

I have two functions that take integers x and y read from input.
product returns x * y
power returns x ^ y, however it uses recursion and product to compute this. so x would be "base" and y is "exponent".
They called from C++:
int a, b, x, y;
a = product(x, y);
b = power(x, y);
and here is the asm. I got the product to work, however am having trouble with power because I am not sure of the syntax/method/convention to call product from it (and call itself for the recursion). EDIT: Recursion must be used.
global product
global power
section .text
product:
push ebp
mov ebp, esp
sub esp, 4
push edi
push esi
xor eax, eax
mov edi, [ebp+8]
mov esi, [ebp+12]
mov [ebp-4], edi
product_loop:
add [ebp-4], edi
mov eax, [ebp-4]
sub esi, 1
cmp esi, 1
jne product_loop
product_done:
pop esi
pop edi
mov esp, ebp
pop ebp
ret
power:
push ebp
mov ebp, esp
sub esp, 4
push edi
push esi
push ebx
xor eax, eax
mov edi, [ebp+8]
mov esi, [ebp+12]
;;;
check:
cmp esi, 1 ; if exp < 1
jl power_stop
recursion: ; else (PLEASE HELP!!!!!!!!)
; eax = call product (base, (power(base, exp-1))
power_stop:
mov eax, 1 ; return 1
power_done:
push ebx
pop esi
pop edi
mov esp, ebp
pop ebp
ret
EDIT: My solution!
power:
; Standard prologue
push ebp ; Save the old base pointer
mov ebp, esp ; Set new value of the base pointer
sub esp, 4 ; make room for 1 local variable result
push ebx ; this is exp-1
xor eax, eax ; Place zero in EAX. We will keep a running sum
mov eax, [ebp+12] ; exp
mov ebx, [ebp+8] ; base
cmp eax, 1 ; n >= 1
jge L1 ; if not, go do a recursive call
mov eax, 1 ; otherwise return 1
jmp L2
L1:
dec eax ; exp-1
push eax ; push argument 2: exp-1
push ebx ; push argument 1: base
call power ; do the call, result goes in eax: power(base, exp-1)
add esp, 8 ; get rid of arguments
push eax ; push argument 2: power(base, exponent-1)
push ebx ; push argument 1: base
call product ; product(base, power(base, exponent-1))
L2:
; Standard epilogue
pop ebx ; restore register
mov esp, ebp ; deallocate local variables
pop ebp ; Restore the callers base pointer.
ret ; Return to the caller.
You are using CDECL calling convention, so you have to first push the arguments in the stack in backward direction, then call the function and then clean the stack after the return.
push arg_last
push arg_first
call MyFunction
add esp, 8 ; the argument_count*argument_size
But here are some notes on your code:
Your function product does not return any value. Use mov eax, [ebp-4] immediately after product_done label.
Multiplication is much easy to be made by the instruction mul or imul. Using addition is the slowest possible way.
Computing the power by recursion is not the best idea. Use the following algorithm:
Y = 1;
if N=0 exit.
if N is odd -> Y = Y*x; N=N-1
if N is even -> Y = Y*Y; N=N/2
goto 2
Use SHR instruction in order to divide N by 2. Use test instrction in order to check odd/even number.
This way, you simply don't need to call product from power function.
If you're not sure how to write the assembly, you can generally write it in C++ and assemble it for clues - something like:
int power(int n, int exp)
{
return exp == 0 ? 1 :
exp == 1 ? n :
product(n, power(n, exp - 1));
}
Then you should just be able to use gcc -S or whatever your compiler's equivalent switch for assembly output is, or disassemble the machine code if you prefer.
For example, the function above, thrown in with int product(int x, int y) { return x * y; } and int main() { return product(3, 4); }, compiled with Microsoft's compiler ala cl /Fa power.cc:
; Listing generated by Microsoft (R) Optimizing Compiler Version 15.00.30729.01
TITLE C:\home\anthony\user\dev\power.cc
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC ?product##YAHHH#Z ; product
; Function compile flags: /Odtp
_TEXT SEGMENT
_x$ = 8 ; size = 4
_y$ = 12 ; size = 4
?product##YAHHH#Z PROC ; product
; File c:\home\anthony\user\dev\power.cc
; Line 1
push ebp
mov ebp, esp
mov eax, DWORD PTR _x$[ebp]
imul eax, DWORD PTR _y$[ebp]
pop ebp
ret 0
?product##YAHHH#Z ENDP ; product
_TEXT ENDS
PUBLIC ?power##YAHHH#Z ; power
; Function compile flags: /Odtp
_TEXT SEGMENT
tv73 = -8 ; size = 4
tv74 = -4 ; size = 4
_n$ = 8 ; size = 4
_exp$ = 12 ; size = 4
?power##YAHHH#Z PROC ; power
; Line 4
push ebp
mov ebp, esp
sub esp, 8
; Line 7
cmp DWORD PTR _exp$[ebp], 0
jne SHORT $LN5#power
mov DWORD PTR tv74[ebp], 1
jmp SHORT $LN6#power
$LN5#power:
cmp DWORD PTR _exp$[ebp], 1
jne SHORT $LN3#power
mov eax, DWORD PTR _n$[ebp]
mov DWORD PTR tv73[ebp], eax
jmp SHORT $LN4#power
$LN3#power:
mov ecx, DWORD PTR _exp$[ebp]
sub ecx, 1
push ecx
mov edx, DWORD PTR _n$[ebp]
push edx
call ?power##YAHHH#Z ; power
add esp, 8
push eax
mov eax, DWORD PTR _n$[ebp]
push eax
call ?product##YAHHH#Z ; product
add esp, 8
mov DWORD PTR tv73[ebp], eax
$LN4#power:
mov ecx, DWORD PTR tv73[ebp]
mov DWORD PTR tv74[ebp], ecx
$LN6#power:
mov eax, DWORD PTR tv74[ebp]
; Line 8
mov esp, ebp
pop ebp
ret 0
?power##YAHHH#Z ENDP ; power
_TEXT ENDS
PUBLIC _main
; Function compile flags: /Odtp
_TEXT SEGMENT
_main PROC
; Line 11
push ebp
mov ebp, esp
; Line 12
push 4
push 3
call ?power##YAHHH#Z ; power
add esp, 8
; Line 13
pop ebp
ret 0
_main ENDP
_TEXT ENDS
END
To walk you through this:
?power##YAHHH#Z PROC ; power
; Line 4
push ebp
mov ebp, esp
sub esp, 8
The above is the entry code for the power function - just adjusting the stack pointer to jump over the function arguments, which it will access below as _exp$[ebp] (that's exp) and _n$[ebp] (i.e. n).
; Line 7
cmp DWORD PTR _exp$[ebp], 0
jne SHORT $LN5#power
mov DWORD PTR tv74[ebp], 1
jmp SHORT $LN6#power
Basically, if exp is not equal to 0 we'll continue at label $LN5#power below, but if it is 0 then load 1 into the return value location on the stack at tv74[ebp] and jump to the function return instructions at $LN6#power.
$LN5#power:
cmp DWORD PTR _exp$[ebp], 1
jne SHORT $LN3#power
mov eax, DWORD PTR _n$[ebp]
mov DWORD PTR tv73[ebp], eax
jmp SHORT $LN4#power
Similar to the above - if exp is 1 then put n into eax and therefrom into the return value stack memory, then jump to the return instructions.
Now it starts to get interesting...
$LN3#power:
mov ecx, DWORD PTR _exp$[ebp]
sub ecx, 1
push ecx
Subtract 1 from exp and push in onto the stack...
mov edx, DWORD PTR _n$[ebp]
push edx
Also push n onto the stack...
call ?power##YAHHH#Z ; power
Recursively call the power function, which will use the two values pushes above.
add esp, 8
A stack adjustment after the function above returns.
push eax
Put the result of the recursive call - which the power return instructions leave in the eax register - onto the stack...
mov eax, DWORD PTR _n$[ebp]
push eax
Also push n onto the stack...
call ?product##YAHHH#Z ; product
Call the product function to multiple the value returned by the call to power above by n.
add esp, 8
mov DWORD PTR tv73[ebp], eax
Copy the result of product into a temporary address on the stack....
$LN4#power:
mov ecx, DWORD PTR tv73[ebp]
mov DWORD PTR tv74[ebp], ecx
Pick up the value from that tv73 temporary location and copy it into tv74...
$LN6#power:
mov eax, DWORD PTR tv74[ebp]
Finally, move the the product() result from tv74 into the eax register for convenient and fast access after the product call returns.
; Line 8
mov esp, ebp
pop ebp
ret 0
Clean up the stack and return.

Debugger Opens While Compiling Inline Assembly Code

I am using some code from Github, but it won't compile. The debugger opens in the middle of the code compiling. This is a personal project and it would be really helpful if anyone could make useful edits to the code as I am new to assembly.
Here is the output that I am getting until the debugger opens:
'Assembly.exe': Loaded
'C:\Users\Mayank\Desktop\Assembly\Debug\Assembly.exe', Symbols loaded.
'Assembly.exe': Loaded 'C:\Windows\SysWOW64\ntdll.dll', Cannot find or
open the PDB file 'Assembly.exe': Loaded
'C:\Windows\SysWOW64\kernel32.dll', Cannot find or open the PDB file
'Assembly.exe': Loaded 'C:\Windows\SysWOW64\KernelBase.dll', Cannot
find or open the PDB file 'Assembly.exe': Loaded
'C:\Windows\SysWOW64\msvcr100d.dll', Symbols loaded. First-chance
exception at 0x011013fe in Assembly.exe: 0xC00000FD: Stack overflow.
Unhandled exception at 0x011013fe in Assembly.exe: 0xC00000FD: Stack
overflow.
Here is the code:
//
// main.cpp
// MergeSortC
//
// Copyright (c) 2012 Mayank. All rights reserved.
//
#include <iostream>
#include <cmath>
#include <stdio.h>
using namespace std;
const int ARRAYSIZE = 30;
int main()
{
int arr[ARRAYSIZE];
int temp_arr[ARRAYSIZE];
int number;
for(int x = 0; x < ARRAYSIZE; x++)
{
number = (rand() % 99) + 1;
arr[x] = number;
}
Merge_Sort:
__asm
{
// EAX - Array start
// ECX - array length
// Merge_Sort (first half)
// Length of the first half
// ECX /= 2
push ECX
shr ECX, 2
call Merge_Sort
pop ECX
// Merge_Sort (second half)
push arr
push EBX
push ECX
// Length of the second half
// ECX = ECX - ECX/2
mov EDX, ECX
shr EDX, 1
sub ARRAYSIZE, EDX
imul EDX, 4
// Start index of the second half
// EAX = EAX + (ECX/2) * 4
add arr, EDX
push EDX
call Merge_Sort
pop EDX
pop ECX
pop EBX
pop arr
pushad
// Merge (first half, second half)
// Length of first half = ECX/2
// Length of second half = ECX - ECX/2
mov EDX, ECX
shr ECX, 1
sub EDX, ECX
// Start of second half = EAX + (ECX/2) * 4
mov EBX, EAX
mov EDI, ECX
imul EDI, 4
add EBX, EDI
// Index of temp array = 0
sub EDI, EDI
call Merge
popad
// Copy back the merged array from temp_arr to arr
call Merge_Copy_Back_Temp
ret
};
Merge:
__asm
{
// Merge two arrays contents.
// The final merged array will be in temp_arr
// Merging is done recursively.
// Arguments:
// EAX - First array's start
// EBX - Second array's start
// ECX - Length of first array
// EDX - Length of second array
// EDI - Index in temp array
pushad
// Handle the cases where one array is empty
cmp ARRAYSIZE, 0
jz First_Array_Over
cmp EDX, 0
jz Second_Array_Over
// Compare first elements of both the arrays
push ARRAYSIZE
push EDI
mov ECX, [arr]
mov EDI, [ECX]
cmp ECX, EDI
pop EDI
pop ECX
// Pick which ever is the least and update that array
jl Update_First_Array
jmp Update_Second_Array
ret
};
Update_First_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EAX]
pop dword ptr [temp_arr + EDI * 4]
add EAX, 4
dec ECX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Update_Second_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EBX]
pop dword ptr [temp_arr + EDI * 4]
add EBX, 4
dec EDX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Merge_Copy_Back_Temp:
__asm
{
// Copy back the temp array into original array
// Arguments:
// EAX - original array address
// ECX - original array length
pushad
// For copying back, the destination array is EAX
mov EBX, EAX
// Now, the source array is temp_arr
mov EAX, temp_arr
call Copy_Array
popad
ret
};
First_Array_Over:
__asm
{
// Copy the rest of the second array to the temp arr
// because the first array is empty
pushad
mov EAX, EBX
mov ECX, EDX
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Second_Array_Over:
__asm
{
// Copy the rest of the first array to the temp arr
// because the second array is empty
pushad
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Copy_Array:
__asm
{
// Copy array to destination array
// EAX - Array start
// EBX - Destination array
// ECX - Array length
// Trivial case
cmp ECX, 0
jz Copy_Empty_Array
push ECX
sub EDI, EDI
};
copy_loop:
__asm
{
// Copy each element
push dword ptr [EAX + EDI * 4]
pop dword ptr [EBX + EDI * 4]
inc EDI
loop copy_loop
pop ECX
ret
};
Copy_Empty_Array:
__asm
{
ret
};
Read_Arr:
__asm
{
// EAX - array start
// ECX - array length
mov ESI, arr
sub EDI, EDI
};
loop1:
__asm
{
// Read each element
lea eax,[esi+edx*4]
inc EDI
loop loop1
ret
};
return 0;
}
I dare to disbelieve about the debugger opening during compilation. That said, this code
Merge_Sort:
__asm
{
// EAX - Array start
// ECX - array length
// Merge_Sort (first half)
// Length of the first half
// ECX /= 2
push ECX
shr ECX, 2
call Merge_Sort
has to overflow the stack when running: pushing ECX, shifting ECX, calling itself.
Short of installing the compiler myself, I suggest that you work through the code by adding #if 0 / #endif around a bunch of the code and seeing if you can figure out which instruction(s) the compiler is upset about - it clearly is the compiler crashing, which isn't a very good thing, but does happen.
Of course, that's after you have googled for the error message, etc... ;)

Call not returning properly [X86_ASM]

This is C++ using x86 inline assembly [Intel syntax]
Function:
DWORD *Call ( size_t lArgs, ... ){
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
inc edx
start:
cmp edx, lArgs
je end
push eax
push edx
push esi
mov esi, 0x04
imul esi, edx
mov ecx, esi
add ecx, _ret
push ecx
call dword ptr[ebp+esi] //Doesn't return to the next instruction, returns to the caller of the parent function.
pop ecx
mov [ecx], eax
pop eax
pop edx
pop esi
inc edx
jmp start
end:
mov eax, _ret
ret
}
}
The purpose of this function is to call multiple functions/addresses without calling them individually.
Why I'm having you debug it?
I have to start school for the day, and I need to have it done by evening.
Thanks alot, iDomo
Thank you for a complete compile-able example, it makes solving problems much easier.
According to your Call function signature, when the stack frame is set up, the lArgs is at ebp+8 , and the pointers start at ebp+C. And you have a few other issues. Here's a corrected version with some push/pop optimizations and cleanup, tested on MSVC 2010 (16.00.40219.01) :
DWORD *Call ( size_t lArgs, ... ) {
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
inc edx
push esi
start:
cmp edx, lArgs
; since you started counting at 1 instead of 0
; you need to stop *after* reaching lArgs
ja end
push edx
; you're trying to call [ebp+0xC+edx*4-4]
; a simpler way of expressing that - 4*edx + 8
; (4*edx is the same as edx << 2)
mov esi, edx
shl esi, 2
add esi, 0x8
call dword ptr[ebp+esi]
; and here you want to write the return value
; (which, btw, your printfs don't produce, so you'll get garbage)
; into _ret[edx*4-4] , which equals ret[esi - 0xC]
add esi, _ret
sub esi, 0xC
mov [esi], eax
pop edx
inc edx
jmp start
end:
pop esi
mov eax, _ret
; ret ; let the compiler clean up, because it created a stack frame and allocated space for the _ret pointer
}
}
And don't forget to delete[] the memory returned from this function after you're done.
I notice that, before calling, you push EAX, EDX, ESI, ECX (in order), but don't pop in the reverse order after returning. If the first CALL returns properly, but subsequent ones don't, that could be the issue.