Inline Assembly Code Does Not Compile in Visual C++ 2010 Express

Inline Assembly Code Does Not Compile in Visual C++ 2010 Express - c++

I have some assembly merge sort code that I obtained from Github and I am trying to embed it into Inline Assembly in C++, but it won't compile and keeps returning these errors:
1>c:\users\mayank\desktop\assembly\assembly\main.cpp(147): error
C2415: improper operand type
The code that I am attempting to run is this:
#include <iostream>
#include <cmath>
#include <stdio.h>
using namespace std;
const int ARRAYSIZE = 30;
int main()
{
int arr[ARRAYSIZE];
int temp_arr[ARRAYSIZE];
int number;
for(int x = 0; x < ARRAYSIZE; x++)
{
number = (rand() % 99) + 1;
arr[x] = number;
}
/*
READ_ARR_LEN:
__asm
{
// Read the length of the array
//GetLInt [30] // Size of input array
//PutLInt [30]
}
GET_ARRAY:
__asm
{
//intel_syntax
// Get values in arr from the user
mov EAX, arr
mov ECX, ARR_LEN
call Read_Arr
// Run Merge Sort on the array
mov EAX, arr
mov EBX, temp_arr
mov ECX, ARR_LEN
call Merge_Sort
// EXIT
};;
*/
Merge_Sort:
__asm
{
// EAX - Array start
// ECX - array length
// Arrays of size 0 or 1 are already sorted
cmp ARRAYSIZE, 2
jl Trivial_Merge_Sort
// Merge_Sort (first half)
// Length of the first half
// ECX /= 2
push ARRAYSIZE
shr ARRAYSIZE, 1
call Merge_Sort
pop ARRAYSIZE
// Merge_Sort (second half)
push arr
push EBX
push ARRAYSIZE
// Length of the second half
// ECX = ECX - ECX/2
mov EDX, ARRAYSIZE
shr EDX, 1
sub ARRAYSIZE, EDX
imul EDX, 4
// Start index of the second half
// EAX = EAX + (ECX/2) * 4
add arr, EDX
push EDX
call Merge_Sort
pop EDX
pop ARRAYSIZE
pop EBX
pop arr
pushad
// Merge (first half, second half)
// Length of first half = ECX/2
// Length of second half = ECX - ECX/2
mov EDX, ECX
shr ECX, 1
sub EDX, ECX
// Start of second half = EAX + (ECX/2) * 4
mov EBX, EAX
mov EDI, ECX
imul EDI, 4
add EBX, EDI
// Index of temp array = 0
sub EDI, EDI
call Merge
popad
// Copy back the merged array from temp_arr to arr
call Merge_Copy_Back_Temp
ret
};
Trivial_Merge_Sort:
__asm
{
// In case of arrays of length 0 or 1
ret
};
Merge:
__asm
{
// Merge two arrays contents.
// The final merged array will be in temp_arr
// Merging is done recursively.
// Arguments:
// EAX - First array's start
// EBX - Second array's start
// ECX - Length of first array
// EDX - Length of second array
// EDI - Index in temp array
pushad
// Handle the cases where one array is empty
cmp ARRAYSIZE, 0
jz First_Array_Over
cmp EDX, 0
jz Second_Array_Over
// Compare first elements of both the arrays
push ARRAYSIZE
push EDI
mov ARRAYSIZE, [arr]
mov EDI, [ARRAYSIZE]
cmp ARRAYSIZE, EDI
pop EDI
pop ARRAYSIZE
// Pick which ever is the least and update that array
jl Update_First_Array
jmp Update_Second_Array
};
Update_First_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EAX]
pop dword ptr [temp_arr + EDI * 4]
add EAX, 4
dec ECX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Update_Second_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EBX]
pop dword ptr [temp_arr + EDI * 4]
add EBX, 4
dec EDX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Merge_Copy_Back_Temp:
__asm
{
// Copy back the temp array into original array
// Arguments:
// EAX - original array address
// ECX - original array length
pushad
// For copying back, the destination array is EAX
mov EBX, EAX
// Now, the source array is temp_arr
mov EAX, temp_arr
call Copy_Array
popad
ret
};
Trivial_Merge:
__asm
{
// Note: One array is empty means no need to merge.
popad
ret
};
First_Array_Over:
__asm
{
// Copy the rest of the second array to the temp arr
// because the first array is empty
pushad
mov EAX, EBX
mov ECX, EDX
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Second_Array_Over:
__asm
{
// Copy the rest of the first array to the temp arr
// because the second array is empty
pushad
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Copy_Array:
__asm
{
// Copy array to destination array
// EAX - Array start
// EBX - Destination array
// ECX - Array length
// Trivial case
cmp ECX, 0
jz Copy_Empty_Array
push ECX
sub EDI, EDI
};
copy_loop:
__asm
{
// Copy each element
push dword ptr [EAX + EDI * 4]
pop dword ptr [EBX + EDI * 4]
inc EDI
loop copy_loop
pop ECX
ret
};
Copy_Empty_Array:
__asm
{
ret
};
Read_Arr:
__asm
{
// EAX - array start
// ECX - array length
mov ESI, EAX
sub EDI, EDI
};
loop1:
__asm
{
// Read each element
lea eax,[esi+edx*4]
inc EDI
loop loop1
ret
};
return 0;
}

(Update: In the original code posted in the question there were attempts to address memory as DWORD [address], which is incompatible with the syntax used by Visual C++'s inline assembler as I point out in my answer below.)
Visual C++ uses MASM syntax for its inline assembly, so you need to use DWORD PTR instead of just DWORD. That's what's causing these compilation errors.
See e.g. this table from The Art of Assembly.

This looks like the code is from this github repository.
In that code, GetLInt is actually a NASM macro which is included in an external macro definition file and calls a function proc_GetLInt, which in turn is provided in an object file io.o, the source code is not there. The problem is therefore simply that
you didn't realize that GetLint is external code that you're missing
even if you took all files from that repository, it would not work, because NASM macros don't work directly in VC++ inline assembly
even if you fixed the macro problem, you still don't have the GetLInt function because it is provided as a linux object file only, you'd have to write it yourself
How do you fix this?
That code was meant to provide a self-contained assembler program that handles all input/output on its own. Since you're inlining it in VC++, you have much more powerful I/O handling at your hands already. Use those instead, i.e. make sure that the values you want to sort are already in arr before the inline assembly starts.
Then, looking at the code: Merge_Sort expects the start of your array in EAX, and its length in ECX. You can get both from your C++ code. When you do that, you no longer need the READ_ARR_LEN and GET_ARRAY blocks from the assembler code.
I am rather reluctant to reproduce parts of the code with modifications as I cannot find a licence file on github that would say I may do so. Let me try to describe: You need to manually move the pointer to arr into EAX and the content of ARRAYSIZE into ECX at the very start of the assembler routine. (*) As far as I can see, you have already taken care of filling the array with numbers, so there's nothing you need to do there.
Then you need to remove all unnecessary assembler functions and calls to them. You also should either condense all your separate __asm blocks into one or use external variables to conserve and restore registers between blocks (or read the tutorial here, but just using one block works and is less hassle).
Finally, you have to be careful with the stack frames: every call has to have a matching ret. This is easy to stumble over as the merge sort procedure is recursive.
(*) Be careful with the way VC++ treats variables inside asm blocks; be sure to actually use pointers when you need them.
So all in all, porting this to VC++ is not a trivial task.

Related

Assembly: loop through a sequence of characters and swap them

My assignment is to Implement a function in assembly that would do the following:
loop through a sequence of characters and swap them such that the end result is the original string in reverse ( 100 points )
Hint: collect the string from user as a C-string then pass it to the assembly function along with the number of characters entered by the user. To find out the number of characters use strlen() function.
I have written both the C++ and the assembly programs, and they work fine up to a point: for example, if I input 12345 the output is correctly shown as 54321, but with more than 5 characters the output starts to be incorrect: for example, if I input 123456 the output is 653241. I would greatly appreciate it if anyone could point out where my mistake is:
.code
_reverse PROC
push ebp
mov ebp,esp ;stack pointer to ebp
mov ebx,[ebp+8] ; address of first array element
mov ecx,[ebp+12] ; the number of elemets in array
mov eax,ebx
mov ebp,0 ;move 0 to base pointer
mov edx,0 ; set data register to 0
mov edi,0
Setup:
mov esi , ecx
shr ecx,1
add ecx,edx
dec esi
reverse:
cmp ebp , ecx
je allDone
mov edx, eax
add eax , edi
add edx , esi
Swap:
mov bl, [edx]
mov bh, [eax]
mov [edx],bh
mov [eax],bl
inc edi
dec esi
cmp edi, esi
je allDone
inc ebp
jmp reverse
allDone:
pop ebp ; pop ebp out of stack
ret ; retunr the value of eax
_reverse ENDP
END
and here is my c++ code:
#include<iostream>
#include <string>
using namespace std;
extern"C"
char reverse(char*, int);
int main()
{
char str[64] = {NULL};
int lenght;
cout << " Please Enter the text you want to reverse:";
cin >> str;
lenght = strlen(str);
reverse(str, lenght);
cout << " the reversed of the input is: " << str << endl;
}

You didn't comment your code, so IDK what exactly you're trying to do, but it looks like you are manually doing the array indexing with MOV / ADD instead of using an addressing mode like [eax + edi].
However, it looks like you're modifying your original value and then using it in a way that would make sense if it was unmodified.
mov edx, eax ; EAX holds a pointer to the start of array, read every iter
add eax , edi ; modify the start of the array!!!
add edx , esi
Swap:
inc edi
dec esi
EAX grows by EDI every step, and EDI increases linearly. So EAX's offset from the start of the array grows quadratically (the running sum 0 + 1 + 2 + ... + n is n(n+1)/2).
Single-stepping this in a debugger should have found this easily.
BTW, the normal way to do this is to walk one pointer up, one pointer down, and fall out of the loop when they cross. Then you don't need a separate counter, just cmp / ja. (Don't check for JNE or JE, because they can cross each other without ever being equal.)

Overall you have the right idea: start at both ends of the string and swap elements until you get to the middle. The implementation is horrible, though.
mov ebp,0 ;move 0 to base pointer
This seems to be loop counter (comment is useless or even worse); I guess idea was to swap length/2 elements which is perfectly fine. HINT I'd just compare pointers/indexes and exit once they collide.
mov edx,0 ; set data register to 0
...
add ecx,edx
mov edx, eax
Useless and misleading.
mov edi,0
mov esi , ecx
dec esi
Looks like indexes to start/end of the string. OK. HINT I'd go with pointers to start/end of the string; but indexes work too
cmp ebp , ecx
je allDone
Exit if did length/2 iterations. OK.
mov edx, eax
add eax , edi
add edx , esi
eax and edx point to current symbols to be swapped. Almost OK but this clobbers eax! Each loop iteration after the second will use wrong pointers! This is what caused your problem in the first place. This wouldn't have happened if you had used pointers instead of indexes, or if you'd used offset addressing [eax+edi]/[eax+esi]
...
Swap part is OK
cmp edi, esi
je allDone
Second exit condition, this time comparing for index collision! Generally one exit condition should be enough; several exit conditions are usually either superfluous or hint at some flaw in the algorithm. Also, an equality comparison is not enough - the indexes can go from edi<esi to edi>esi during a single iteration.

C/C++ Inline asm improper operand type

I have the following code, that is supposed to XOR a block of memory:
void XorBlock(DWORD dwStartAddress, DWORD dwSize, DWORD dwsKey)
{
DWORD dwKey;
__asm
{
push eax
push ecx
mov ecx, dwStartAddress // Move Start Address to ECX
add ecx, dwSize // Add the size of the function to ECX
mov eax, dwStartAddress // Copy the Start Address to EAX
crypt_loop: // Start of the loop
xor byte ptr ds:[eax], dwKey // XOR The current byte with 0x4D
inc eax // Increment EAX with dwStartAddress++
cmp eax,ecx // Check if every byte is XORed
jl crypt_loop; // Else jump back to the start label
pop ecx // pop ECX from stack
pop eax // pop EAX from stack
}
}
However, the argument dwKey gives me an error. The code works perfectly if for example the dwKey is replaced by 0x5D.

I think you have two problems.
First, "xor" can't take two memory operands (ds:[eax] is a memory location and dwKey is a memory location); secondly, you've used "byte ptr" to indicate you want a byte, but you're trying to use a DWORD and assembly can't automatically convert those.
So, you'll probably have to load your value into an 8-bit register and then do it. For example:
// XORs every byte in the range [dwStartAddress, dwStartAddress + dwSize)
// with the low byte of dwsKey, in place.
//
//   dwStartAddress - address of the first byte to modify
//   dwSize         - number of bytes to modify; 0 leaves memory untouched
//   dwsKey         - key; only its low 8 bits are used
//
// XOR cannot take two memory operands, and a DWORD variable cannot be the
// source of a byte-sized XOR, so the key is loaded into EBX once and BL is
// used inside the loop.
void XorBlock(DWORD dwStartAddress, DWORD dwSize, DWORD dwsKey)
{
    __asm
    {
        push eax
        push ecx
        mov ecx, dwStartAddress     // ECX = start address ...
        add ecx, dwSize             // ... + size = one past the last byte
        mov eax, dwStartAddress     // EAX = address of the current byte
        mov ebx, dwsKey             // EBX = key parameter; BL is its low byte
                                    // (MSVC saves/restores EBX itself when an
                                    // __asm block uses it)
        cmp eax, ecx                // empty range (or wrapped end address)?
        jae crypt_done              // then there is nothing to XOR
    crypt_loop:
        xor byte ptr ds:[eax], bl   // XOR the current byte with the key byte
        inc eax                     // advance to the next byte
        cmp eax, ecx                // reached the end of the range?
        jb crypt_loop               // unsigned compare: addresses are unsigned
    crypt_done:
        pop ecx                     // restore ECX
        pop eax                     // restore EAX
    }
}
Although, it also looks like dwKey is uninitialized in your code; maybe you should just "mov bl, 0x42". I'm also not sure you need to push and pop the registers; I can't remember what registers you are allowed to clobber with MSVC++ inline assembler.
But, in the end, I think Alan Stokes is correct in his comment: it is very unlikely assembly is actually faster than C/C++ code in this case. The compiler can easily generate this code on its own, and you might find the compiler actually does unexpected optimizations to make it run even faster than the "obvious" assembly does (for example, loop unrolling).

Creating loop in x86 assembly and use of arrays? [duplicate]

I am currently learning assembly programming as part of one of my university modules. I have a program written in C++ with inline x86 assembly which takes a string of 6 characters and encrypts them based on the encryption key.
Here's the full program: https://gist.github.com/anonymous/1bb0c3be77566d9b791d
My code fo the encrypt_chars function:
// Encrypts the first `length` characters of OChars into EChars, one character
// at a time, by calling the inline-assembly routine `encrypt` defined at the
// bottom of this function. OChars and EChars are declared outside this snippet.
void encrypt_chars (int length, char EKey)
{ char temp_char; // holds the current character before and after encryption
for (int i = 0; i < length; i++) // encrypt characters one at a time
{
temp_char = OChars [i]; // copy the i-th source character (its value, not its address)
__asm
{
push eax // preserve EAX and ECX around the call
push ecx
movzx ecx, temp_char // ECX = character, zero-extended to 32 bits
push ecx // argument #2: the character to encrypt
lea eax, EKey // EAX = address of EKey
push eax // argument #1: pointer to EKey
call encrypt
add esp, 8 // caller removes the two 4-byte arguments from the stack
mov temp_char, al // store the result returned in AL back into temp_char
pop ecx
pop eax
}
EChars [i] = temp_char; // Store encrypted char in the encrypted chars array
}
return; // the code below is only reached through `call encrypt`
// encrypt: both arguments are read from the stack, not from registers.
// Inputs: [ebp + 8] = 32-bit address of EKey,
// [ebp + 12] = the character to be encrypted (in the low 8 bits).
// Output: register EAX = the encrypted value of the source character (in the low 8-bit field, AL).
// Side effect: the byte at the EKey address is overwritten (NOT, then +4).
__asm
{
encrypt:
push ebp // save the caller's base pointer
mov ebp, esp // Set up the base pointer
mov eax, [ebp + 8] // EAX = parameter 1 (address of EKey)
mov ecx, [ebp + 12] // ECX = parameter 2 (the character)
push edi // save EDI; it is used as scratch below
push ecx // park the character on the stack while EAX holds the key address
not byte ptr[eax] // invert every bit of the key byte in memory
add byte ptr[eax], 0x04 // add 4 to the key byte in memory
movzx edi, byte ptr[eax] // EDI = updated key byte, zero-extended
pop eax // EAX = the character pushed above
xor eax, edi // XOR the character with the key byte
pop edi // restore the saved EDI
rol al, 1 // rotate AL left by 1 bit
rol al, 1 // rotate AL left by 1 more bit (2 in total)
add al, 0x04 // add 4 to AL
mov esp, ebp // discard anything left on this frame's stack
pop ebp // Restore the base pointer
ret
}
//--- End of Assembly code
}
My questions are:
What is the best/ most efficient way to convert this for loop into assembly?
Is there a way to remove the call for encrypt and place the code directly in its place?
How can I optimise/minimise the use of registers and instructions to make the code smaller and potentially faster?
Is there a way for me to convert the OChars and EChars arrays into assembly?
If possible, would you be able to provide me with an explanation of how the solution works as I am eager to learn.

I can't help with optimization or the cryptography but i can show you a way to go about making a loop, if you look at the loop in this function:
void f()
{
int a, b ;
for(a = 10, b = 1; a != 0; --a)
{
b = b << 2 ;
}
}
The loop is essentially:
for(/*initialize*/; /*condition*/; /*modify*/)
{
// run code
}
So the function in assembly would look something along these lines:
; _f: assembly rendering of the C function f() shown above.
; Locals live in the stack frame: a at [ebp-4], b at [ebp-8].
_f:
push ebp
mov ebp, esp
sub esp, 8 ; reserve 8 bytes of stack for the two ints a and b
initialize: ; for-loop initialisation
mov dword ptr [ebp-4], 10 ; a = 10,
mov dword ptr [ebp-8], 1 ; b = 1
mov eax, [ebp-4] ; eax = a, ready for the first condition check
condition: ; reached with eax == a
test eax, eax ; tests if a == 0
je exit ; a == 0: leave the loop
runCode: ; loop body
mov eax, [ebp-8] ; eax = b
shl eax, 2 ; b = b << 2
mov dword ptr [ebp-8], eax ; store the new b
modify: ; loop update step
mov eax, [ebp-4] ; eax = a
sub eax, 1 ; --a
mov dword ptr [ebp-4], eax ; store the new a; eax still holds a for the check
jmp condition
exit:
mov esp, ebp ; release the locals
pop ebp ; restore the caller's base pointer
ret
Plus I show in the source how you make local variables;
subtract the space from the stack pointer.
and access them through the base pointer.
I tried to keep the source as close to generic Intel x86 assembly syntax as I could, so my apologies if anything needs changing for your specific environment. I was aiming to give a general idea of how to construct a loop in assembly rather than giving you something you can copy, paste and run.

I would suggest to look into assembly code which is generated by compiler. You can change and optimize it later.
How do you get assembler output from C/C++ source in gcc?

Inline asm and c array questions

This is a homework. I have 3 arrays, v1={5,4,3,2,1} ,v2={1,2,3,4,5} and v3={2,3,5,1,4}, the assigment is to change the 1 to 6. Of course, any solution like v1[4]=6, in asm or c is forbidden. So this was my code:
First Code
void main(){
int myArray[5]={5,4,3,2,1};
__asm {
mov ecx,0 //using ecx as counter
myLoop:
mov eax, myArray[ecx] //moving the content on myArray in position ecx to eax
cmp eax,1 //comparing eax to 1
je is_one //if its equal jump to label is_one
inc ecx //ecx+1
cmp ecx,5 //since all vectors have size 5, comparing if ecx is equal to 5
jne myLoop //if not, repeat
jmp Done //if true, go to label Done
is_one:
mov myArray[ecx],6 //changing the content in myArray position ecx to 6
inc ecx //ecx+1
cmp ecx,5 // ecx=5?
jne myLoop //no? repeat loop
jmp Done //yes? Done
Done:
}
printArray(myArray);
}
this didn't work, tried many things like mov eax,6 or mov [eax+ecx],6 , nothing worked until I found this solution
Many tries later code
void main(){
int myArray[5]={5,4,3,2,1};
__asm {
mov ecx,0 //using ecx as counter
myLoop:
mov eax, myArray[TYPE myArray*ecx] //I don't understand how this works
cmp eax,1 //comparing eax to 1
je is_one //if its equal jump to label is_one
inc ecx //ecx+1
cmp ecx,5 //since all vectors have size 5, comparing if ecx is equal to 5
jne myLoop //if not, repeat
jmp Done //if true, go to label Done
is_one:
mov myArray[TYPE myArray*ecx],6 //Uhh...
inc ecx //ecx+1
cmp ecx,5 // ecx=5?
jne myLoop //no? repeat loop
jmp Done //yes? Done
Done:
}
printArray(myArray);
}
And that works like a charm. But I don't understand how or why the MOV array[TYPE array * index], value works(besides TYPE returning the size as explained in link), and why not the others.
Also, since I have to do this for 3 arrays, I tried to copy and paste all the code to changingArray(int myArray[]), declared the 3 arrays in the main, and passed them to changingArray, but now is not changing them. Im pretty sure that with vector you dont have to pass with &, I could be wrong. Still, I can't see why it doesn't change them. So...
Final Code
void changingArray(int myArray[]){
__asm {
mov ecx,0 //using ecx as counter
myLoop:
mov eax, myArray[TYPE myArray*ecx] //I don't understand how this works
cmp eax,1 //comparing eax to 1
je is_one //if its equal jump to label is_one
inc ecx //ecx+1
cmp ecx,5 //since all vectors have size 5, comparing if ecx is equal to 5
jne myLoop //if not, repeat
jmp Done //if true, go to label Done
is_one:
mov myArray[TYPE myArray*ecx],6 //Uhh...
inc ecx //ecx+1
cmp ecx,5 // ecx=5?
jne myLoop //no? repeat loop
jmp Done //yes? Done
Done:
}
printArray(myArray);
}
void main(){
//for some odd reason, they arent changing
int v1[5]={5,4,3,2,1};
int v2[5]={1,2,3,4,5};
int v3[5]={2,3,5,1,4};
changingArray(v1);
changingArray(v2);
changingArray(v3);
}
TL:DR section:
Homework of changing the number 1 to 6 in 3 arrays v1={5,4,3,2,1} ,v2={1,2,3,4,5} and v3={2,3,5,1,4}
1-I don't get why the first code doesn't work, but many tries later code works (the MOV array[TYPE array * index], value instruction).
2- Since I need to do this with 3 arrays, I put all the code in changingArray(int myArray[]), and in the main I declared my 3 arrays in main as shown in final code. While many tries code did change the array, this doesnt. Probably I just made a mistake in c and not asm, but I don't see it.
And sorry for bad english, is not my first language.

mov eax, myArray[TYPE myArray*ecx]
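Here the address referred to is (base address of myArray) + sizeof(the type of elements of myArray) * ecx. In assembly language, indexing is done in bytes, not elements: myArray[ecx] in the first version addresses the byte at offset ecx, so consecutive iterations read overlapping 4-byte values instead of successive ints.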

Debugger Opens While Compiling Inline Assembly Code

I am using some code from Github, but it won't compile. The debugger opens in the middle of the code compiling. This is a personal project and it would be really helpful if anyone could make useful edits to the code as I am new to assembly.
Here is the output that I am getting until the debugger opens:
'Assembly.exe': Loaded
'C:\Users\Mayank\Desktop\Assembly\Debug\Assembly.exe', Symbols loaded.
'Assembly.exe': Loaded 'C:\Windows\SysWOW64\ntdll.dll', Cannot find or
open the PDB file 'Assembly.exe': Loaded
'C:\Windows\SysWOW64\kernel32.dll', Cannot find or open the PDB file
'Assembly.exe': Loaded 'C:\Windows\SysWOW64\KernelBase.dll', Cannot
find or open the PDB file 'Assembly.exe': Loaded
'C:\Windows\SysWOW64\msvcr100d.dll', Symbols loaded. First-chance
exception at 0x011013fe in Assembly.exe: 0xC00000FD: Stack overflow.
Unhandled exception at 0x011013fe in Assembly.exe: 0xC00000FD: Stack
overflow.
Here is the code:
//
// main.cpp
// MergeSortC
//
// Copyright (c) 2012 Mayank. All rights reserved.
//
#include <iostream>
#include <cmath>
#include <stdio.h>
using namespace std;
const int ARRAYSIZE = 30;
int main()
{
int arr[ARRAYSIZE];
int temp_arr[ARRAYSIZE];
int number;
for(int x = 0; x < ARRAYSIZE; x++)
{
number = (rand() % 99) + 1;
arr[x] = number;
}
Merge_Sort:
__asm
{
// EAX - Array start
// ECX - array length
// Merge_Sort (first half)
// Length of the first half
// ECX /= 2
push ECX
shr ECX, 2
call Merge_Sort
pop ECX
// Merge_Sort (second half)
push arr
push EBX
push ECX
// Length of the second half
// ECX = ECX - ECX/2
mov EDX, ECX
shr EDX, 1
sub ARRAYSIZE, EDX
imul EDX, 4
// Start index of the second half
// EAX = EAX + (ECX/2) * 4
add arr, EDX
push EDX
call Merge_Sort
pop EDX
pop ECX
pop EBX
pop arr
pushad
// Merge (first half, second half)
// Length of first half = ECX/2
// Length of second half = ECX - ECX/2
mov EDX, ECX
shr ECX, 1
sub EDX, ECX
// Start of second half = EAX + (ECX/2) * 4
mov EBX, EAX
mov EDI, ECX
imul EDI, 4
add EBX, EDI
// Index of temp array = 0
sub EDI, EDI
call Merge
popad
// Copy back the merged array from temp_arr to arr
call Merge_Copy_Back_Temp
ret
};
Merge:
__asm
{
// Merge two arrays contents.
// The final merged array will be in temp_arr
// Merging is done recursively.
// Arguments:
// EAX - First array's start
// EBX - Second array's start
// ECX - Length of first array
// EDX - Length of second array
// EDI - Index in temp array
pushad
// Handle the cases where one array is empty
cmp ARRAYSIZE, 0
jz First_Array_Over
cmp EDX, 0
jz Second_Array_Over
// Compare first elements of both the arrays
push ARRAYSIZE
push EDI
mov ECX, [arr]
mov EDI, [ECX]
cmp ECX, EDI
pop EDI
pop ECX
// Pick which ever is the least and update that array
jl Update_First_Array
jmp Update_Second_Array
ret
};
Update_First_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EAX]
pop dword ptr [temp_arr + EDI * 4]
add EAX, 4
dec ECX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Update_Second_Array:
__asm
{
// min_elem = min (first elements of first array and second array)
// Put min_elem into the temp array
push dword ptr [EBX]
pop dword ptr [temp_arr + EDI * 4]
add EBX, 4
dec EDX
inc EDI
// Recursively call Merge on the updated array and the
// other array
call Merge
popad
ret
};
Merge_Copy_Back_Temp:
__asm
{
// Copy back the temp array into original array
// Arguments:
// EAX - original array address
// ECX - original array length
pushad
// For copying back, the destination array is EAX
mov EBX, EAX
// Now, the source array is temp_arr
mov EAX, temp_arr
call Copy_Array
popad
ret
};
First_Array_Over:
__asm
{
// Copy the rest of the second array to the temp arr
// because the first array is empty
pushad
mov EAX, EBX
mov ECX, EDX
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Second_Array_Over:
__asm
{
// Copy the rest of the first array to the temp arr
// because the second array is empty
pushad
mov EBX, temp_arr
imul EDI, 4
add EBX, EDI
call Copy_Array
popad
popad
ret
};
Copy_Array:
__asm
{
// Copy array to destination array
// EAX - Array start
// EBX - Destination array
// ECX - Array length
// Trivial case
cmp ECX, 0
jz Copy_Empty_Array
push ECX
sub EDI, EDI
};
copy_loop:
__asm
{
// Copy each element
push dword ptr [EAX + EDI * 4]
pop dword ptr [EBX + EDI * 4]
inc EDI
loop copy_loop
pop ECX
ret
};
Copy_Empty_Array:
__asm
{
ret
};
Read_Arr:
__asm
{
// EAX - array start
// ECX - array length
mov ESI, arr
sub EDI, EDI
};
loop1:
__asm
{
// Read each element
lea eax,[esi+edx*4]
inc EDI
loop loop1
ret
};
return 0;
}

I doubt that the debugger opens during compilation; the stack overflow in your output is a run-time error. That said, this code
Merge_Sort:
__asm
{
// EAX - Array start
// ECX - array length
// Merge_Sort (first half)
// Length of the first half
// ECX /= 2
push ECX
shr ECX, 2
call Merge_Sort
has to overflow the stack when running: it pushes ECX, shifts ECX, and calls itself again, with no check that would ever stop the recursion.

Short of installing the compiler myself, I suggest that you work through the code by adding #if 0 / #endif around a bunch of the code and seeing if you can figure out which instruction(s) the compiler is upset about - it clearly is the compiler crashing, which isn't a very good thing, but does happen.
Of course, that's after you have googled for the error message, etc... ;)

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js